From 7cfa21dd49879137b7ca1770856c8a3b4a2be6ef Mon Sep 17 00:00:00 2001 From: Redouane Kachach Date: Fri, 9 Jun 2023 15:22:14 +0200 Subject: [PATCH] mgr/cephadm: storing prometheus/alertmanager credentials in monstore Fixes: https://tracker.ceph.com/issues/61628 Signed-off-by: Redouane Kachach --- src/cephadm/cephadm.py | 2 +- src/pybind/mgr/cephadm/module.py | 90 ++++++++++--------- src/pybind/mgr/cephadm/services/monitoring.py | 43 +++++---- src/pybind/mgr/cephadm/tests/test_services.py | 44 ++++----- .../mgr/dashboard/controllers/prometheus.py | 2 +- src/pybind/mgr/orchestrator/_interface.py | 8 ++ src/pybind/mgr/orchestrator/module.py | 24 ++++- 7 files changed, 132 insertions(+), 81 deletions(-) diff --git a/src/cephadm/cephadm.py b/src/cephadm/cephadm.py index fab7888650207..3531b0b8e5af2 100755 --- a/src/cephadm/cephadm.py +++ b/src/cephadm/cephadm.py @@ -2982,7 +2982,7 @@ def get_daemon_args(ctx, fsid, daemon_type, daemon_id): if daemon_type == 'node-exporter': config = fetch_configs(ctx) try: - r += [f'--web.config={config["web_config"]}'] + r += [f'--web.config.file={config["web_config"]}'] except KeyError: pass r += ['--path.procfs=/host/proc', diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py index 50e6cd857f679..6d95ff476604b 100644 --- a/src/pybind/mgr/cephadm/module.py +++ b/src/pybind/mgr/cephadm/module.py @@ -452,30 +452,6 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, default=False, desc='Log all refresh metadata. Includes daemon, device, and host info collected regularly. Only has effect if logging at debug level' ), - Option( - 'prometheus_web_user', - type='str', - default='admin', - desc='Prometheus web user' - ), - Option( - 'prometheus_web_password', - type='str', - default='admin', - desc='Prometheus web password' - ), - Option( - 'alertmanager_web_user', - type='str', - default='admin', - desc='Alertmanager web user' - ), - Option( - 'alertmanager_web_password', - type='str', - default='admin', - desc='Alertmanager web password' - ), Option( 'secure_monitoring_stack', type='bool', @@ -563,10 +539,6 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, self.agent_starting_port = 0 self.service_discovery_port = 0 self.secure_monitoring_stack = False - self.prometheus_web_password: Optional[str] = None - self.prometheus_web_user: Optional[str] = None - self.alertmanager_web_password: Optional[str] = None - self.alertmanager_web_user: Optional[str] = None self.apply_spec_fails: List[Tuple[str, str]] = [] self.max_osd_draining_count = 10 self.device_enhanced_scan = False @@ -2629,6 +2601,9 @@ Then run the following: daemon_names.append(dd.name()) return daemon_names + alertmanager_user, alertmanager_password = self._get_alertmanager_credentials() + prometheus_user, prometheus_password = self._get_prometheus_credentials() + deps = [] if daemon_type == 'haproxy': # because cephadm creates new daemon instances whenever @@ -2683,19 +2658,18 @@ Then run the following: # add dependency on ceph-exporter daemons deps += [d.name() for d in self.cache.get_daemons_by_service('ceph-exporter')] if self.secure_monitoring_stack: - if self.prometheus_web_user and self.prometheus_web_password: - deps.append(f'{hash(self.prometheus_web_user + self.prometheus_web_password)}') - if self.alertmanager_web_user and self.alertmanager_web_password: - deps.append( - f'{hash(self.alertmanager_web_user + self.alertmanager_web_password)}') + if prometheus_user and prometheus_password: + deps.append(f'{hash(prometheus_user + prometheus_password)}') + if alertmanager_user and alertmanager_password: + deps.append(f'{hash(alertmanager_user + alertmanager_password)}') elif daemon_type == 'grafana': deps += get_daemon_names(['prometheus', 'loki']) - if self.secure_monitoring_stack and self.prometheus_web_user and self.prometheus_web_password: - deps.append(f'{hash(self.prometheus_web_user + self.prometheus_web_password)}') + if self.secure_monitoring_stack and prometheus_user and prometheus_password: + deps.append(f'{hash(prometheus_user + prometheus_password)}') elif daemon_type == 'alertmanager': deps += get_daemon_names(['mgr', 'alertmanager', 'snmp-gateway']) - if self.secure_monitoring_stack and self.alertmanager_web_user and self.alertmanager_web_password: - deps.append(f'{hash(self.alertmanager_web_user + self.alertmanager_web_password)}') + if self.secure_monitoring_stack and alertmanager_user and alertmanager_password: + deps.append(f'{hash(alertmanager_user + alertmanager_password)}') elif daemon_type == 'promtail': deps += get_daemon_names(['loki']) else: @@ -2796,16 +2770,50 @@ Then run the following: self.events.from_orch_error(e) raise + def _get_alertmanager_credentials(self) -> Tuple[str, str]: + user = self.get_store(AlertmanagerService.USER_CFG_KEY) + password = self.get_store(AlertmanagerService.PASS_CFG_KEY) + if user is None or password is None: + user = 'admin' + password = 'admin' + self.set_store(AlertmanagerService.USER_CFG_KEY, user) + self.set_store(AlertmanagerService.PASS_CFG_KEY, password) + return (user, password) + + def _get_prometheus_credentials(self) -> Tuple[str, str]: + user = self.get_store(PrometheusService.USER_CFG_KEY) + password = self.get_store(PrometheusService.PASS_CFG_KEY) + if user is None or password is None: + user = 'admin' + password = 'admin' + self.set_store(PrometheusService.USER_CFG_KEY, user) + self.set_store(PrometheusService.PASS_CFG_KEY, password) + return (user, password) + + @handle_orch_error + def set_prometheus_access_info(self, user: str, password: str) -> str: + self.set_store(PrometheusService.USER_CFG_KEY, user) + self.set_store(PrometheusService.PASS_CFG_KEY, password) + return 'prometheus credentials updated correctly' + + @handle_orch_error + def set_alertmanager_access_info(self, user: str, password: str) -> str: + self.set_store(AlertmanagerService.USER_CFG_KEY, user) + self.set_store(AlertmanagerService.PASS_CFG_KEY, password) + return 'alertmanager credentials updated correctly' + @handle_orch_error def get_prometheus_access_info(self) -> Dict[str, str]: - return {'user': self.prometheus_web_user or '', - 'password': self.prometheus_web_password or '', + user, password = self._get_prometheus_credentials() + return {'user': user, + 'password': password, 'certificate': self.http_server.service_discovery.ssl_certs.get_root_cert()} @handle_orch_error def get_alertmanager_access_info(self) -> Dict[str, str]: - return {'user': self.alertmanager_web_user or '', - 'password': self.alertmanager_web_password or '', + user, password = self._get_alertmanager_credentials() + return {'user': user, + 'password': password, 'certificate': self.http_server.service_discovery.ssl_certs.get_root_cert()} @handle_orch_error diff --git a/src/pybind/mgr/cephadm/services/monitoring.py b/src/pybind/mgr/cephadm/services/monitoring.py index e0c0640ae49bf..114c848608a30 100644 --- a/src/pybind/mgr/cephadm/services/monitoring.py +++ b/src/pybind/mgr/cephadm/services/monitoring.py @@ -29,9 +29,10 @@ class GrafanaService(CephadmService): def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]: assert self.TYPE == daemon_spec.daemon_type + prometheus_user, prometheus_password = self.mgr._get_prometheus_credentials() deps = [] # type: List[str] - if self.mgr.secure_monitoring_stack and self.mgr.prometheus_web_user and self.mgr.prometheus_web_password: - deps.append(f'{hash(self.mgr.prometheus_web_user + self.mgr.prometheus_web_password)}') + if self.mgr.secure_monitoring_stack and prometheus_user and prometheus_password: + deps.append(f'{hash(prometheus_user + prometheus_password)}') deps.append(f'secure_monitoring_stack:{self.mgr.secure_monitoring_stack}') prom_services = [] # type: List[str] @@ -58,8 +59,8 @@ class GrafanaService(CephadmService): oneline_root_cert = '\\n'.join([line.strip() for line in root_cert.splitlines()]) grafana_data_sources = self.mgr.template.render('services/grafana/ceph-dashboard.yml.j2', {'hosts': prom_services, - 'prometheus_user': self.mgr.prometheus_web_user, - 'prometheus_password': self.mgr.prometheus_web_password, + 'prometheus_user': prometheus_user, + 'prometheus_password': prometheus_password, 'cephadm_root_ca': oneline_root_cert, 'security_enabled': self.mgr.secure_monitoring_stack, 'loki_host': loki_host}) @@ -191,6 +192,8 @@ class GrafanaService(CephadmService): class AlertmanagerService(CephadmService): TYPE = 'alertmanager' DEFAULT_SERVICE_PORT = 9093 + USER_CFG_KEY = 'alertmanager/web_user' + PASS_CFG_KEY = 'alertmanager/web_password' def prepare_create(self, daemon_spec: CephadmDaemonDeploySpec) -> CephadmDaemonDeploySpec: assert self.TYPE == daemon_spec.daemon_type @@ -279,15 +282,16 @@ class AlertmanagerService(CephadmService): deps.append(f'secure_monitoring_stack:{self.mgr.secure_monitoring_stack}') if self.mgr.secure_monitoring_stack: - if self.mgr.alertmanager_web_user and self.mgr.alertmanager_web_password: - deps.append(f'{hash(self.mgr.alertmanager_web_user + self.mgr.alertmanager_web_password)}') + alertmanager_user, alertmanager_password = self.mgr._get_alertmanager_credentials() + if alertmanager_user and alertmanager_password: + deps.append(f'{hash(alertmanager_user + alertmanager_password)}') node_ip = self.mgr.inventory.get_addr(daemon_spec.host) host_fqdn = self._inventory_get_fqdn(daemon_spec.host) cert, key = self.mgr.http_server.service_discovery.ssl_certs.generate_cert( host_fqdn, node_ip) context = { - 'alertmanager_web_user': self.mgr.alertmanager_web_user, - 'alertmanager_web_password': password_hash(self.mgr.alertmanager_web_password), + 'alertmanager_web_user': alertmanager_user, + 'alertmanager_web_password': password_hash(alertmanager_password), } return { "files": { @@ -343,6 +347,8 @@ class PrometheusService(CephadmService): TYPE = 'prometheus' DEFAULT_SERVICE_PORT = 9095 DEFAULT_MGR_PROMETHEUS_PORT = 9283 + USER_CFG_KEY = 'prometheus/web_user' + PASS_CFG_KEY = 'prometheus/web_password' def config(self, spec: ServiceSpec) -> None: # make sure module is enabled @@ -397,10 +403,13 @@ class PrometheusService(CephadmService): mgr_prometheus_sd_url = f'{srv_end_point}service=mgr-prometheus' # always included ceph_exporter_sd_url = f'{srv_end_point}service=ceph-exporter' # always included + alertmanager_user, alertmanager_password = self.mgr._get_alertmanager_credentials() + prometheus_user, prometheus_password = self.mgr._get_prometheus_credentials() + # generate the prometheus configuration context = { - 'alertmanager_web_user': self.mgr.alertmanager_web_user, - 'alertmanager_web_password': self.mgr.alertmanager_web_password, + 'alertmanager_web_user': alertmanager_user, + 'alertmanager_web_password': alertmanager_password, 'secure_monitoring_stack': self.mgr.secure_monitoring_stack, 'service_discovery_username': self.mgr.http_server.service_discovery.username, 'service_discovery_password': self.mgr.http_server.service_discovery.password, @@ -412,8 +421,8 @@ class PrometheusService(CephadmService): } web_context = { - 'prometheus_web_user': self.mgr.prometheus_web_user, - 'prometheus_web_password': password_hash(self.mgr.prometheus_web_password), + 'prometheus_web_user': prometheus_user, + 'prometheus_web_password': password_hash(prometheus_password), } if self.mgr.secure_monitoring_stack: @@ -482,10 +491,12 @@ class PrometheusService(CephadmService): # re-deploy prometheus if the mgr has changed (due to a fail-over i.e). deps.append(self.mgr.get_active_mgr().name()) if self.mgr.secure_monitoring_stack: - if self.mgr.prometheus_web_user and self.mgr.prometheus_web_password: - deps.append(f'{hash(self.mgr.prometheus_web_user + self.mgr.prometheus_web_password)}') - if self.mgr.alertmanager_web_user and self.mgr.alertmanager_web_password: - deps.append(f'{hash(self.mgr.alertmanager_web_user + self.mgr.alertmanager_web_password)}') + alertmanager_user, alertmanager_password = self.mgr._get_alertmanager_credentials() + prometheus_user, prometheus_password = self.mgr._get_prometheus_credentials() + if prometheus_user and prometheus_password: + deps.append(f'{hash(prometheus_user + prometheus_password)}') + if alertmanager_user and alertmanager_password: + deps.append(f'{hash(alertmanager_user + alertmanager_password)}') deps.append(f'secure_monitoring_stack:{self.mgr.secure_monitoring_stack}') # add dependency on ceph-exporter daemons deps += [d.name() for d in self.mgr.cache.get_daemons_by_service('ceph-exporter')] diff --git a/src/pybind/mgr/cephadm/tests/test_services.py b/src/pybind/mgr/cephadm/tests/test_services.py index 8e99eb8db8cc0..25e557631e118 100644 --- a/src/pybind/mgr/cephadm/tests/test_services.py +++ b/src/pybind/mgr/cephadm/tests/test_services.py @@ -545,7 +545,7 @@ class TestMonitoring: @patch("cephadm.serve.CephadmServe._run_cephadm") @patch("socket.getfqdn") @patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '::1') - @patch("cephadm.services.monitoring.password_hash", lambda password: 'fake_password') + @patch("cephadm.services.monitoring.password_hash", lambda password: 'alertmanager_password_hash') def test_alertmanager_config_security_enabled(self, _get_fqdn, _run_cephadm, cephadm_module: CephadmOrchestrator): _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) @@ -560,8 +560,8 @@ class TestMonitoring: with with_host(cephadm_module, 'test'): cephadm_module.secure_monitoring_stack = True - cephadm_module.alertmanager_web_password = 'fake_password' - cephadm_module.alertmanager_web_user = 'admin' + cephadm_module.set_store(AlertmanagerService.USER_CFG_KEY, 'alertmanager_user') + cephadm_module.set_store(AlertmanagerService.PASS_CFG_KEY, 'alertmanager_plain_password') cephadm_module.http_server.service_discovery.ssl_certs.generate_cert = MagicMock(side_effect=gen_cert) cephadm_module.http_server.service_discovery.ssl_certs.get_root_cert = MagicMock(side_effect=get_root_cert) with with_service(cephadm_module, AlertManagerSpec()): @@ -598,7 +598,7 @@ class TestMonitoring: cert_file: alertmanager.crt key_file: alertmanager.key basic_auth_users: - admin: fake_password""").lstrip() + alertmanager_user: alertmanager_password_hash""").lstrip() _run_cephadm.assert_called_with( 'test', @@ -727,7 +727,7 @@ class TestMonitoring: @patch("cephadm.serve.CephadmServe._run_cephadm") @patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '::1') - @patch("cephadm.services.monitoring.password_hash", lambda password: 'fake_password') + @patch("cephadm.services.monitoring.password_hash", lambda password: 'prometheus_password_hash') def test_prometheus_config_security_enabled(self, _run_cephadm, cephadm_module: CephadmOrchestrator): _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) s = RGWSpec(service_id="foo", placement=PlacementSpec(count=1), rgw_frontend_type='beast') @@ -737,8 +737,12 @@ class TestMonitoring: with with_host(cephadm_module, 'test'): cephadm_module.secure_monitoring_stack = True - cephadm_module.http_server.service_discovery.username = 'admin' - cephadm_module.http_server.service_discovery.password = 'fake_password' + cephadm_module.set_store(PrometheusService.USER_CFG_KEY, 'prometheus_user') + cephadm_module.set_store(PrometheusService.PASS_CFG_KEY, 'prometheus_plain_password') + cephadm_module.set_store(AlertmanagerService.USER_CFG_KEY, 'alertmanager_user') + cephadm_module.set_store(AlertmanagerService.PASS_CFG_KEY, 'alertmanager_plain_password') + cephadm_module.http_server.service_discovery.username = 'sd_user' + cephadm_module.http_server.service_discovery.password = 'sd_password' cephadm_module.http_server.service_discovery.ssl_certs.generate_cert = MagicMock( side_effect=gen_cert) with with_service(cephadm_module, MonitoringSpec('node-exporter')) as _, \ @@ -759,7 +763,7 @@ class TestMonitoring: cert_file: prometheus.crt key_file: prometheus.key basic_auth_users: - admin: fake_password""").lstrip() + prometheus_user: prometheus_password_hash""").lstrip() y = dedent(""" # This file is generated by cephadm. @@ -773,15 +777,15 @@ class TestMonitoring: alertmanagers: - scheme: https basic_auth: - username: admin - password: admin + username: alertmanager_user + password: alertmanager_plain_password tls_config: ca_file: root_cert.pem http_sd_configs: - url: https://[::1]:8765/sd/prometheus/sd-config?service=alertmanager basic_auth: - username: admin - password: fake_password + username: sd_user + password: sd_password tls_config: ca_file: root_cert.pem @@ -794,8 +798,8 @@ class TestMonitoring: http_sd_configs: - url: https://[::1]:8765/sd/prometheus/sd-config?service=mgr-prometheus basic_auth: - username: admin - password: fake_password + username: sd_user + password: sd_password tls_config: ca_file: root_cert.pem @@ -806,8 +810,8 @@ class TestMonitoring: http_sd_configs: - url: https://[::1]:8765/sd/prometheus/sd-config?service=node-exporter basic_auth: - username: admin - password: fake_password + username: sd_user + password: sd_password tls_config: ca_file: root_cert.pem @@ -818,8 +822,8 @@ class TestMonitoring: http_sd_configs: - url: https://[::1]:8765/sd/prometheus/sd-config?service=haproxy basic_auth: - username: admin - password: fake_password + username: sd_user + password: sd_password tls_config: ca_file: root_cert.pem @@ -831,8 +835,8 @@ class TestMonitoring: http_sd_configs: - url: https://[::1]:8765/sd/prometheus/sd-config?service=ceph-exporter basic_auth: - username: admin - password: fake_password + username: sd_user + password: sd_password tls_config: ca_file: root_cert.pem """).lstrip() diff --git a/src/pybind/mgr/dashboard/controllers/prometheus.py b/src/pybind/mgr/dashboard/controllers/prometheus.py index 5aab37596e1ec..7222b14f7b5c5 100644 --- a/src/pybind/mgr/dashboard/controllers/prometheus.py +++ b/src/pybind/mgr/dashboard/controllers/prometheus.py @@ -65,7 +65,7 @@ class PrometheusRESTController(RESTController): 'secure_monitoring_stack', 'false')) if secure_monitoring_stack: - cmd = {'prefix': f'orch {module_name} access info'} + cmd = {'prefix': f'orch {module_name} get-credentials'} ret, out, _ = mgr.mon_command(cmd) if ret == 0 and out is not None: access_info = json.loads(out) diff --git a/src/pybind/mgr/orchestrator/_interface.py b/src/pybind/mgr/orchestrator/_interface.py index c68f04056d18c..b1410b29dacc0 100644 --- a/src/pybind/mgr/orchestrator/_interface.py +++ b/src/pybind/mgr/orchestrator/_interface.py @@ -691,6 +691,14 @@ class Orchestrator(object): """get prometheus access information""" raise NotImplementedError() + def set_alertmanager_access_info(self, user: str, password: str) -> OrchResult[str]: + """set alertmanager access information""" + raise NotImplementedError() + + def set_prometheus_access_info(self, user: str, password: str) -> OrchResult[str]: + """set prometheus access information""" + raise NotImplementedError() + def get_alertmanager_access_info(self) -> OrchResult[Dict[str, str]]: """get alertmanager access information""" raise NotImplementedError() diff --git a/src/pybind/mgr/orchestrator/module.py b/src/pybind/mgr/orchestrator/module.py index 70461d05bff4b..1e2727d4087a4 100644 --- a/src/pybind/mgr/orchestrator/module.py +++ b/src/pybind/mgr/orchestrator/module.py @@ -883,13 +883,33 @@ class OrchestratorCli(OrchestratorClientMixin, MgrModule, return HandleCommandResult(stdout=table.get_string()) - @_cli_write_command('orch prometheus access info') + @_cli_write_command('orch prometheus set-credentials') + def _set_prometheus_access_info(self, user: str, password: Optional[str] = None, inbuf: Optional[str] = None) -> HandleCommandResult: + _password = password or inbuf + if not (user and _password): + return HandleCommandResult(-errno.EINVAL, "", ("Invalid arguments. Please provide arguments " + "or -i ")) + completion = self.set_prometheus_access_info(user, _password) + result = raise_if_exception(completion) + return HandleCommandResult(stdout=json.dumps(result)) + + @_cli_write_command('orch alertmanager set-credentials') + def _set_alertmanager_access_info(self, user: str, password: Optional[str] = None, inbuf: Optional[str] = None) -> HandleCommandResult: + _password = password or inbuf + if not (user and _password): + return HandleCommandResult(-errno.EINVAL, "", ("Invalid arguments. Please provide arguments " + "or -i ")) + completion = self.set_alertmanager_access_info(user, _password) + result = raise_if_exception(completion) + return HandleCommandResult(stdout=json.dumps(result)) + + @_cli_write_command('orch prometheus get-credentials') def _get_prometheus_access_info(self) -> HandleCommandResult: completion = self.get_prometheus_access_info() access_info = raise_if_exception(completion) return HandleCommandResult(stdout=json.dumps(access_info)) - @_cli_write_command('orch alertmanager access info') + @_cli_write_command('orch alertmanager get-credentials') def _get_alertmanager_access_info(self) -> HandleCommandResult: completion = self.get_alertmanager_access_info() access_info = raise_if_exception(completion) -- 2.39.5