From: Redouane Kachach Date: Tue, 4 Feb 2025 12:49:10 +0000 (+0100) Subject: mgr/cephadm: adjusting grafana custom code to handle certificates X-Git-Tag: v20.3.0~386^2~12 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=8be392f62b19e1ef74c4f93bb72599fdd203c45c;p=ceph.git mgr/cephadm: adjusting grafana custom code to handle certificates Signed-off-by: Redouane Kachach --- diff --git a/src/pybind/mgr/cephadm/serve.py b/src/pybind/mgr/cephadm/serve.py index d76aff4d123..543909bbf9c 100644 --- a/src/pybind/mgr/cephadm/serve.py +++ b/src/pybind/mgr/cephadm/serve.py @@ -1,3 +1,4 @@ +from datetime import datetime import ipaddress import hashlib import json @@ -30,7 +31,7 @@ from cephadm.autotune import MemoryAutotuner from cephadm.utils import forall_hosts, cephadmNoImage, is_repo_digest, \ CephadmNoImage, CEPH_TYPES, ContainerInspectInfo, SpecialHostLabels from mgr_module import MonCommandFailed -from mgr_util import format_bytes, verify_tls, get_cert_issuer_info, ServerConfigException +from mgr_util import format_bytes from cephadm.services.service_registry import service_registry from .
import utils @@ -63,6 +64,7 @@ class CephadmServe: def __init__(self, mgr: "CephadmOrchestrator"): self.mgr: "CephadmOrchestrator" = mgr self.log = logger + self.last_certificates_check: Optional[datetime] = None def serve(self) -> None: """ @@ -111,10 +113,7 @@ class CephadmServe: self._check_daemons() - services_to_reconfig, _ = self.mgr.cert_mgr.check_services_certificates(fix_issues=True) - for svc in services_to_reconfig: - logger.info(f'certmgr: certificate has changed, reconfiguring service {svc}') - self.mgr.service_action('reconfig', svc) + self._check_certificates() self._purge_deleted_services() @@ -142,39 +141,24 @@ class CephadmServe: self.log.debug("serve exit") def _check_certificates(self) -> None: - for d in self.mgr.cache.get_daemons_by_type('grafana'): - host = d.hostname - assert host is not None - cert = self.mgr.cert_mgr.get_cert('grafana_cert', host=host) - key = self.mgr.cert_mgr.get_key('grafana_key', host=host) - if not cert or not key: - # certificate/key are empty... nothing to check - return - - try: - get_cert_issuer_info(cert) - verify_tls(cert, key) - self.mgr.remove_health_warning('CEPHADM_CERT_ERROR') - except ServerConfigException as e: - err_msg = f""" - Detected invalid grafana certificates. Please, use the following commands: - - > ceph config-key set mgr/cephadm/{d.hostname}/grafana_crt -i - > ceph config-key set mgr/cephadm/{d.hostname}/grafana_key -i - to set valid key and certificate or reset their value to an empty string - in case you want cephadm to generate self-signed Grafana certificates. 
- - Once done, run the following command to reconfig the daemon: - - > ceph orch daemon reconfig grafana.{d.hostname} - - """ - self.log.error(f'Detected invalid grafana certificate on host {d.hostname}: {e}') - self.mgr.set_health_warning('CEPHADM_CERT_ERROR', - f'Invalid grafana certificate on host {d.hostname}: {e}', - 1, [err_msg]) - break + # Check certificates if: + # - This is the first time (startup, last_certificates_check is None) + # - Or the elapsed time is greater than or equal to the configured check period + check_certificates = False + if self.last_certificates_check is None: + check_certificates = True + else: + elapsed_time = datetime_now() - self.last_certificates_check + check_certificates = elapsed_time.days >= self.mgr.certificate_check_period + + if check_certificates: + self.log.debug('_check_certificates') + self.last_certificates_check = datetime_now() + services_to_reconfig, _ = self.mgr.cert_mgr.check_services_certificates(fix_issues=True) + for svc in services_to_reconfig: + logger.info(f'certmgr: certificate has changed, reconfiguring service {svc}') + self.mgr.service_action('reconfig', svc) def _serve_sleep(self) -> None: sleep_interval = max( diff --git a/src/pybind/mgr/cephadm/services/monitoring.py b/src/pybind/mgr/cephadm/services/monitoring.py index 5f1e09bfca3..c60bf79796d 100644 --- a/src/pybind/mgr/cephadm/services/monitoring.py +++ b/src/pybind/mgr/cephadm/services/monitoring.py @@ -14,7 +14,7 @@ from orchestrator import DaemonDescription from ceph.deployment.service_spec import AlertManagerSpec, GrafanaSpec, ServiceSpec, \ SNMPGatewaySpec, PrometheusSpec, MgmtGatewaySpec from cephadm.services.cephadmservice import CephadmService, CephadmDaemonDeploySpec, get_dashboard_urls -from mgr_util import verify_tls, ServerConfigException, build_url, get_cert_issuer_info, password_hash +from mgr_util import build_url, password_hash from ceph.deployment.utils import wrap_ipv6 from .. 
import utils @@ -143,13 +143,24 @@ class GrafanaService(CephadmService): def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]: assert self.TYPE == daemon_spec.daemon_type - cert, pkey = self.prepare_certificates(daemon_spec) + host_fqdns = [socket.getfqdn(daemon_spec.host), 'grafana_servers'] + host_ips = self.mgr.inventory.get_addr(daemon_spec.host) + cert, pkey = self.mgr.cert_mgr.prepare_certificate('grafana_cert', 'grafana_key', host_fqdns, host_ips, target_host=daemon_spec.host) + if not cert or not pkey: + logger.error(f'Cannot generate the needed certificates to deploy Grafana on {daemon_spec.host}') + cert, pkey = ('', '') # this will lead to an error in the daemon as certificates are needed + security_enabled, mgmt_gw_enabled, oauth2_enabled = self.mgr._get_security_config() grafana_ini = self.generate_grafana_ini(daemon_spec, mgmt_gw_enabled, oauth2_enabled) grafana_data_sources = self.generate_data_sources(security_enabled, mgmt_gw_enabled, cert, pkey) # the path of the grafana dashboards are assumed from the providers.yml.j2 file by grafana grafana_dashboards_path = self.mgr.grafana_dashboards_path or '/etc/grafana/dashboards/ceph-dashboard/' + if 'dashboard' in self.mgr.get('mgr_map')['modules']: + self.mgr.check_mon_command({ + 'prefix': 'dashboard set-grafana-api-ssl-verify', + 'value': 'false'}) + config_file = { 'files': { "grafana.ini": grafana_ini, @@ -178,61 +189,6 @@ class GrafanaService(CephadmService): return config_file, self.get_dependencies(self.mgr) - def prepare_certificates(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[str, str]: - cert = self.mgr.cert_mgr.get_cert('grafana_cert', host=daemon_spec.host) - pkey = self.mgr.cert_mgr.get_key('grafana_key', host=daemon_spec.host) - certs_present = (cert and pkey) - is_valid_certificate = False - (org, cn) = (None, None) - if certs_present: - try: - (org, cn) = get_cert_issuer_info(cert) - verify_tls(cert, pkey) - is_valid_certificate = 
True - except ServerConfigException as e: - logger.warning(f'Provided grafana TLS certificates are invalid: {e}') - - if is_valid_certificate: - # let's clear health error just in case it was set - self.mgr.remove_health_warning('CEPHADM_CERT_ERROR') - return cert, pkey - - # certificate is not valid, to avoid overwriting user generated - # certificates we only re-generate in case of self signed certificates - # that were originally generated by cephadm or in case cert/key are empty. - if not certs_present or (org == 'Ceph' and cn == 'cephadm'): - logger.info('Regenerating cephadm self-signed grafana TLS certificates') - host_fqdn = socket.getfqdn(daemon_spec.host) - node_ip = self.mgr.inventory.get_addr(daemon_spec.host) - cert, pkey = self.mgr.cert_mgr.generate_cert([host_fqdn, "grafana_servers"], node_ip) - # cert, pkey = create_self_signed_cert('Ceph', host_fqdn) - self.mgr.cert_mgr.save_cert('grafana_cert', cert, host=daemon_spec.host) - self.mgr.cert_mgr.save_key('grafana_key', pkey, host=daemon_spec.host) - if 'dashboard' in self.mgr.get('mgr_map')['modules']: - self.mgr.check_mon_command({ - 'prefix': 'dashboard set-grafana-api-ssl-verify', - 'value': 'false', - }) - self.mgr.remove_health_warning('CEPHADM_CERT_ERROR') # clear if any - else: - # the certificate was not generated by cephadm, we cannot overwrite - # it by new self-signed ones. Let's warn the user to fix the issue - err_msg = """ - Detected invalid grafana certificates. Set mgr/cephadm/grafana_crt - and mgr/cephadm/grafana_key to valid certificates or reset their value - to an empty string in case you want cephadm to generate self-signed Grafana - certificates. 
- - Once done, run the following command to reconfig the daemon: - - > ceph orch daemon reconfig - - """ - self.mgr.set_health_warning( - 'CEPHADM_CERT_ERROR', 'Invalid grafana certificate: ', 1, [err_msg]) - - return cert, pkey - def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription: # Use the least-created one as the active daemon if daemon_descrs: diff --git a/src/pybind/mgr/cephadm/tests/test_services.py b/src/pybind/mgr/cephadm/tests/test_services.py index ac8f7ddfdcd..c8682c3b5d1 100644 --- a/src/pybind/mgr/cephadm/tests/test_services.py +++ b/src/pybind/mgr/cephadm/tests/test_services.py @@ -1532,7 +1532,6 @@ class TestMonitoring: @patch("cephadm.serve.CephadmServe._run_cephadm") @patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '1::4') @patch("cephadm.module.CephadmOrchestrator.get_fqdn", lambda a, b: 'host_fqdn') - @patch("cephadm.services.monitoring.verify_tls", lambda *_: None) @patch('cephadm.cert_mgr.CertMgr.get_root_ca', lambda instance: cephadm_root_ca) def test_grafana_config_with_mgmt_gw_and_ouath2_proxy(self, _run_cephadm, cephadm_module: CephadmOrchestrator): _run_cephadm.side_effect = async_side_effect(("{}", "", 0)) @@ -1693,7 +1692,6 @@ class TestMonitoring: @patch("cephadm.serve.CephadmServe._run_cephadm") @patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '1::4') @patch("cephadm.module.CephadmOrchestrator.get_fqdn", lambda a, b: 'host_fqdn') - @patch("cephadm.services.monitoring.verify_tls", lambda *_: None) @patch('cephadm.cert_mgr.CertMgr.get_root_ca', lambda instance: cephadm_root_ca) def test_grafana_config_with_mgmt_gw(self, _run_cephadm, cephadm_module: CephadmOrchestrator): _run_cephadm.side_effect = async_side_effect(("{}", "", 0)) @@ -1834,7 +1832,6 @@ class TestMonitoring: @patch("cephadm.serve.CephadmServe._run_cephadm") @patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '1::4') @patch("cephadm.module.CephadmOrchestrator.get_fqdn", lambda a, b: 
'host_fqdn') - @patch("cephadm.services.monitoring.verify_tls", lambda *_: None) def test_grafana_config(self, _run_cephadm, cephadm_module: CephadmOrchestrator): _run_cephadm.side_effect = async_side_effect(("{}", "", 0)) diff --git a/src/pybind/mgr/mgr_util.py b/src/pybind/mgr/mgr_util.py index c20609255c6..8fb9c998247 100644 --- a/src/pybind/mgr/mgr_util.py +++ b/src/pybind/mgr/mgr_util.py @@ -991,6 +991,7 @@ def password_hash(password: Optional[str], salt_password: Optional[str] = None) salt = salt_password.encode('utf8') return bcrypt.hashpw(password.encode('utf8'), salt).decode('utf8') + def parse_combined_pem_file(pem_data: str) -> Tuple[Optional[str], Optional[str]]: # Extract the certificate