git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/cephadm: adjusting grafana custom code to handle certificates
author Redouane Kachach <rkachach@ibm.com>
Tue, 4 Feb 2025 12:49:10 +0000 (13:49 +0100)
committer Redouane Kachach <rkachach@ibm.com>
Tue, 11 Mar 2025 09:34:21 +0000 (10:34 +0100)
Signed-off-by: Redouane Kachach <rkachach@ibm.com>
src/pybind/mgr/cephadm/serve.py
src/pybind/mgr/cephadm/services/monitoring.py
src/pybind/mgr/cephadm/tests/test_services.py
src/pybind/mgr/mgr_util.py

index d76aff4d1238a2b956eff08a28cdf34cef79f06b..543909bbf9c59df1b1a8c2abc7507835fa3521cd 100644 (file)
@@ -1,3 +1,4 @@
+from datetime import datetime
 import ipaddress
 import hashlib
 import json
@@ -30,7 +31,7 @@ from cephadm.autotune import MemoryAutotuner
 from cephadm.utils import forall_hosts, cephadmNoImage, is_repo_digest, \
     CephadmNoImage, CEPH_TYPES, ContainerInspectInfo, SpecialHostLabels
 from mgr_module import MonCommandFailed
-from mgr_util import format_bytes, verify_tls, get_cert_issuer_info, ServerConfigException
+from mgr_util import format_bytes
 from cephadm.services.service_registry import service_registry
 
 from . import utils
@@ -63,6 +64,7 @@ class CephadmServe:
     def __init__(self, mgr: "CephadmOrchestrator"):
         self.mgr: "CephadmOrchestrator" = mgr
         self.log = logger
+        self.last_certificates_check: Optional[datetime] = None
 
     def serve(self) -> None:
         """
@@ -111,10 +113,7 @@ class CephadmServe:
 
                     self._check_daemons()
 
-                    services_to_reconfig, _ = self.mgr.cert_mgr.check_services_certificates(fix_issues=True)
-                    for svc in services_to_reconfig:
-                        logger.info(f'certmgr: certificate has changed, reconfiguring service {svc}')
-                        self.mgr.service_action('reconfig', svc)
+                    self._check_certificates()
 
                     self._purge_deleted_services()
 
@@ -142,39 +141,24 @@ class CephadmServe:
         self.log.debug("serve exit")
 
     def _check_certificates(self) -> None:
-        for d in self.mgr.cache.get_daemons_by_type('grafana'):
-            host = d.hostname
-            assert host is not None
-            cert = self.mgr.cert_mgr.get_cert('grafana_cert', host=host)
-            key = self.mgr.cert_mgr.get_key('grafana_key', host=host)
-            if not cert or not key:
-                # certificate/key are empty... nothing to check
-                return
-
-            try:
-                get_cert_issuer_info(cert)
-                verify_tls(cert, key)
-                self.mgr.remove_health_warning('CEPHADM_CERT_ERROR')
-            except ServerConfigException as e:
-                err_msg = f"""
-                Detected invalid grafana certificates. Please, use the following commands:
-
-                  > ceph config-key set mgr/cephadm/{d.hostname}/grafana_crt -i <path-to-ctr-file>
-                  > ceph config-key set mgr/cephadm/{d.hostname}/grafana_key -i <path-to-key-file>
 
-                to set valid key and certificate or reset their value to an empty string
-                in case you want cephadm to generate self-signed Grafana certificates.
-
-                Once done, run the following command to reconfig the daemon:
-
-                  > ceph orch daemon reconfig grafana.{d.hostname}
-
-                """
-                self.log.error(f'Detected invalid grafana certificate on host {d.hostname}: {e}')
-                self.mgr.set_health_warning('CEPHADM_CERT_ERROR',
-                                            f'Invalid grafana certificate on host {d.hostname}: {e}',
-                                            1, [err_msg])
-                break
+        # Check certificates if:
+        # - This is the first time (startup, last_certificates_check is None)
+        # - Or the elapsed time is greater than or equal to the configured check period
+        check_certificates = False
+        if self.last_certificates_check is None:
+            check_certificates = True
+        else:
+            elapsed_time = datetime_now() - self.last_certificates_check
+            check_certificates = elapsed_time.days >= self.mgr.certificate_check_period
+
+        if check_certificates:
+            self.log.debug('_check_certificates')
+            self.last_certificates_check = datetime_now()
+            services_to_reconfig, _ = self.mgr.cert_mgr.check_services_certificates(fix_issues=True)
+            for svc in services_to_reconfig:
+                logger.info(f'certmgr: certificate has changed, reconfiguring service {svc}')
+                self.mgr.service_action('reconfig', svc)
 
     def _serve_sleep(self) -> None:
         sleep_interval = max(
index 5f1e09bfca367b20351ecf4e28255f5de304a407..c60bf79796d3a7df84f4baca0facdd80e327a4ba 100644 (file)
@@ -14,7 +14,7 @@ from orchestrator import DaemonDescription
 from ceph.deployment.service_spec import AlertManagerSpec, GrafanaSpec, ServiceSpec, \
     SNMPGatewaySpec, PrometheusSpec, MgmtGatewaySpec
 from cephadm.services.cephadmservice import CephadmService, CephadmDaemonDeploySpec, get_dashboard_urls
-from mgr_util import verify_tls, ServerConfigException, build_url, get_cert_issuer_info, password_hash
+from mgr_util import build_url, password_hash
 from ceph.deployment.utils import wrap_ipv6
 from .. import utils
 
@@ -143,13 +143,24 @@ class GrafanaService(CephadmService):
     def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]:
         assert self.TYPE == daemon_spec.daemon_type
 
-        cert, pkey = self.prepare_certificates(daemon_spec)
+        host_fqdns = [socket.getfqdn(daemon_spec.host), 'grafana_servers']
+        host_ips = self.mgr.inventory.get_addr(daemon_spec.host)
+        cert, pkey = self.mgr.cert_mgr.prepare_certificate('grafana_cert', 'grafana_key', host_fqdns, host_ips, target_host=daemon_spec.host)
+        if not cert or not pkey:
+            logger.error(f'Cannot generate the needed certificates to deploy Grafana on {daemon_spec.host}')
+            cert, pkey = ('', '')  # this will lead to an error in the daemon as certificates are needed
+
         security_enabled, mgmt_gw_enabled, oauth2_enabled = self.mgr._get_security_config()
         grafana_ini = self.generate_grafana_ini(daemon_spec, mgmt_gw_enabled, oauth2_enabled)
         grafana_data_sources = self.generate_data_sources(security_enabled, mgmt_gw_enabled, cert, pkey)
         # the path of the grafana dashboards are assumed from the providers.yml.j2 file by grafana
         grafana_dashboards_path = self.mgr.grafana_dashboards_path or '/etc/grafana/dashboards/ceph-dashboard/'
 
+        if 'dashboard' in self.mgr.get('mgr_map')['modules']:
+            self.mgr.check_mon_command({
+                'prefix': 'dashboard set-grafana-api-ssl-verify',
+                'value': 'false'})
+
         config_file = {
             'files': {
                 "grafana.ini": grafana_ini,
@@ -178,61 +189,6 @@ class GrafanaService(CephadmService):
 
         return config_file, self.get_dependencies(self.mgr)
 
-    def prepare_certificates(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[str, str]:
-        cert = self.mgr.cert_mgr.get_cert('grafana_cert', host=daemon_spec.host)
-        pkey = self.mgr.cert_mgr.get_key('grafana_key', host=daemon_spec.host)
-        certs_present = (cert and pkey)
-        is_valid_certificate = False
-        (org, cn) = (None, None)
-        if certs_present:
-            try:
-                (org, cn) = get_cert_issuer_info(cert)
-                verify_tls(cert, pkey)
-                is_valid_certificate = True
-            except ServerConfigException as e:
-                logger.warning(f'Provided grafana TLS certificates are invalid: {e}')
-
-        if is_valid_certificate:
-            # let's clear health error just in case it was set
-            self.mgr.remove_health_warning('CEPHADM_CERT_ERROR')
-            return cert, pkey
-
-        # certificate is not valid, to avoid overwriting user generated
-        # certificates we only re-generate in case of self signed certificates
-        # that were originally generated by cephadm or in case cert/key are empty.
-        if not certs_present or (org == 'Ceph' and cn == 'cephadm'):
-            logger.info('Regenerating cephadm self-signed grafana TLS certificates')
-            host_fqdn = socket.getfqdn(daemon_spec.host)
-            node_ip = self.mgr.inventory.get_addr(daemon_spec.host)
-            cert, pkey = self.mgr.cert_mgr.generate_cert([host_fqdn, "grafana_servers"], node_ip)
-            # cert, pkey = create_self_signed_cert('Ceph', host_fqdn)
-            self.mgr.cert_mgr.save_cert('grafana_cert', cert, host=daemon_spec.host)
-            self.mgr.cert_mgr.save_key('grafana_key', pkey, host=daemon_spec.host)
-            if 'dashboard' in self.mgr.get('mgr_map')['modules']:
-                self.mgr.check_mon_command({
-                    'prefix': 'dashboard set-grafana-api-ssl-verify',
-                    'value': 'false',
-                })
-            self.mgr.remove_health_warning('CEPHADM_CERT_ERROR')  # clear if any
-        else:
-            # the certificate was not generated by cephadm, we cannot overwrite
-            # it by new self-signed ones. Let's warn the user to fix the issue
-            err_msg = """
-            Detected invalid grafana certificates. Set mgr/cephadm/grafana_crt
-            and mgr/cephadm/grafana_key to valid certificates or reset their value
-            to an empty string in case you want cephadm to generate self-signed Grafana
-            certificates.
-
-            Once done, run the following command to reconfig the daemon:
-
-               > ceph orch daemon reconfig <grafana-daemon>
-
-            """
-            self.mgr.set_health_warning(
-                'CEPHADM_CERT_ERROR', 'Invalid grafana certificate: ', 1, [err_msg])
-
-        return cert, pkey
-
     def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription:
         # Use the least-created one as the active daemon
         if daemon_descrs:
index ac8f7ddfdcde20979f18acf03ec2d1be283633c8..c8682c3b5d172baf7a6c410463b9a1c07eca7435 100644 (file)
@@ -1532,7 +1532,6 @@ class TestMonitoring:
     @patch("cephadm.serve.CephadmServe._run_cephadm")
     @patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '1::4')
     @patch("cephadm.module.CephadmOrchestrator.get_fqdn", lambda a, b: 'host_fqdn')
-    @patch("cephadm.services.monitoring.verify_tls", lambda *_: None)
     @patch('cephadm.cert_mgr.CertMgr.get_root_ca', lambda instance: cephadm_root_ca)
     def test_grafana_config_with_mgmt_gw_and_ouath2_proxy(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
         _run_cephadm.side_effect = async_side_effect(("{}", "", 0))
@@ -1693,7 +1692,6 @@ class TestMonitoring:
     @patch("cephadm.serve.CephadmServe._run_cephadm")
     @patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '1::4')
     @patch("cephadm.module.CephadmOrchestrator.get_fqdn", lambda a, b: 'host_fqdn')
-    @patch("cephadm.services.monitoring.verify_tls", lambda *_: None)
     @patch('cephadm.cert_mgr.CertMgr.get_root_ca', lambda instance: cephadm_root_ca)
     def test_grafana_config_with_mgmt_gw(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
         _run_cephadm.side_effect = async_side_effect(("{}", "", 0))
@@ -1834,7 +1832,6 @@ class TestMonitoring:
     @patch("cephadm.serve.CephadmServe._run_cephadm")
     @patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '1::4')
     @patch("cephadm.module.CephadmOrchestrator.get_fqdn", lambda a, b: 'host_fqdn')
-    @patch("cephadm.services.monitoring.verify_tls", lambda *_: None)
     def test_grafana_config(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
         _run_cephadm.side_effect = async_side_effect(("{}", "", 0))
 
index c20609255c6dacf0c3ca1b74f949db546d211193..8fb9c9982478ec29a644443ab481ba5f048e848b 100644 (file)
@@ -991,6 +991,7 @@ def password_hash(password: Optional[str], salt_password: Optional[str] = None)
         salt = salt_password.encode('utf8')
     return bcrypt.hashpw(password.encode('utf8'), salt).decode('utf8')
 
+
 def parse_combined_pem_file(pem_data: str) -> Tuple[Optional[str], Optional[str]]:
 
     # Extract the certificate