git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/cephadm: introducing new cmd to generate self-signed certs
author: Redouane Kachach <rkachach@ibm.com>
Wed, 3 Jul 2024 08:15:20 +0000 (10:15 +0200)
committer: Redouane Kachach <rkachach@ibm.com>
Wed, 31 Jul 2024 06:47:17 +0000 (08:47 +0200)
This new Cephadm command introduces the ability to generate self-signed
certificates for external modules, signed by Cephadm as the root CA.
This feature is essential for implementing mTLS. Previously, if the
user did not provide a certificate and key, the dashboard would
generate its own. With this update, the dashboard instead calls Cephadm
to generate its self-signed certificates, enabling secure mTLS
communication with other backend applications. The Prometheus module
also makes use of this new functionality to generate its self-signed
certificates.

Signed-off-by: Redouane Kachach <rkachach@ibm.com>
src/cephadm/cephadm.py
src/cephadm/tests/test_cephadm.py
src/pybind/mgr/cephadm/module.py
src/pybind/mgr/dashboard/controllers/prometheus.py
src/pybind/mgr/dashboard/tests/test_prometheus.py
src/pybind/mgr/orchestrator/_interface.py
src/pybind/mgr/orchestrator/module.py
src/pybind/mgr/prometheus/module.py

index 5deaec55949c8bf1c29d0174f407a06a556b9664..75ac3045c1e3115555eafbda7623dd44d03ef2a6 100755 (executable)
@@ -2421,11 +2421,23 @@ def prepare_dashboard(
             pathify(ctx.dashboard_crt.name): '/tmp/dashboard.crt:z',
             pathify(ctx.dashboard_key.name): '/tmp/dashboard.key:z'
         }
-        cli(['dashboard', 'set-ssl-certificate', '-i', '/tmp/dashboard.crt'], extra_mounts=mounts)
-        cli(['dashboard', 'set-ssl-certificate-key', '-i', '/tmp/dashboard.key'], extra_mounts=mounts)
     else:
-        logger.info('Generating a dashboard self-signed certificate...')
-        cli(['dashboard', 'create-self-signed-cert'])
+        logger.info('Using certmgr to generate dashboard self-signed certificate...')
+        cert_key = json_loads_retry(lambda: cli(['orch', 'certmgr', 'generate-certificates', 'dashboard'],
+                                                verbosity=CallVerbosity.QUIET_UNLESS_ERROR))
+        mounts = {}
+        if cert_key:
+            cert_file = write_tmp(cert_key['cert'], uid, gid)
+            key_file = write_tmp(cert_key['key'], uid, gid)
+            mounts = {
+                cert_file.name: '/tmp/dashboard.crt:z',
+                key_file.name: '/tmp/dashboard.key:z'
+            }
+        else:
+            logger.error('Cannot generate certificates for Ceph dashboard.')
+
+    cli(['dashboard', 'set-ssl-certificate', '-i', '/tmp/dashboard.crt'], extra_mounts=mounts)
+    cli(['dashboard', 'set-ssl-certificate-key', '-i', '/tmp/dashboard.key'], extra_mounts=mounts)
 
     logger.info('Creating initial admin user...')
     password = ctx.initial_dashboard_password or generate_password()
index 6a5f4c9f00c4fa0214184317a9ed743a4214e7f2..9e0345fe7582e34b2ef98072a59634d46f8b6433 100644 (file)
@@ -282,7 +282,8 @@ class TestCephAdm(object):
     @mock.patch('cephadmlib.firewalld.Firewalld', mock_bad_firewalld)
     @mock.patch('cephadm.Firewalld', mock_bad_firewalld)
     @mock.patch('cephadm.logger')
-    def test_skip_firewalld(self, _logger, cephadm_fs):
+    @mock.patch('cephadm.json_loads_retry', return_value=None)
+    def test_skip_firewalld(self, _logger, _jlr, cephadm_fs):
         """
         test --skip-firewalld actually skips changing firewall
         """
index 85e496b556b83024f65386c1ba7cf8db463a7c76..97a9404a31c1e0aee9fb96a4db570e47878d9917 100644 (file)
@@ -539,7 +539,6 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule,
         super(CephadmOrchestrator, self).__init__(*args, **kwargs)
         self._cluster_fsid: str = self.get('mon_map')['fsid']
         self.last_monmap: Optional[datetime.datetime] = None
-        self.cert_mgr = CertMgr(self, self.get_mgr_ip())
 
         # for serve()
         self.run = True
@@ -675,6 +674,8 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule,
         self.cert_key_store = CertKeyStore(self)
         self.cert_key_store.load()
 
+        self.cert_mgr = CertMgr(self, self.get_mgr_ip())
+
         # ensure the host lists are in sync
         for h in self.inventory.keys():
             if h not in self.cache.daemons:
@@ -3085,6 +3086,14 @@ Then run the following:
             self.set_store(PrometheusService.PASS_CFG_KEY, password)
         return (user, password)
 
+    @handle_orch_error
+    def generate_certificates(self, module_name: str) -> Optional[Dict[str, str]]:
+        supported_moduels = ['dashboard', 'prometheus']
+        if module_name not in supported_moduels:
+            raise OrchestratorError(f'Unsupported modlue {module_name}. Supported moduels are: {supported_moduels}')
+        cert, key = self.cert_mgr.generate_cert(self.get_hostname(), self.get_mgr_ip())
+        return {'cert': cert, 'key': key}
+
     @handle_orch_error
     def set_prometheus_access_info(self, user: str, password: str) -> str:
         self.set_store(PrometheusService.USER_CFG_KEY, user)
@@ -3297,13 +3306,6 @@ Then run the following:
         self._kick_serve_loop()
         return f'Removed setting {setting} from tuned profile {profile_name}'
 
-    @handle_orch_error
-    def service_discovery_dump_cert(self) -> str:
-        root_cert = self.cert_key_store.get_cert('service_discovery_root_cert')
-        if not root_cert:
-            raise OrchestratorError('No certificate found for service discovery')
-        return root_cert
-
     def set_health_warning(self, name: str, summary: str, count: int, detail: List[str]) -> None:
         self.health_checks[name] = {
             'severity': 'warning',
index 75a607d1e31119a289374b73937308dc2ba8a92b..d0ad51c8f7d708b921838dffec101225d9627d0f 100644 (file)
@@ -32,35 +32,43 @@ class PrometheusReceiver(BaseController):
 class PrometheusRESTController(RESTController):
     def prometheus_proxy(self, method, path, params=None, payload=None):
         # type (str, str, dict, dict)
-        user, password, cert_file = self.get_access_info('prometheus')
-        verify = cert_file.name if cert_file else Settings.PROMETHEUS_API_SSL_VERIFY
+        user, password, ca_cert_file, cert_file, key_file = self.get_access_info('prometheus')
+        verify = ca_cert_file.name if ca_cert_file else Settings.PROMETHEUS_API_SSL_VERIFY
+        cert = (cert_file.name, key_file.name) if cert_file and key_file else None
         response = self._proxy(self._get_api_url(Settings.PROMETHEUS_API_HOST),
                                method, path, 'Prometheus', params, payload,
-                               user=user, password=password, verify=verify)
-        if cert_file:
-            cert_file.close()
-            os.unlink(cert_file.name)
+                               user=user, password=password, verify=verify,
+                               cert=cert)
+        for f in [ca_cert_file, cert_file, key_file]:
+            if f:
+                f.close()
+                os.unlink(f.name)
         return response
 
     def alert_proxy(self, method, path, params=None, payload=None):
         # type (str, str, dict, dict)
-        user, password, cert_file = self.get_access_info('alertmanager')
-        verify = cert_file.name if cert_file else Settings.ALERTMANAGER_API_SSL_VERIFY
+        user, password, ca_cert_file, cert_file, key_file = self.get_access_info('alertmanager')
+        verify = ca_cert_file.name if ca_cert_file else Settings.ALERTMANAGER_API_SSL_VERIFY
+        cert = (cert_file.name, key_file.name) if cert_file and key_file else None
         response = self._proxy(self._get_api_url(Settings.ALERTMANAGER_API_HOST, version='v2'),
                                method, path, 'Alertmanager', params, payload,
-                               user=user, password=password, verify=verify, is_alertmanager=True)
-        if cert_file:
-            cert_file.close()
-            os.unlink(cert_file.name)
+                               user=user, password=password, verify=verify,
+                               cert=cert, is_alertmanager=True)
+        for f in [ca_cert_file, cert_file, key_file]:
+            if f:
+                f.close()
+                os.unlink(f.name)
         return response
 
     def get_access_info(self, module_name):
-        # type (str, str, str)
+        # type (str, str, str, str, srt)
         if module_name not in ['prometheus', 'alertmanager']:
             raise DashboardException(f'Invalid module name {module_name}', component='prometheus')
         user = None
         password = None
         cert_file = None
+        pkey_file = None
+        ca_cert_file = None
 
         orch_backend = mgr.get_module_option_ex('orchestrator', 'orchestrator')
         if orch_backend == 'cephadm':
@@ -75,11 +83,25 @@ class PrometheusRESTController(RESTController):
                     user = access_info['user']
                     password = access_info['password']
                     certificate = access_info['certificate']
-                    cert_file = tempfile.NamedTemporaryFile(delete=False)
-                    cert_file.write(certificate.encode('utf-8'))
-                    cert_file.flush()
-
-        return user, password, cert_file
+                    ca_cert_file = tempfile.NamedTemporaryFile(delete=False)
+                    ca_cert_file.write(certificate.encode('utf-8'))
+                    ca_cert_file.flush()
+
+                    cert_file = None
+                    cert = mgr.get_localized_store("crt")  # type: ignore
+                    if cert is not None:
+                        cert_file = tempfile.NamedTemporaryFile(delete=False)
+                        cert_file.write(cert.encode('utf-8'))
+                        cert_file.flush()  # cert_tmp must not be gc'ed
+
+                    pkey_file = None
+                    pkey = mgr.get_localized_store("key")  # type: ignore
+                    if pkey is not None:
+                        pkey_file = tempfile.NamedTemporaryFile(delete=False)
+                        pkey_file.write(pkey.encode('utf-8'))
+                        pkey_file.flush()
+
+        return user, password, ca_cert_file, cert_file, pkey_file
 
     def _get_api_url(self, host, version='v1'):
         return f'{host.rstrip("/")}/api/{version}'
@@ -88,7 +110,7 @@ class PrometheusRESTController(RESTController):
         return ceph_service.CephService.send_command('mon', 'balancer status')
 
     def _proxy(self, base_url, method, path, api_name, params=None, payload=None, verify=True,
-               user=None, password=None, is_alertmanager=False):
+               user=None, password=None, is_alertmanager=False, cert=None):
         # type (str, str, str, str, dict, dict, bool)
         content = None
         try:
@@ -96,10 +118,11 @@ class PrometheusRESTController(RESTController):
             auth = HTTPBasicAuth(user, password) if user and password else None
             response = requests.request(method, base_url + path, params=params,
                                         json=payload, verify=verify,
+                                        cert=cert,
                                         auth=auth)
-        except Exception:
+        except Exception as e:
             raise DashboardException(
-                "Could not reach {}'s API on {}".format(api_name, base_url),
+                "Could not reach {}'s API on {} error {}".format(api_name, base_url, e),
                 http_status_code=404,
                 component='prometheus')
         try:
index 7ff3e5dc166599b05367b082706f091748387bc0..7f795a47450e1fa35e09d1d678f45ef7d30720e3 100644 (file)
@@ -39,7 +39,7 @@ class PrometheusControllerTest(ControllerTestCase):
         mock_request.assert_called_with('GET',
                                         self.prometheus_host_api + '/rules',
                                         json=None, params={},
-                                        verify=True, auth=None)
+                                        verify=True, cert=None, auth=None)
         assert mock_mon_command.called
 
     @patch("dashboard.controllers.prometheus.mgr.get_module_option_ex", return_value='cephadm')
@@ -55,7 +55,7 @@ class PrometheusControllerTest(ControllerTestCase):
                                         self.prometheus_host_api + '/rules',
                                         json=None,
                                         params={},
-                                        verify=True, auth=None)
+                                        verify=True, cert=None, auth=None)
         assert not mock_mon_command.called
 
     @patch("dashboard.controllers.prometheus.mgr.get_module_option_ex", lambda a, b, c=None: None)
@@ -63,14 +63,14 @@ class PrometheusControllerTest(ControllerTestCase):
         with patch('requests.request') as mock_request:
             self._get('/api/prometheus')
             mock_request.assert_called_with('GET', self.alert_host_api + '/alerts',
-                                            json=None, params={}, verify=True, auth=None)
+                                            json=None, params={}, verify=True, cert=None, auth=None)
 
     @patch("dashboard.controllers.prometheus.mgr.get_module_option_ex", lambda a, b, c=None: None)
     def test_get_silences(self):
         with patch('requests.request') as mock_request:
             self._get('/api/prometheus/silences')
             mock_request.assert_called_with('GET', self.alert_host_api + '/silences',
-                                            json=None, params={}, verify=True, auth=None)
+                                            json=None, params={}, verify=True, cert=None, auth=None)
 
     @patch("dashboard.controllers.prometheus.mgr.get_module_option_ex", lambda a, b, c=None: None)
     def test_add_silence(self):
@@ -78,7 +78,7 @@ class PrometheusControllerTest(ControllerTestCase):
             self._post('/api/prometheus/silence', {'id': 'new-silence'})
             mock_request.assert_called_with('POST', self.alert_host_api + '/silences',
                                             params=None, json={'id': 'new-silence'},
-                                            verify=True, auth=None)
+                                            verify=True, cert=None, auth=None)
 
     @patch("dashboard.controllers.prometheus.mgr.get_module_option_ex", lambda a, b, c=None: None)
     def test_update_silence(self):
@@ -86,14 +86,15 @@ class PrometheusControllerTest(ControllerTestCase):
             self._post('/api/prometheus/silence', {'id': 'update-silence'})
             mock_request.assert_called_with('POST', self.alert_host_api + '/silences',
                                             params=None, json={'id': 'update-silence'},
-                                            verify=True, auth=None)
+                                            verify=True, cert=None, auth=None)
 
     @patch("dashboard.controllers.prometheus.mgr.get_module_option_ex", lambda a, b, c=None: None)
     def test_expire_silence(self):
         with patch('requests.request') as mock_request:
             self._delete('/api/prometheus/silence/0')
             mock_request.assert_called_with('DELETE', self.alert_host_api + '/silence/0',
-                                            json=None, params=None, verify=True, auth=None)
+                                            json=None, params=None, verify=True, cert=None,
+                                            auth=None)
 
     def test_silences_empty_delete(self):
         with patch('requests.request') as mock_request:
index b302f702fc4b0a4fc3b63340b6e80d687fb32ac0..7584fabec0f80549251474bf507f1835a8c4e4a9 100644 (file)
@@ -793,6 +793,10 @@ class Orchestrator(object):
         """set prometheus access information"""
         raise NotImplementedError()
 
+    def generate_certificates(self, module_name: str) -> OrchResult[Optional[Dict[str, str]]]:
+        """set prometheus access information"""
+        raise NotImplementedError()
+
     def set_custom_prometheus_alerts(self, alerts_file: str) -> OrchResult[str]:
         """set prometheus custom alerts files and schedule reconfig of prometheus"""
         raise NotImplementedError()
index ecf10d900580e52903a6bfe5d892c15c5eec793d..484c2f39e9cf85d67b22f2f41bd2e3d7751926e6 100644 (file)
@@ -1207,6 +1207,15 @@ class OrchestratorCli(OrchestratorClientMixin, MgrModule,
 
         return _username, _password
 
+    @_cli_write_command('orch certmgr generate-certificates')
+    def _cert_mgr_generate_certificates(self, module_name: str) -> HandleCommandResult:
+        try:
+            completion = self.generate_certificates(module_name)
+            result = raise_if_exception(completion)
+            return HandleCommandResult(stdout=json.dumps(result))
+        except ArgumentError as e:
+            return HandleCommandResult(-errno.EINVAL, "", (str(e)))
+
     @_cli_write_command('orch prometheus set-credentials')
     def _set_prometheus_access_info(self, username: Optional[str] = None, password: Optional[str] = None, inbuf: Optional[str] = None) -> HandleCommandResult:
         try:
index 6f675e3be003853c6b7ea37a4f530810cf499bf5..7a4bca70fa459e5585bdc5b0db52efc8d9f23955 100644 (file)
@@ -10,13 +10,14 @@ import time
 import enum
 from packaging import version  # type: ignore
 from collections import namedtuple
+import tempfile
 
 from mgr_module import CLIReadCommand, MgrModule, MgrStandbyModule, PG_STATES, Option, ServiceInfoT, HandleCommandResult, CLIWriteCommand
 from mgr_util import get_default_addr, profile_method, build_url
 from orchestrator import OrchestratorClientMixin, raise_if_exception, OrchestratorError
 from rbd import RBD
 
-from typing import DefaultDict, Optional, Dict, Any, Set, cast, Tuple, Union, List, Callable
+from typing import DefaultDict, Optional, Dict, Any, Set, cast, Tuple, Union, List, Callable, IO
 
 LabelValues = Tuple[str, ...]
 Number = Union[int, float]
@@ -616,6 +617,8 @@ class Module(MgrModule, OrchestratorClientMixin):
 
     def __init__(self, *args: Any, **kwargs: Any) -> None:
         super(Module, self).__init__(*args, **kwargs)
+        self.key_file: IO[bytes]
+        self.cert_file: IO[bytes]
         self.metrics = self._setup_static_metrics()
         self.shutdown_event = threading.Event()
         self.collect_lock = threading.Lock()
@@ -1769,7 +1772,7 @@ class Module(MgrModule, OrchestratorClientMixin):
             'cephadm', 'secure_monitoring_stack', False)
         if cephadm_secure_monitoring_stack:
             try:
-                self.setup_cephadm_tls_config(server_addr, server_port)
+                self.setup_tls_config(server_addr, server_port)
                 return
             except Exception as e:
                 self.log.exception(f'Failed to setup cephadm based secure monitoring stack: {e}\n',
@@ -1789,28 +1792,29 @@ class Module(MgrModule, OrchestratorClientMixin):
         self.set_uri(build_url(scheme='http', host=self.get_server_addr(),
                      port=server_port, path='/'))
 
-    def setup_cephadm_tls_config(self, server_addr: str, server_port: int) -> None:
-        from cephadm.ssl_cert_utils import SSLCerts
-        # the ssl certs utils uses a NamedTemporaryFile for the cert files
-        # generated with generate_cert_files function. We need the SSLCerts
-        # object to not be cleaned up in order to have those temp files not
-        # be cleaned up, so making it an attribute of the module instead
-        # of just a standalone object
-        self.cephadm_monitoring_tls_ssl_certs = SSLCerts()
-        host = self.get_mgr_ip()
-        try:
-            old_cert = self.get_store('root/cert')
-            old_key = self.get_store('root/key')
-            if not old_cert or not old_key:
-                raise Exception('No old credentials for mgr-prometheus endpoint')
-            self.cephadm_monitoring_tls_ssl_certs.load_root_credentials(old_cert, old_key)
-        except Exception:
-            self.cephadm_monitoring_tls_ssl_certs.generate_root_cert(host)
-            self.set_store('root/cert', self.cephadm_monitoring_tls_ssl_certs.get_root_cert())
-            self.set_store('root/key', self.cephadm_monitoring_tls_ssl_certs.get_root_key())
-
-        cert_file_path, key_file_path = self.cephadm_monitoring_tls_ssl_certs.generate_cert_files(
-            self.get_hostname(), host)
+    def setup_tls_config(self, server_addr: str, server_port: int) -> None:
+        from mgr_util import verify_tls_files
+        cmd = {'prefix': 'orch certmgr generate-certificates',
+               'module_name': 'prometheus',
+               'format': 'json'}
+        ret, out, err = self.mon_command(cmd)
+        if ret != 0:
+            self.log.error(f'mon command to generate-certificates failed: {err}')
+            return
+        elif out is None:
+            self.log.error('mon command to generate-certificates failed to generate certificates')
+            return
+
+        cert_key = json.loads(out)
+        self.cert_file = tempfile.NamedTemporaryFile()
+        self.cert_file.write(cert_key['cert'].encode('utf-8'))
+        self.cert_file.flush()  # cert_tmp must not be gc'ed
+        self.key_file = tempfile.NamedTemporaryFile()
+        self.key_file.write(cert_key['key'].encode('utf-8'))
+        self.key_file.flush()  # pkey_tmp must not be gc'ed
+
+        verify_tls_files(self.cert_file.name, self.key_file.name)
+        cert_file_path, key_file_path = self.cert_file.name, self.key_file.name
 
         cherrypy.config.update({
             'server.socket_host': server_addr,