]> git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
mgr/cephadm: adding mTLS support
authorRedouane Kachach <rkachach@ibm.com>
Wed, 3 Jul 2024 13:43:16 +0000 (15:43 +0200)
committerRedouane Kachach <rkachach@ibm.com>
Wed, 31 Jul 2024 17:37:12 +0000 (19:37 +0200)
Signed-off-by: Redouane Kachach <rkachach@ibm.com>
23 files changed:
src/cephadm/cephadmlib/daemons/mgmt_gateway.py
src/pybind/mgr/cephadm/cert_mgr.py
src/pybind/mgr/cephadm/http_server.py
src/pybind/mgr/cephadm/inventory.py
src/pybind/mgr/cephadm/module.py
src/pybind/mgr/cephadm/services/cephadmservice.py
src/pybind/mgr/cephadm/services/mgmt_gateway.py
src/pybind/mgr/cephadm/services/monitoring.py
src/pybind/mgr/cephadm/ssl_cert_utils.py
src/pybind/mgr/cephadm/templates/services/alertmanager/alertmanager.yml.j2
src/pybind/mgr/cephadm/templates/services/alertmanager/web.yml.j2
src/pybind/mgr/cephadm/templates/services/grafana/ceph-dashboard.yml.j2
src/pybind/mgr/cephadm/templates/services/mgmt-gateway/external_server.conf.j2
src/pybind/mgr/cephadm/templates/services/mgmt-gateway/internal_server.conf.j2
src/pybind/mgr/cephadm/templates/services/node-exporter/web.yml.j2
src/pybind/mgr/cephadm/templates/services/prometheus/prometheus.yml.j2
src/pybind/mgr/cephadm/templates/services/prometheus/web.yml.j2
src/pybind/mgr/cephadm/tests/fixtures.py
src/pybind/mgr/cephadm/tests/test_cephadm.py
src/pybind/mgr/cephadm/tests/test_services.py
src/pybind/mgr/dashboard/controllers/prometheus.py
src/pybind/mgr/orchestrator/_interface.py
src/pybind/mgr/prometheus/module.py

index 93dfc275c41c9d2822afca48f5aa66e2ba2363be..b0a6f0579d2f3d28721ea435b01c2b74638be197 100644 (file)
@@ -104,9 +104,22 @@ class MgmtGateway(ContainerDaemonForm):
             raise OSError('data_dir is not a directory: %s' % (data_dir))
         logger.info('Writing mgmt-gateway config...')
         config_dir = os.path.join(data_dir, 'etc/')
-        makedirs(config_dir, uid, gid, 0o755)
-        recursive_chown(config_dir, uid, gid)
-        populate_files(config_dir, self.files, uid, gid)
+        ssl_dir = os.path.join(data_dir, 'etc/ssl')
+        for ddir in [config_dir, ssl_dir]:
+            makedirs(ddir, uid, gid, 0o755)
+            recursive_chown(ddir, uid, gid)
+        conf_files = {
+            fname: content
+            for fname, content in self.files.items()
+            if fname.endswith('.conf')
+        }
+        cert_files = {
+            fname: content
+            for fname, content in self.files.items()
+            if fname.endswith('.crt') or fname.endswith('.key')
+        }
+        populate_files(config_dir, conf_files, uid, gid)
+        populate_files(ssl_dir, cert_files, uid, gid)
 
     def _get_container_mounts(self, data_dir: str) -> Dict[str, str]:
         mounts: Dict[str, str] = {}
@@ -152,23 +165,6 @@ class MgmtGateway(ContainerDaemonForm):
                 os.path.join(
                     data_dir, 'etc/nginx_external_server.conf'
                 ): '/etc/nginx_external_server.conf:Z',
-                os.path.join(
-                    data_dir, 'etc/nginx_internal.crt'
-                ): '/etc/nginx/ssl/nginx_internal.crt:Z',
-                os.path.join(
-                    data_dir, 'etc/nginx_internal.key'
-                ): '/etc/nginx/ssl/nginx_internal.key:Z',
+                os.path.join(data_dir, 'etc/ssl'): '/etc/nginx/ssl/',
             }
         )
-
-        if 'nginx.crt' in self.files:
-            mounts.update(
-                {
-                    os.path.join(
-                        data_dir, 'etc/nginx.crt'
-                    ): '/etc/nginx/ssl/nginx.crt:Z',
-                    os.path.join(
-                        data_dir, 'etc/nginx.key'
-                    ): '/etc/nginx/ssl/nginx.key:Z',
-                }
-            )
index 9b87c4651d86605579743abc5cfff3bc566d795f..e1715424a95c2071fdd81e2efb40cc9aacef0878 100644 (file)
@@ -1,6 +1,5 @@
 
-from cephadm.ssl_cert_utils import SSLCerts
-from threading import Lock
+from cephadm.ssl_cert_utils import SSLCerts, SSLConfigException
 from typing import TYPE_CHECKING, Tuple, Union, List
 
 if TYPE_CHECKING:
@@ -13,31 +12,21 @@ class CertMgr:
     CEPHADM_ROOT_CA_KEY = 'cephadm_root_ca_key'
 
     def __init__(self, mgr: "CephadmOrchestrator", ip: str) -> None:
-        self.lock = Lock()
-        self.initialized = False
-        with self.lock:
-            if self.initialized:
-                return
-            self.initialized = True
-            self.mgr = mgr
-            self.ssl_certs: SSLCerts = SSLCerts()
-            old_cert = self.mgr.cert_key_store.get_cert(self.CEPHADM_ROOT_CA_CERT)
-            old_key = self.mgr.cert_key_store.get_key(self.CEPHADM_ROOT_CA_KEY)
-            if old_key and old_cert:
+        self.ssl_certs: SSLCerts = SSLCerts()
+        old_cert = mgr.cert_key_store.get_cert(self.CEPHADM_ROOT_CA_CERT)
+        old_key = mgr.cert_key_store.get_key(self.CEPHADM_ROOT_CA_KEY)
+        if old_key and old_cert:
+            try:
                 self.ssl_certs.load_root_credentials(old_cert, old_key)
-            else:
-                self.ssl_certs.generate_root_cert(ip)
-                self.mgr.cert_key_store.save_cert(self.CEPHADM_ROOT_CA_CERT, self.ssl_certs.get_root_cert())
-                self.mgr.cert_key_store.save_key(self.CEPHADM_ROOT_CA_KEY, self.ssl_certs.get_root_key())
+            except SSLConfigException:
+                raise Exception("Cannot load cephadm root CA certificates.")
+        else:
+            self.ssl_certs.generate_root_cert(ip)
+            mgr.cert_key_store.save_cert(self.CEPHADM_ROOT_CA_CERT, self.ssl_certs.get_root_cert())
+            mgr.cert_key_store.save_key(self.CEPHADM_ROOT_CA_KEY, self.ssl_certs.get_root_key())
 
     def get_root_ca(self) -> str:
-        with self.lock:
-            if self.initialized:
-                return self.ssl_certs.get_root_cert()
-        raise Exception("Not initialized")
-
-    def generate_cert(self, host_fqdn: Union[str, List[str]], node_ip: str) -> Tuple[str, str]:
-        with self.lock:
-            if self.initialized:
-                return self.ssl_certs.generate_cert(host_fqdn, node_ip)
-        raise Exception("Not initialized")
+        return self.ssl_certs.get_root_cert()
+
+    def generate_cert(self, host_fqdn: Union[str, List[str]], node_ip: Union[str, List[str]]) -> Tuple[str, str]:
+        return self.ssl_certs.generate_cert(host_fqdn, node_ip)
index 56a87bdcf642f8008fc61bf6bd55642d4fc570e5..7ddce2e8be279e62a4b0b780531f5ffaba63feb1 100644 (file)
@@ -31,7 +31,8 @@ class CephadmHttpServer(threading.Thread):
         self.service_discovery = ServiceDiscovery(mgr)
         self.cherrypy_shutdown_event = threading.Event()
         self._service_discovery_port = self.mgr.service_discovery_port
-        self.secure_monitoring_stack = self.mgr.secure_monitoring_stack
+        security_enabled, mgmt_gw_enabled = self.mgr._get_security_config()
+        self.security_enabled = security_enabled
         super().__init__(target=self.run)
 
     def configure_cherrypy(self) -> None:
@@ -45,12 +46,13 @@ class CephadmHttpServer(threading.Thread):
         self.agent.configure()
         self.service_discovery.configure(self.mgr.service_discovery_port,
                                          self.mgr.get_mgr_ip(),
-                                         self.secure_monitoring_stack)
+                                         self.security_enabled)
 
     def config_update(self) -> None:
         self.service_discovery_port = self.mgr.service_discovery_port
-        if self.secure_monitoring_stack != self.mgr.secure_monitoring_stack:
-            self.secure_monitoring_stack = self.mgr.secure_monitoring_stack
+        security_enabled, mgmt_gw_enabled = self.mgr._get_security_config()
+        if self.security_enabled != security_enabled:
+            self.security_enabled = security_enabled
             self.restart()
 
     @property
index 64d286e30ac18b0a667636db7328aca490c89783..5a89b36280924c85c97f2f0dbeec966d428457bd 100644 (file)
@@ -1942,9 +1942,7 @@ class CertKeyStore():
             'nvmeof_server_cert': {},  # service-name -> cert
             'nvmeof_client_cert': {},  # service-name -> cert
             'nvmeof_root_ca_cert': {},  # service-name -> cert
-            'agent_endpoint_root_cert': Cert(),  # cert
-            'mgmt_gw_root_cert': Cert(),  # cert
-            'service_discovery_root_cert': Cert(),  # cert
+            'mgmt_gw_cert': Cert(),  # cert
             'cephadm_root_ca_cert': Cert(),  # cert
             'grafana_cert': {},  # host -> cert
         }
@@ -1952,6 +1950,7 @@ class CertKeyStore():
         # that don't have a key here are probably certs in PEM format
         # so there is no need to store a separate key
         self.known_keys = {
+            'mgmt_gw_key': PrivKey(),  # key
             'cephadm_root_ca_key': PrivKey(),  # cert
             'grafana_key': {},  # host -> key
             'iscsi_ssl_key': {},  # service-name -> key
index 97a9404a31c1e0aee9fb96a4db570e47878d9917..51d677ec6cd6d36fcb8ca5cc948ad3713763272c 100644 (file)
@@ -6,6 +6,7 @@ import ipaddress
 import logging
 import re
 import shlex
+import socket
 from collections import defaultdict
 from configparser import ConfigParser
 from contextlib import contextmanager
@@ -771,6 +772,23 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule,
         assert service_type in ServiceSpec.KNOWN_SERVICE_TYPES
         return self.cephadm_services[service_type]
 
+    def get_fqdn(self, hostname: str) -> str:
+        """Get a host's FQDN with its hostname.
+
+           If the FQDN can't be resolved, the address from the inventory will
+           be returned instead.
+        """
+        # TODO(redo): get fqdn from the inventory
+        addr = self.inventory.get_addr(hostname)
+        return socket.getfqdn(addr)
+
+    def _get_security_config(self) -> Tuple[bool, bool]:
+        # TODO(redo): enable when oauth2-proxy code is active
+        # oauth2_proxy_enabled = len(self.mgr.cache.get_daemons_by_service('oauth2-proxy')) > 0
+        mgmt_gw_enabled = len(self.cache.get_daemons_by_service('mgmt-gateway')) > 0
+        security_enabled = self.secure_monitoring_stack or mgmt_gw_enabled
+        return security_enabled, mgmt_gw_enabled
+
     def _get_cephadm_binary_path(self) -> str:
         import hashlib
         m = hashlib.sha256()
@@ -2611,9 +2629,6 @@ Then run the following:
                 raise OrchestratorError(
                     f'If {service_name} is removed then the following OSDs will remain, --force to proceed anyway\n{msg}')
 
-        if service_name == 'mgmt-gateway':
-            self.set_module_option('secure_monitoring_stack', False)
-
         found = self.spec_store.rm(service_name)
         if found and service_name.startswith('osd.'):
             self.spec_store.finally_rm(service_name)
@@ -2943,21 +2958,26 @@ Then run the following:
             # add dependency on ceph-exporter daemons
             deps += [d.name() for d in self.cache.get_daemons_by_service('ceph-exporter')]
             deps += [d.name() for d in self.cache.get_daemons_by_service('mgmt-gateway')]
-            if self.secure_monitoring_stack:
+            security_enabled, _ = self._get_security_config()
+            if security_enabled:
                 if prometheus_user and prometheus_password:
                     deps.append(f'{hash(prometheus_user + prometheus_password)}')
                 if alertmanager_user and alertmanager_password:
                     deps.append(f'{hash(alertmanager_user + alertmanager_password)}')
         elif daemon_type == 'grafana':
             deps += get_daemon_names(['prometheus', 'loki', 'mgmt-gateway'])
-            if self.secure_monitoring_stack and prometheus_user and prometheus_password:
+            security_enabled, _ = self._get_security_config()
+            if security_enabled and prometheus_user and prometheus_password:
                 deps.append(f'{hash(prometheus_user + prometheus_password)}')
         elif daemon_type == 'alertmanager':
             deps += get_daemon_names(['mgr', 'alertmanager', 'snmp-gateway', 'mgmt-gateway'])
-            if self.secure_monitoring_stack and alertmanager_user and alertmanager_password:
+            security_enabled, _ = self._get_security_config()
+            if security_enabled and alertmanager_user and alertmanager_password:
                 deps.append(f'{hash(alertmanager_user + alertmanager_password)}')
         elif daemon_type == 'promtail':
             deps += get_daemon_names(['loki'])
+        elif daemon_type in ['ceph-exporter', 'node-exporter']:
+            deps += get_daemon_names(['mgmt-gateway'])
         elif daemon_type == JaegerAgentService.TYPE:
             for dd in self.cache.get_daemons_by_type(JaegerCollectorService.TYPE):
                 assert dd.hostname is not None
@@ -2972,7 +2992,7 @@ Then run the following:
             # this daemon type doesn't need deps mgmt
             pass
 
-        if daemon_type in ['prometheus', 'node-exporter', 'alertmanager', 'grafana', 'mgmt-gateway']:
+        if daemon_type in ['prometheus', 'node-exporter', 'alertmanager', 'grafana']:
             deps.append(f'secure_monitoring_stack:{self.secure_monitoring_stack}')
 
         return sorted(deps)
@@ -3088,10 +3108,17 @@ Then run the following:
 
     @handle_orch_error
     def generate_certificates(self, module_name: str) -> Optional[Dict[str, str]]:
+        import socket
         supported_moduels = ['dashboard', 'prometheus']
         if module_name not in supported_moduels:
             raise OrchestratorError(f'Unsupported modlue {module_name}. Supported moduels are: {supported_moduels}')
-        cert, key = self.cert_mgr.generate_cert(self.get_hostname(), self.get_mgr_ip())
+
+        host_fqdns = [socket.getfqdn(self.get_hostname())]
+        node_ip = self.get_mgr_ip()
+        if module_name == 'dashboard':
+            host_fqdns.append('dashboard_servers')
+
+        cert, key = self.cert_mgr.generate_cert(host_fqdns, node_ip)
         return {'cert': cert, 'key': key}
 
     @handle_orch_error
@@ -3148,6 +3175,9 @@ Then run the following:
 
     @handle_orch_error
     def get_prometheus_access_info(self) -> Dict[str, str]:
+        security_enabled, _ = self._get_security_config()
+        if not security_enabled:
+            return {}
         user, password = self._get_prometheus_credentials()
         return {'user': user,
                 'password': password,
@@ -3155,6 +3185,9 @@ Then run the following:
 
     @handle_orch_error
     def get_alertmanager_access_info(self) -> Dict[str, str]:
+        security_enabled, _ = self._get_security_config()
+        if not security_enabled:
+            return {}
         user, password = self._get_alertmanager_credentials()
         return {'user': user,
                 'password': password,
@@ -3403,9 +3436,6 @@ Then run the following:
         host_count = len(self.inventory.keys())
         max_count = self.max_count_per_host
 
-        if spec.service_type == 'mgmt-gateway':
-            self.set_module_option('secure_monitoring_stack', True)
-
         if spec.placement.count is not None:
             if spec.service_type in ['mon', 'mgr']:
                 if spec.placement.count > max(5, host_count):
index 4ef6207187575dc0ee5a124c09c0017f3008e6e1..2964a44e2c3b188aa5d854f7b3ca1471dde25709 100644 (file)
@@ -90,7 +90,7 @@ def get_dashboard_endpoints(svc: 'CephadmService') -> Tuple[List[str], Optional[
             if not port:
                 continue
             assert dd.hostname is not None
-            addr = svc._inventory_get_fqdn(dd.hostname)
+            addr = svc.mgr.get_fqdn(dd.hostname)
             dashboard_endpoints.append(f'{addr}:{port}')
 
     return dashboard_endpoints, protocol
@@ -124,7 +124,7 @@ def get_dashboard_urls(svc: 'CephadmService') -> List[str]:
         if dd.daemon_id == svc.mgr.get_mgr_id():
             continue
         assert dd.hostname is not None
-        addr = svc._inventory_get_fqdn(dd.hostname)
+        addr = svc.mgr.get_fqdn(dd.hostname)
         dashboard_urls.append(build_url(scheme=proto, host=addr, port=port).rstrip('/'))
 
     return dashboard_urls
@@ -384,15 +384,6 @@ class CephadmService(metaclass=ABCMeta):
                 raise OrchestratorError(f"Unable to fetch keyring for {entity}: {err}")
         return simplified_keyring(entity, keyring)
 
-    def _inventory_get_fqdn(self, hostname: str) -> str:
-        """Get a host's FQDN with its hostname.
-
-           If the FQDN can't be resolved, the address from the inventory will
-           be returned instead.
-        """
-        addr = self.mgr.inventory.get_addr(hostname)
-        return socket.getfqdn(addr)
-
     def _set_value_on_dashboard(self,
                                 service_name: str,
                                 get_mon_cmd: str,
@@ -1282,11 +1273,29 @@ class CephExporterService(CephService):
         if spec.stats_period:
             exporter_config.update({'stats-period': f'{spec.stats_period}'})
 
+        security_enabled, mgmt_gw_enabled = self.mgr._get_security_config()
+        if security_enabled:
+            exporter_config.update({'https_enabled': True})
+            crt, key = self.get_certificates(daemon_spec)
+            exporter_config['files'] = {
+                'ceph-exporter.crt': crt,
+                'ceph-exporter.key': key
+            }
         daemon_spec.keyring = keyring
         daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec)
         daemon_spec.final_config = merge_dicts(daemon_spec.final_config, exporter_config)
+
+        deps = []
+        deps += [d.name() for d in self.mgr.cache.get_daemons_by_service('mgmt-gateway')]
+        deps += [f'secure_monitoring_stack:{self.mgr.secure_monitoring_stack}']
+        daemon_spec.deps = deps
+
         return daemon_spec
 
+    def get_certificates(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[str, str]:
+        node_ip = self.mgr.inventory.get_addr(daemon_spec.host)
+        host_fqdn = self.mgr.get_fqdn(daemon_spec.host)
+        return self.mgr.cert_mgr.generate_cert(host_fqdn, node_ip)
 
 class CephfsMirrorService(CephService):
     TYPE = 'cephfs-mirror'
index 610634f92d8ede5c3427522594a10c983d1c4653..2470b7de4cbba7cf35b028dc3271ab17017a89e6 100644 (file)
@@ -1,13 +1,44 @@
 import logging
-from typing import List, Any, Tuple, Dict, cast
+from typing import TYPE_CHECKING, List, Any, Tuple, Dict, cast, Optional
 
 from orchestrator import DaemonDescription
 from ceph.deployment.service_spec import MgmtGatewaySpec, GrafanaSpec
 from cephadm.services.cephadmservice import CephadmService, CephadmDaemonDeploySpec, get_dashboard_endpoints
+from mgr_util import build_url
+
+if TYPE_CHECKING:
+    from cephadm.module import CephadmOrchestrator
 
 logger = logging.getLogger(__name__)
 
 
+def get_mgmt_gw_internal_endpoint(mgr: "CephadmOrchestrator") -> Optional[str]:
+    mgmt_gw_daemons = mgr.cache.get_daemons_by_service('mgmt-gateway')
+    if not mgmt_gw_daemons:
+        return None
+
+    dd = mgmt_gw_daemons[0]
+    assert dd.hostname is not None
+    mgmt_gw_addr = mgr.get_fqdn(dd.hostname)
+    mgmt_gw_internal_endpoint = build_url(scheme='https', host=mgmt_gw_addr, port=MgmtGatewayService.INTERNAL_SERVICE_PORT)
+    return f'{mgmt_gw_internal_endpoint}/internal'
+
+
+def get_mgmt_gw_external_endpoint(mgr: "CephadmOrchestrator") -> Optional[str]:
+    mgmt_gw_daemons = mgr.cache.get_daemons_by_service('mgmt-gateway')
+    if not mgmt_gw_daemons:
+        return None
+
+    dd = mgmt_gw_daemons[0]
+    assert dd.hostname is not None
+    mgmt_gw_port = dd.ports[0] if dd.ports else None
+    mgmt_gw_addr = mgr.get_fqdn(dd.hostname)
+    mgmt_gw_spec = cast(MgmtGatewaySpec, mgr.spec_store['mgmt-gateway'].spec)
+    protocol = 'http' if mgmt_gw_spec.disable_https else 'https'
+    mgmt_gw_external_endpoint = build_url(scheme=protocol, host=mgmt_gw_addr, port=mgmt_gw_port)
+    return mgmt_gw_external_endpoint
+
+
 class MgmtGatewayService(CephadmService):
     TYPE = 'mgmt-gateway'
     SVC_TEMPLATE_PATH = 'services/mgmt-gateway/nginx.conf.j2'
@@ -40,17 +71,31 @@ class MgmtGatewayService(CephadmService):
         self.mgr.set_module_option_ex('dashboard', 'standby_error_status_code', '503')
         self.mgr.set_module_option_ex('dashboard', 'standby_behaviour', 'error')
 
-    def get_certificates(self, svc_spec: MgmtGatewaySpec, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[str, str, str, str]:
+    def get_external_certificates(self, svc_spec: MgmtGatewaySpec, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[str, str]:
+        cert = self.mgr.cert_key_store.get_cert('mgmt_gw_cert')
+        key = self.mgr.cert_key_store.get_key('mgmt_gw_key')
+        if not (cert and key):
+            # not available on store, check if provided on the spec
+            if svc_spec.ssl_certificate and svc_spec.ssl_certificate_key:
+                cert = svc_spec.ssl_certificate
+                key = svc_spec.ssl_certificate_key
+            else:
+                # not provided on the spec, let's generate self-signed certificates
+                addr = self.mgr.inventory.get_addr(daemon_spec.host)
+                host_fqdn = self.mgr.get_fqdn(daemon_spec.host)
+                cert, key = self.mgr.cert_mgr.generate_cert(host_fqdn, addr)
+            # save certificates
+            if cert and key:
+                self.mgr.cert_key_store.save_cert('mgmt_gw_cert', cert)
+                self.mgr.cert_key_store.save_key('mgmt_gw_key', key)
+            else:
+                logger.error("Failed to obtain certificate and key from mgmt-gateway.")
+        return cert, key
+
+    def get_internal_certificates(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[str, str]:
         node_ip = self.mgr.inventory.get_addr(daemon_spec.host)
-        host_fqdn = self._inventory_get_fqdn(daemon_spec.host)
-        internal_cert, internal_pkey = self.mgr.cert_mgr.generate_cert(host_fqdn, node_ip)
-        cert = svc_spec.ssl_certificate
-        pkey = svc_spec.ssl_certificate_key
-        if not (cert and pkey):
-            # In case the user has not provided certificates then we generate self-signed ones
-            cert, pkey = self.mgr.cert_mgr.generate_cert(host_fqdn, node_ip)
-
-        return internal_cert, internal_pkey, cert, pkey
+        host_fqdn = self.mgr.get_fqdn(daemon_spec.host)
+        return self.mgr.cert_mgr.generate_cert(host_fqdn, node_ip)
 
     def get_mgmt_gateway_deps(self) -> List[str]:
         # url_prefix for the following services depends on the presence of mgmt-gateway
@@ -58,8 +103,6 @@ class MgmtGatewayService(CephadmService):
         deps += [d.name() for d in self.mgr.cache.get_daemons_by_service('prometheus')]
         deps += [d.name() for d in self.mgr.cache.get_daemons_by_service('alertmanager')]
         deps += [d.name() for d in self.mgr.cache.get_daemons_by_service('grafana')]
-        # secure_monitoring_stack affects the protocol used by monitoring services
-        deps += [f'secure_monitoring_stack:{self.mgr.secure_monitoring_stack}']
         for dd in self.mgr.cache.get_daemons_by_service('mgr'):
             # we consider mgr a dep even if the dashboard is disabled
             # in order to be consistent with _calc_daemon_deps().
@@ -70,9 +113,8 @@ class MgmtGatewayService(CephadmService):
     def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]:
         assert self.TYPE == daemon_spec.daemon_type
         svc_spec = cast(MgmtGatewaySpec, self.mgr.spec_store[daemon_spec.service_name].spec)
+        scheme = 'https'
         dashboard_endpoints, dashboard_scheme = get_dashboard_endpoints(self)
-        scheme = 'https' if self.mgr.secure_monitoring_stack else 'http'
-
         prometheus_endpoints = self.get_service_endpoints('prometheus')
         alertmanager_endpoints = self.get_service_endpoints('alertmanager')
         grafana_endpoints = self.get_service_endpoints('grafana')
@@ -88,20 +130,11 @@ class MgmtGatewayService(CephadmService):
             'alertmanager_endpoints': alertmanager_endpoints,
             'grafana_endpoints': grafana_endpoints
         }
-        external_server_context = {
+        server_context = {
             'spec': svc_spec,
+            'internal_port': self.INTERNAL_SERVICE_PORT,
             'dashboard_scheme': dashboard_scheme,
-            'grafana_scheme': grafana_protocol,
-            'prometheus_scheme': scheme,
-            'alertmanager_scheme': scheme,
             'dashboard_endpoints': dashboard_endpoints,
-            'prometheus_endpoints': prometheus_endpoints,
-            'alertmanager_endpoints': alertmanager_endpoints,
-            'grafana_endpoints': grafana_endpoints
-        }
-        internal_server_context = {
-            'spec': svc_spec,
-            'internal_port': self.INTERNAL_SERVICE_PORT,
             'grafana_scheme': grafana_protocol,
             'prometheus_scheme': scheme,
             'alertmanager_scheme': scheme,
@@ -110,19 +143,21 @@ class MgmtGatewayService(CephadmService):
             'grafana_endpoints': grafana_endpoints
         }
 
-        internal_cert, internal_pkey, cert, pkey = self.get_certificates(svc_spec, daemon_spec)
+        cert, key = self.get_external_certificates(svc_spec, daemon_spec)
+        internal_cert, internal_pkey = self.get_internal_certificates(daemon_spec)
         daemon_config = {
             "files": {
                 "nginx.conf": self.mgr.template.render(self.SVC_TEMPLATE_PATH, main_context),
-                "nginx_external_server.conf": self.mgr.template.render(self.EXTERNAL_SVC_TEMPLATE_PATH, external_server_context),
-                "nginx_internal_server.conf": self.mgr.template.render(self.INTERNAL_SVC_TEMPLATE_PATH, internal_server_context),
+                "nginx_external_server.conf": self.mgr.template.render(self.EXTERNAL_SVC_TEMPLATE_PATH, server_context),
+                "nginx_internal_server.conf": self.mgr.template.render(self.INTERNAL_SVC_TEMPLATE_PATH, server_context),
                 "nginx_internal.crt": internal_cert,
-                "nginx_internal.key": internal_pkey
+                "nginx_internal.key": internal_pkey,
+                "ca.crt": self.mgr.cert_mgr.get_root_ca()
             }
         }
         if not svc_spec.disable_https:
             daemon_config["files"]["nginx.crt"] = cert
-            daemon_config["files"]["nginx.key"] = pkey
+            daemon_config["files"]["nginx.key"] = key
 
         return daemon_config, sorted(self.get_mgmt_gateway_deps())
 
@@ -133,3 +168,7 @@ class MgmtGatewayService(CephadmService):
         # reset the standby dashboard redirection behaviour
         self.mgr.set_module_option_ex('dashboard', 'standby_error_status_code', '500')
         self.mgr.set_module_option_ex('dashboard', 'standby_behaviour', 'redirect')
+        if daemon.hostname is not None:
+            # delete cert/key entries for this mgmt-gateway daemon
+            self.mgr.cert_key_store.rm_cert('mgmt_gw_cert')
+            self.mgr.cert_key_store.rm_key('mgmt_gw_key')
index a20b1202ccd143fddd740e0381e350ff943c6647..3a20bbfe4853ae07f3ace03fb6bb8ca27c8a3212 100644 (file)
@@ -8,10 +8,10 @@ from mgr_module import HandleCommandResult
 
 from orchestrator import DaemonDescription
 from ceph.deployment.service_spec import AlertManagerSpec, GrafanaSpec, ServiceSpec, \
-    SNMPGatewaySpec, PrometheusSpec, MgmtGatewaySpec
+    SNMPGatewaySpec, PrometheusSpec
 from cephadm.services.cephadmservice import CephadmService, CephadmDaemonDeploySpec, get_dashboard_urls
-from cephadm.services.mgmt_gateway import MgmtGatewayService
-from mgr_util import verify_tls, ServerConfigException, create_self_signed_cert, build_url, get_cert_issuer_info, password_hash
+from cephadm.services.mgmt_gateway import get_mgmt_gw_internal_endpoint, get_mgmt_gw_external_endpoint
+from mgr_util import verify_tls, ServerConfigException, build_url, get_cert_issuer_info, password_hash
 from ceph.deployment.utils import wrap_ipv6
 
 logger = logging.getLogger(__name__)
@@ -28,11 +28,12 @@ class GrafanaService(CephadmService):
 
     def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]:
         assert self.TYPE == daemon_spec.daemon_type
-        prometheus_user, prometheus_password = self.mgr._get_prometheus_credentials()
         deps = []  # type: List[str]
-        if self.mgr.secure_monitoring_stack and prometheus_user and prometheus_password:
-            deps.append(f'{hash(prometheus_user + prometheus_password)}')
+        security_enabled, mgmt_gw_enabled = self.mgr._get_security_config()
         deps.append(f'secure_monitoring_stack:{self.mgr.secure_monitoring_stack}')
+        prometheus_user, prometheus_password = self.mgr._get_prometheus_credentials()
+        if security_enabled and prometheus_user and prometheus_password:
+            deps.append(f'{hash(prometheus_user + prometheus_password)}')
 
         # add a dependency since url_prefix depends on the existence of mgmt-gateway
         deps += [d.name() for d in self.mgr.cache.get_daemons_by_service('mgmt-gateway')]
@@ -40,31 +41,40 @@ class GrafanaService(CephadmService):
         prom_services = []  # type: List[str]
         for dd in self.mgr.cache.get_daemons_by_service('prometheus'):
             assert dd.hostname is not None
-            addr = dd.ip if dd.ip else self._inventory_get_fqdn(dd.hostname)
+            addr = dd.ip if dd.ip else self.mgr.get_fqdn(dd.hostname)
             port = dd.ports[0] if dd.ports else 9095
-            protocol = 'https' if self.mgr.secure_monitoring_stack else 'http'
+            protocol = 'https' if security_enabled else 'http'
             prom_services.append(build_url(scheme=protocol, host=addr, port=port))
-
             deps.append(dd.name())
 
+        # in case mgmt-gw is enabled we only use one url pointing to the internal
+        # mgmt gw for dashboard which will take care of HA in this case
+        if mgmt_gw_enabled:
+            prom_services = [f'{get_mgmt_gw_internal_endpoint(self.mgr)}/prometheus']
+
         daemons = self.mgr.cache.get_daemons_by_service('loki')
         loki_host = ''
         for i, dd in enumerate(daemons):
             assert dd.hostname is not None
             if i == 0:
-                addr = dd.ip if dd.ip else self._inventory_get_fqdn(dd.hostname)
+                addr = dd.ip if dd.ip else self.mgr.get_fqdn(dd.hostname)
                 loki_host = build_url(scheme='http', host=addr, port=3100)
 
             deps.append(dd.name())
 
-        root_cert = self.mgr.http_server.service_discovery.ssl_certs.get_root_cert()
+        root_cert = self.mgr.cert_mgr.get_root_ca()
+        cert, pkey = self.prepare_certificates(daemon_spec)
         oneline_root_cert = '\\n'.join([line.strip() for line in root_cert.splitlines()])
+        oneline_cert = '\\n'.join([line.strip() for line in cert.splitlines()])
+        oneline_key = '\\n'.join([line.strip() for line in pkey.splitlines()])
         grafana_data_sources = self.mgr.template.render('services/grafana/ceph-dashboard.yml.j2',
                                                         {'hosts': prom_services,
                                                          'prometheus_user': prometheus_user,
                                                          'prometheus_password': prometheus_password,
                                                          'cephadm_root_ca': oneline_root_cert,
-                                                         'security_enabled': self.mgr.secure_monitoring_stack,
+                                                         'cert': oneline_cert,
+                                                         'key': oneline_key,
+                                                         'security_enabled': security_enabled,
                                                          'loki_host': loki_host})
 
         spec: GrafanaSpec = cast(
@@ -80,7 +90,6 @@ class GrafanaService(CephadmService):
                 daemon_spec.port_ips = {str(grafana_port): ip_to_bind_to}
                 grafana_ip = ip_to_bind_to
 
-        mgmt_gw_enabled = len(self.mgr.cache.get_daemons_by_service('mgmt-gateway')) > 0
         grafana_ini = self.mgr.template.render(
             'services/grafana/grafana.ini.j2', {
                 'anonymous_access': spec.anonymous_access,
@@ -103,7 +112,6 @@ class GrafanaService(CephadmService):
             }
         )
 
-        cert, pkey = self.prepare_certificates(daemon_spec)
         config_file = {
             'files': {
                 "grafana.ini": grafana_ini,
@@ -190,19 +198,12 @@ class GrafanaService(CephadmService):
         # TODO: signed cert
         dd = self.get_active_daemon(daemon_descrs)
         assert dd.hostname is not None
-        addr = dd.ip if dd.ip else self._inventory_get_fqdn(dd.hostname)
+        addr = dd.ip if dd.ip else self.mgr.get_fqdn(dd.hostname)
         port = dd.ports[0] if dd.ports else self.DEFAULT_SERVICE_PORT
         spec = cast(GrafanaSpec, self.mgr.spec_store[dd.service_name()].spec)
 
-        mgmt_gw_daemons = self.mgr.cache.get_daemons_by_service('mgmt-gateway')
-        if mgmt_gw_daemons:
-            dd = mgmt_gw_daemons[0]
-            assert dd.hostname is not None
-            mgmt_gw_spec = cast(MgmtGatewaySpec, self.mgr.spec_store['mgmt-gateway'].spec)
-            mgmt_gw_port = dd.ports[0] if dd.ports else None
-            mgmt_gw_addr = self._inventory_get_fqdn(dd.hostname)
-            protocol = 'http' if mgmt_gw_spec.disable_https else 'https'
-            mgmt_gw_external_endpoint = build_url(scheme=protocol, host=mgmt_gw_addr, port=mgmt_gw_port)
+        mgmt_gw_external_endpoint = get_mgmt_gw_external_endpoint(self.mgr)
+        if mgmt_gw_external_endpoint is not None:
             self._set_value_on_dashboard(
                 'Grafana',
                 'dashboard get-grafana-api-url',
@@ -256,7 +257,7 @@ class AlertmanagerService(CephadmService):
 
     def get_alertmanager_certificates(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[str, str]:
         node_ip = self.mgr.inventory.get_addr(daemon_spec.host)
-        host_fqdn = self._inventory_get_fqdn(daemon_spec.host)
+        host_fqdn = self.mgr.get_fqdn(daemon_spec.host)
         cert, key = self.mgr.cert_mgr.generate_cert([host_fqdn, "alertmanager_servers"], node_ip)
         return cert, key
 
@@ -283,19 +284,25 @@ class AlertmanagerService(CephadmService):
             # in order to be consistent with _calc_daemon_deps().
             deps.append(dd.name())
 
+        security_enabled, mgmt_gw_enabled = self.mgr._get_security_config()
+        if mgmt_gw_enabled:
+            dashboard_urls = [f'{get_mgmt_gw_internal_endpoint(self.mgr)}/dashboard']
+        else:
+            dashboard_urls = get_dashboard_urls(self)
+
         snmp_gateway_urls: List[str] = []
         for dd in self.mgr.cache.get_daemons_by_service('snmp-gateway'):
             assert dd.hostname is not None
             assert dd.ports
-            addr = dd.ip if dd.ip else self._inventory_get_fqdn(dd.hostname)
+            addr = dd.ip if dd.ip else self.mgr.get_fqdn(dd.hostname)
             deps.append(dd.name())
 
             snmp_gateway_urls.append(build_url(scheme='http', host=addr,
                                      port=dd.ports[0], path='/alerts'))
 
         context = {
-            'secure_monitoring_stack': self.mgr.secure_monitoring_stack,
-            'dashboard_urls': get_dashboard_urls(self),
+            'security_enabled': security_enabled,
+            'dashboard_urls': dashboard_urls,
             'default_webhook_urls': default_webhook_urls,
             'snmp_gateway_urls': snmp_gateway_urls,
             'secure': secure,
@@ -307,17 +314,18 @@ class AlertmanagerService(CephadmService):
         for dd in self.mgr.cache.get_daemons_by_service('alertmanager'):
             assert dd.hostname is not None
             deps.append(dd.name())
-            addr = self._inventory_get_fqdn(dd.hostname)
+            addr = self.mgr.get_fqdn(dd.hostname)
             peers.append(build_url(host=addr, port=port).lstrip('/'))
 
-        mgmt_gw_enabled = len(self.mgr.cache.get_daemons_by_service('mgmt-gateway')) > 0
         deps.append(f'secure_monitoring_stack:{self.mgr.secure_monitoring_stack}')
-        if self.mgr.secure_monitoring_stack:
+        if security_enabled:
             alertmanager_user, alertmanager_password = self.mgr._get_alertmanager_credentials()
             if alertmanager_user and alertmanager_password:
                 deps.append(f'{hash(alertmanager_user + alertmanager_password)}')
             cert, key = self.get_alertmanager_certificates(daemon_spec)
             context = {
+                'enable_mtls': mgmt_gw_enabled,
+                'enable_basic_auth': True,  # TODO(redo): disable when oauth2-proxy is enabled
                 'alertmanager_web_user': alertmanager_user,
                 'alertmanager_web_password': password_hash(alertmanager_password),
             }
@@ -327,7 +335,7 @@ class AlertmanagerService(CephadmService):
                     'alertmanager.crt': cert,
                     'alertmanager.key': key,
                     'web.yml': self.mgr.template.render('services/alertmanager/web.yml.j2', context),
-                    'root_cert.pem': self.mgr.http_server.service_discovery.ssl_certs.get_root_cert()
+                    'root_cert.pem': self.mgr.cert_mgr.get_root_ca()
                 },
                 'peers': peers,
                 'web_config': '/etc/alertmanager/web.yml',
@@ -352,21 +360,16 @@ class AlertmanagerService(CephadmService):
     def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None:
         dd = self.get_active_daemon(daemon_descrs)
         assert dd.hostname is not None
-        addr = dd.ip if dd.ip else self._inventory_get_fqdn(dd.hostname)
+        addr = dd.ip if dd.ip else self.mgr.get_fqdn(dd.hostname)
         port = dd.ports[0] if dd.ports else self.DEFAULT_SERVICE_PORT
-        protocol = 'https' if self.mgr.secure_monitoring_stack else 'http'
-
-        mgmt_gw_daemons = self.mgr.cache.get_daemons_by_service('mgmt-gateway')
-        if mgmt_gw_daemons:
-            dd = mgmt_gw_daemons[0]
-            assert dd.hostname is not None
-            mgmt_gw_addr = self._inventory_get_fqdn(dd.hostname)
-            mgmt_gw_internal_endpoint = build_url(scheme='https', host=mgmt_gw_addr, port=MgmtGatewayService.INTERNAL_SERVICE_PORT)
+        security_enabled, mgmt_gw_enabled = self.mgr._get_security_config()
+        protocol = 'https' if security_enabled else 'http'
+        if mgmt_gw_enabled:
             self._set_value_on_dashboard(
                 'AlertManager',
                 'dashboard get-alertmanager-api-host',
                 'dashboard set-alertmanager-api-host',
-                f'{mgmt_gw_internal_endpoint}/internal/alertmanager'
+                f'{get_mgmt_gw_internal_endpoint(self.mgr)}/alertmanager'
             )
             self._set_value_on_dashboard(
                 'Alertmanager',
@@ -413,8 +416,8 @@ class PrometheusService(CephadmService):
 
     def get_mgr_prometheus_certificates(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[str, str]:
         node_ip = self.mgr.inventory.get_addr(daemon_spec.host)
-        host_fqdn = self._inventory_get_fqdn(daemon_spec.host)
-        cert, key = self.mgr.cert_mgr.generate_cert([host_fqdn, "prometheus_servers"], node_ip)
+        host_fqdn = self.mgr.get_fqdn(daemon_spec.host)
+        cert, key = self.mgr.cert_mgr.generate_cert([host_fqdn, 'prometheus_servers'], node_ip)
         return cert, key
 
     def prepare_create(
@@ -450,9 +453,10 @@ class PrometheusService(CephadmService):
             retention_size = '0'
 
         # build service discovery end-point
+        security_enabled, mgmt_gw_enabled = self.mgr._get_security_config()
         port = self.mgr.service_discovery_port
         mgr_addr = wrap_ipv6(self.mgr.get_mgr_ip())
-        protocol = 'https' if self.mgr.secure_monitoring_stack else 'http'
+        protocol = 'https' if security_enabled else 'http'
         srv_end_point = f'{protocol}://{mgr_addr}:{port}/sd/prometheus/sd-config?'
 
         node_exporter_cnt = len(self.mgr.cache.get_daemons_by_service('node-exporter'))
@@ -464,6 +468,7 @@ class PrometheusService(CephadmService):
         mgr_prometheus_sd_url = f'{srv_end_point}service=mgr-prometheus'  # always included
         ceph_exporter_sd_url = f'{srv_end_point}service=ceph-exporter'  # always included
         nvmeof_sd_url = f'{srv_end_point}service=nvmeof'  # always included
+        mgmt_gw_enabled = len(self.mgr.cache.get_daemons_by_service('mgmt-gateway')) > 0
 
         alertmanager_user, alertmanager_password = self.mgr._get_alertmanager_credentials()
         prometheus_user, prometheus_password = self.mgr._get_prometheus_credentials()
@@ -471,9 +476,10 @@ class PrometheusService(CephadmService):
 
         # generate the prometheus configuration
         context = {
+            'alertmanager_url_prefix': '/alertmanager' if mgmt_gw_enabled else '/',
             'alertmanager_web_user': alertmanager_user,
             'alertmanager_web_password': alertmanager_password,
-            'secure_monitoring_stack': self.mgr.secure_monitoring_stack,
+            'security_enabled': security_enabled,
             'service_discovery_username': self.mgr.http_server.service_discovery.username,
             'service_discovery_password': self.mgr.http_server.service_discovery.password,
             'mgr_prometheus_sd_url': mgr_prometheus_sd_url,
@@ -494,12 +500,13 @@ class PrometheusService(CephadmService):
                 daemon_spec.port_ips = {str(port): ip_to_bind_to}
 
         web_context = {
+            'enable_mtls': mgmt_gw_enabled,
+            'enable_basic_auth': True,  # TODO(redo): disable when oauth2-proxy is enabled
             'prometheus_web_user': prometheus_user,
             'prometheus_web_password': password_hash(prometheus_password),
         }
 
-        mgmt_gw_enabled = len(self.mgr.cache.get_daemons_by_service('mgmt-gateway')) > 0
-        if self.mgr.secure_monitoring_stack:
+        if security_enabled:
             cert, key = self.get_mgr_prometheus_certificates(daemon_spec)
             r: Dict[str, Any] = {
                 'files': {
@@ -559,14 +566,15 @@ class PrometheusService(CephadmService):
         # add an explicit dependency on the active manager. This will force to
         # re-deploy prometheus if the mgr has changed (due to a fail-over i.e).
         deps.append(self.mgr.get_active_mgr().name())
-        if self.mgr.secure_monitoring_stack:
+        deps.append(f'secure_monitoring_stack:{self.mgr.secure_monitoring_stack}')
+        security_enabled, mgmt_gw_enabled = self.mgr._get_security_config()
+        if security_enabled:
             alertmanager_user, alertmanager_password = self.mgr._get_alertmanager_credentials()
             prometheus_user, prometheus_password = self.mgr._get_prometheus_credentials()
             if prometheus_user and prometheus_password:
                 deps.append(f'{hash(prometheus_user + prometheus_password)}')
             if alertmanager_user and alertmanager_password:
                 deps.append(f'{hash(alertmanager_user + alertmanager_password)}')
-        deps.append(f'secure_monitoring_stack:{self.mgr.secure_monitoring_stack}')
 
         # add a dependency since url_prefix depends on the existence of mgmt-gateway
         deps += [d.name() for d in self.mgr.cache.get_daemons_by_service('mgmt-gateway')]
@@ -588,21 +596,16 @@ class PrometheusService(CephadmService):
     def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None:
         dd = self.get_active_daemon(daemon_descrs)
         assert dd.hostname is not None
-        addr = dd.ip if dd.ip else self._inventory_get_fqdn(dd.hostname)
+        addr = dd.ip if dd.ip else self.mgr.get_fqdn(dd.hostname)
         port = dd.ports[0] if dd.ports else self.DEFAULT_SERVICE_PORT
-        protocol = 'https' if self.mgr.secure_monitoring_stack else 'http'
-
-        mgmt_gw_daemons = self.mgr.cache.get_daemons_by_service('mgmt-gateway')
-        if mgmt_gw_daemons:
-            dd = mgmt_gw_daemons[0]
-            assert dd.hostname is not None
-            mgmt_gw_addr = self._inventory_get_fqdn(dd.hostname)
-            mgmt_gw_internal_endpoint = build_url(scheme='https', host=mgmt_gw_addr, port=MgmtGatewayService.INTERNAL_SERVICE_PORT)
+        security_enabled, mgmt_gw_enabled = self.mgr._get_security_config()
+        protocol = 'https' if security_enabled else 'http'
+        if mgmt_gw_enabled:
             self._set_value_on_dashboard(
                 'Prometheus',
                 'dashboard get-prometheus-api-host',
                 'dashboard set-prometheus-api-host',
-                f'{mgmt_gw_internal_endpoint}/internal/prometheus'
+                f'{get_mgmt_gw_internal_endpoint(self.mgr)}/prometheus'
             )
             self._set_value_on_dashboard(
                 'Prometheus',
@@ -640,20 +643,23 @@ class NodeExporterService(CephadmService):
 
     def get_node_exporter_certificates(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[str, str]:
         node_ip = self.mgr.inventory.get_addr(daemon_spec.host)
-        host_fqdn = self._inventory_get_fqdn(daemon_spec.host)
+        host_fqdn = self.mgr.get_fqdn(daemon_spec.host)
         cert, key = self.mgr.cert_mgr.generate_cert(host_fqdn, node_ip)
         return cert, key
 
     def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]:
         assert self.TYPE == daemon_spec.daemon_type
-        deps = [f'secure_monitoring_stack:{self.mgr.secure_monitoring_stack}']
-        if self.mgr.secure_monitoring_stack:
+        deps = []
+        deps += [d.name() for d in self.mgr.cache.get_daemons_by_service('mgmt-gateway')]
+        deps += [f'secure_monitoring_stack:{self.mgr.secure_monitoring_stack}']
+        security_enabled, mgmt_gw_enabled = self.mgr._get_security_config()
+        if security_enabled:
             cert, key = self.get_node_exporter_certificates(daemon_spec)
-            mgmt_gw_enabled = len(self.mgr.cache.get_daemons_by_service('mgmt-gateway')) > 0
             r = {
                 'files': {
-                    'web.yml': self.mgr.template.render('services/node-exporter/web.yml.j2', {}),
-                    'root_cert.pem': self.mgr.http_server.service_discovery.ssl_certs.get_root_cert(),
+                    'web.yml': self.mgr.template.render('services/node-exporter/web.yml.j2',
+                                                        {'enable_mtls': mgmt_gw_enabled}),
+                    'root_cert.pem': self.mgr.cert_mgr.get_root_ca(),
                     'node_exporter.crt': cert,
                     'node_exporter.key': key,
                 },
@@ -713,7 +719,7 @@ class PromtailService(CephadmService):
         for i, dd in enumerate(daemons):
             assert dd.hostname is not None
             if i == 0:
-                loki_host = dd.ip if dd.ip else self._inventory_get_fqdn(dd.hostname)
+                loki_host = dd.ip if dd.ip else self.mgr.get_fqdn(dd.hostname)
 
             deps.append(dd.name())
 
index 8724b4d37552bd9074fdb4c885e96482d7a9c599..2a8d6fe4e3d79d129594555100d8cb2084b90fb9 100644 (file)
@@ -1,5 +1,5 @@
 
-from typing import Any, Tuple, IO, List
+from typing import Any, Tuple, IO, List, Union
 import ipaddress
 
 from datetime import datetime, timedelta
@@ -9,11 +9,6 @@ from cryptography.hazmat.primitives.asymmetric import rsa
 from cryptography.hazmat.primitives import hashes, serialization
 from cryptography.hazmat.backends import default_backend
 
-from orchestrator import OrchestratorError
-
-
-logger = logging.getLogger(__name__)
-
 
 class SSLConfigException(Exception):
     pass
@@ -64,19 +59,23 @@ class SSLCerts:
 
         return (cert_str, key_str)
 
-    def generate_cert(self, hosts: Any, addr: str) -> Tuple[str, str]:
-        have_ip = True
+    def generate_cert(self, _hosts: Union[str, List[str]], _addrs: Union[str, List[str]]) -> Tuple[str, str]:
+
+        addrs = [_addrs] if isinstance(_addrs, str) else _addrs
+        hosts = [_hosts] if isinstance(_hosts, str) else _hosts
+
+        valid_ips = True
         try:
-            ip = x509.IPAddress(ipaddress.ip_address(addr))
+            ips = [x509.IPAddress(ipaddress.ip_address(addr)) for addr in addrs]
         except Exception:
-            have_ip = False
+            valid_ips = False
 
         private_key = rsa.generate_private_key(
             public_exponent=65537, key_size=4096, backend=default_backend())
         public_key = private_key.public_key()
 
         builder = x509.CertificateBuilder()
-        builder = builder.subject_name(x509.Name([x509.NameAttribute(NameOID.COMMON_NAME, addr), ]))
+        builder = builder.subject_name(x509.Name([x509.NameAttribute(NameOID.COMMON_NAME, addrs[0]), ]))
         builder = builder.issuer_name(
             x509.Name([x509.NameAttribute(NameOID.COMMON_NAME, u'cephadm-root'), ]))
         builder = builder.not_valid_before(datetime.now())
@@ -84,11 +83,9 @@ class SSLCerts:
         builder = builder.serial_number(x509.random_serial_number())
         builder = builder.public_key(public_key)
 
-        if isinstance(hosts, str):
-            hosts = [hosts]
         san_list: List[x509.GeneralName] = [x509.DNSName(host) for host in hosts]
-        if have_ip:
-            san_list.append(ip)
+        if valid_ips:
+            san_list.extend(ips)
 
         builder = builder.add_extension(
             x509.SubjectAlternativeName(
@@ -129,7 +126,7 @@ class SSLCerts:
         given_cert = x509.load_pem_x509_certificate(cert.encode('utf-8'), backend=default_backend())
         tz = given_cert.not_valid_after.tzinfo
         if datetime.now(tz) >= given_cert.not_valid_after:
-            raise OrchestratorError('Given cert is expired')
+            raise SSLConfigException('Given cert is expired')
         self.root_cert = given_cert
         self.root_key = serialization.load_pem_private_key(
             data=priv_key.encode('utf-8'), backend=default_backend(), password=None)
index b34a1fc17e2811bb626670c0508c0b6ffdc92012..de993cb6ce369be9f6043098069af08b5cff6be0 100644 (file)
@@ -6,7 +6,7 @@ global:
 {% if not secure %}
   http_config:
     tls_config:
-{% if secure_monitoring_stack %}
+{% if security_enabled %}
       ca_file: root_cert.pem
 {% else %}
       insecure_skip_verify: true
index ef4f0b4c750c13f1232f25a4b7c623f3beb45b7e..47bcc5a0f65875e3c91dd3d9200bb537f0360520 100644 (file)
@@ -1,5 +1,11 @@
 tls_server_config:
   cert_file: alertmanager.crt
   key_file: alertmanager.key
+{% if enable_mtls %}
+  client_auth_type: RequireAndVerifyClientCert
+  client_ca_file: root_cert.pem
+{% endif %}
+{% if enable_basic_auth %}
 basic_auth_users:
     {{ alertmanager_web_user }}: {{ alertmanager_web_password }}
+{% endif %}
index 46aea864f536d69e41709f05b9c877cf275faef7..4b2c05c38afc9636d8e319a202646656931cb9a6 100644 (file)
@@ -27,6 +27,8 @@ datasources:
     secureJsonData:
       basicAuthPassword: {{ prometheus_password }}
       tlsCACert: "{{ cephadm_root_ca }}"
+      tlsClientCert: "{{ cert }}"
+      tlsClientKey: "{{ key }}"
 {% endif %}
 {% endfor %}
 
index 2220e8e47599702afff95c3530845a23c7099b44..29da8954ccc368e7d063f62e63223bb7da355266 100644 (file)
@@ -58,18 +58,33 @@ server {
     location /grafana {
         rewrite ^/grafana/(.*) /$1 break;
         proxy_pass {{ grafana_scheme }}://grafana_servers;
+        # clear any Authorization header: as Prometheus and Alertmanager are using basic-auth, the browser
+        # will send this header if Grafana is running on the same node as one of those services
+        proxy_set_header Authorization "";
     }
 {% endif %}
 
 {% if prometheus_endpoints %}
     location /prometheus {
         proxy_pass {{ prometheus_scheme }}://prometheus_servers;
+
+        proxy_ssl_certificate /etc/nginx/ssl/nginx_internal.crt;
+        proxy_ssl_certificate_key /etc/nginx/ssl/nginx_internal.key;
+        proxy_ssl_trusted_certificate /etc/nginx/ssl/ca.crt;
+        proxy_ssl_verify on;
+        proxy_ssl_verify_depth 2;
     }
 {% endif %}
 
 {% if alertmanager_endpoints %}
     location /alertmanager {
         proxy_pass {{ alertmanager_scheme }}://alertmanager_servers;
+
+        proxy_ssl_certificate /etc/nginx/ssl/nginx_internal.crt;
+        proxy_ssl_certificate_key /etc/nginx/ssl/nginx_internal.key;
+        proxy_ssl_trusted_certificate /etc/nginx/ssl/ca.crt;
+        proxy_ssl_verify on;
+        proxy_ssl_verify_depth 2;
     }
 {% endif %}
 }
index 6848c04ebe88c126bc8be304d42e8a831a8eee18..f48582c2ce15a8d327e41ad201637b75696d7291 100644 (file)
@@ -8,6 +8,14 @@ server {
     ssl_ciphers         AES128-SHA:AES256-SHA:RC4-SHA:DES-CBC3-SHA:RC4-MD5;
     ssl_prefer_server_ciphers on;
 
+{% if dashboard_endpoints %}
+    location /internal/dashboard {
+        rewrite ^/internal/dashboard/(.*) /$1 break;
+        proxy_pass {{ dashboard_scheme }}://dashboard_servers;
+        proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504;
+    }
+{% endif %}
+
 {% if grafana_endpoints %}
     location /internal/grafana {
         rewrite ^/internal/grafana/(.*) /$1 break;
@@ -19,6 +27,12 @@ server {
     location /internal/prometheus {
         rewrite ^/internal/prometheus/(.*) /prometheus/$1 break;
         proxy_pass {{ prometheus_scheme }}://prometheus_servers;
+
+        proxy_ssl_certificate /etc/nginx/ssl/nginx_internal.crt;
+        proxy_ssl_certificate_key /etc/nginx/ssl/nginx_internal.key;
+        proxy_ssl_trusted_certificate /etc/nginx/ssl/ca.crt;
+        proxy_ssl_verify on;
+        proxy_ssl_verify_depth 2;
     }
 {% endif %}
 
@@ -26,6 +40,12 @@ server {
     location /internal/alertmanager {
         rewrite ^/internal/alertmanager/(.*) /alertmanager/$1 break;
         proxy_pass {{ alertmanager_scheme }}://alertmanager_servers;
+
+        proxy_ssl_certificate /etc/nginx/ssl/nginx_internal.crt;
+        proxy_ssl_certificate_key /etc/nginx/ssl/nginx_internal.key;
+        proxy_ssl_trusted_certificate /etc/nginx/ssl/ca.crt;
+        proxy_ssl_verify on;
+        proxy_ssl_verify_depth 2;
     }
 {% endif %}
 }
index 1c1220345181472a51ca38efc1e2b51a0dc9073d..594ad5751309f5c3b977c25de4ee3f7a097e1005 100644 (file)
@@ -1,3 +1,7 @@
 tls_server_config:
   cert_file: node_exporter.crt
   key_file: node_exporter.key
+{% if enable_mtls %}
+  client_auth_type: RequireAndVerifyClientCert
+  client_ca_file: root_cert.pem
+{% endif %}
index faccc8f6de26cf2b1d0427328c02d7c06ae588ec..ac1ceb54f21aec6da86eb62984a8a0894ac95cce 100644 (file)
@@ -2,7 +2,7 @@
 global:
   scrape_interval: 10s
   evaluation_interval: 10s
-{% if not secure_monitoring_stack %}
+{% if not security_enabled %}
   external_labels:
     cluster: {{ cluster_fsid }}
 {% endif %}
@@ -13,13 +13,16 @@ rule_files:
 {% if alertmanager_sd_url %}
 alerting:
   alertmanagers:
-{% if secure_monitoring_stack %}
+{% if security_enabled %}
     - scheme: https
       basic_auth:
         username: {{ alertmanager_web_user }}
         password: {{ alertmanager_web_password }}
       tls_config:
         ca_file: root_cert.pem
+        cert_file: prometheus.crt
+        key_file:  prometheus.key
+      path_prefix: '{{ alertmanager_url_prefix }}'
       http_sd_configs:
         - url: {{ alertmanager_sd_url }}
           basic_auth:
@@ -36,10 +39,10 @@ alerting:
 
 scrape_configs:
   - job_name: 'ceph'
-{% if secure_monitoring_stack %}
+{% if security_enabled %}
     scheme: https
     tls_config:
-      ca_file: mgr_prometheus_cert.pem
+      ca_file: root_cert.pem
     honor_labels: true
     relabel_configs:
     - source_labels: [instance]
@@ -67,10 +70,12 @@ scrape_configs:
 
 {% if node_exporter_sd_url %}
   - job_name: 'node'
-{% if secure_monitoring_stack %}
+{% if security_enabled %}
     scheme: https
     tls_config:
       ca_file: root_cert.pem
+      cert_file: prometheus.crt
+      key_file:  prometheus.key
     http_sd_configs:
     - url: {{ node_exporter_sd_url }}
       basic_auth:
@@ -90,7 +95,7 @@ scrape_configs:
 
 {% if haproxy_sd_url %}
   - job_name: 'haproxy'
-{% if secure_monitoring_stack %}
+{% if security_enabled %}
     scheme: https
     tls_config:
       ca_file: root_cert.pem
@@ -113,7 +118,7 @@ scrape_configs:
 
 {% if ceph_exporter_sd_url %}
   - job_name: 'ceph-exporter'
-{% if secure_monitoring_stack %}
+{% if security_enabled %}
     honor_labels: true
     scheme: https
     tls_config:
@@ -138,7 +143,7 @@ scrape_configs:
 
 {% if nvmeof_sd_url %}
   - job_name: 'nvmeof'
-{% if secure_monitoring_stack %}
+{% if security_enabled %}
     honor_labels: true
     scheme: https
     tls_config:
@@ -156,7 +161,7 @@ scrape_configs:
 {% endif %}
 {% endif %}
 
-{% if not secure_monitoring_stack %}
+{% if not security_enabled %}
   - job_name: 'federate'
     scrape_interval: 15s
     honor_labels: true
index da3c3d724e8580d4af76938a1292f8dfcad04983..c58c580e60e6ba445344371c58240ac1df330501 100644 (file)
@@ -1,5 +1,11 @@
 tls_server_config:
   cert_file: prometheus.crt
   key_file: prometheus.key
+{% if enable_mtls %}
+  client_auth_type: RequireAndVerifyClientCert
+  client_ca_file: root_cert.pem
+{% endif %}
+{% if enable_basic_auth %}
 basic_auth_users:
     {{ prometheus_web_user }}: {{ prometheus_web_password }}
+{% endif %}
index 5b50d4baf294993dfbfcb24598f72bd6f2b4eda8..c49c637e6ed4307d4270112007041ee8fe8276a2 100644 (file)
@@ -95,7 +95,7 @@ def with_cephadm_module(module_options=None, store=None):
             mock.patch('cephadm.module.CephadmOrchestrator.get_module_option_ex', get_module_option_ex), \
             mock.patch("cephadm.module.CephadmOrchestrator.get_osdmap"), \
             mock.patch("cephadm.module.CephadmOrchestrator.remote"), \
-            mock.patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '::1'),\
+            mock.patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '::1'), \
             mock.patch("cephadm.agent.CephadmAgentHelpers._request_agent_acks"), \
             mock.patch("cephadm.agent.CephadmAgentHelpers._apply_agent", return_value=False), \
             mock.patch("cephadm.agent.CephadmAgentHelpers._agent_down", return_value=False), \
index 50009cbce1f2c7918689c17ac621bcd719a99d9a..9774e107bcecf1b59cc1a28352cf81aa6b69bad4 100644 (file)
@@ -346,8 +346,8 @@ class TestCephadm(object):
     ))
     def test_list_daemons(self, cephadm_module: CephadmOrchestrator):
         cephadm_module.service_cache_timeout = 10
-        with with_host(cephadm_module, 'test'):
-            CephadmServe(cephadm_module)._refresh_host_daemons('test')
+        with with_host(cephadm_module, 'myhost'):
+            CephadmServe(cephadm_module)._refresh_host_daemons('myhost')
             dds = wait(cephadm_module, cephadm_module.list_daemons())
             assert {d.name() for d in dds} == {'rgw.myrgw.foobar', 'haproxy.test.bar'}
 
@@ -1705,8 +1705,6 @@ class TestCephadm(object):
         nvmeof_client_cert = 'fake-nvmeof-client-cert'
         nvmeof_server_cert = 'fake-nvmeof-server-cert'
         nvmeof_root_ca_cert = 'fake-nvmeof-root-ca-cert'
-        cephadm_module.cert_key_store.save_cert('agent_endpoint_root_cert', agent_endpoint_root_cert)
-        cephadm_module.cert_key_store.save_cert('alertmanager_cert', alertmanager_host1_cert, host='host1')
         cephadm_module.cert_key_store.save_cert('rgw_frontend_ssl_cert', rgw_frontend_rgw_foo_host2_cert, service_name='rgw.foo', user_made=True)
         cephadm_module.cert_key_store.save_cert('nvmeof_server_cert', nvmeof_server_cert, service_name='nvmeof.foo', user_made=True)
         cephadm_module.cert_key_store.save_cert('nvmeof_client_cert', nvmeof_client_cert, service_name='nvmeof.foo', user_made=True)
@@ -1728,12 +1726,9 @@ class TestCephadm(object):
             'rgw_frontend_ssl_cert': False,
             'iscsi_ssl_cert': False,
             'ingress_ssl_cert': False,
-            'mgmt_gw_root_cert': False,
+            'mgmt_gw_cert': False,
             'cephadm_root_ca_cert': False,
             'grafana_cert': False,
-            'alertmanager_cert': False,
-            'prometheus_cert': False,
-            'node_exporter_cert': False,
             'nvmeof_client_cert': False,
             'nvmeof_server_cert': False,
             'nvmeof_root_ca_cert': False,
@@ -1783,7 +1778,7 @@ class TestCephadm(object):
 
         expected_ls = {
             'grafana_key': False,
-            'mgmt_gw_root_key': False,
+            'mgmt_gw_key': False,
             'cephadm_root_ca_key': False,
             'iscsi_ssl_key': False,
             'ingress_ssl_key': False,
index 4a2aae9c6ad037151ba4b447a5236b9c840d31a2..93768ff1f8fda828770e339ad698eeb0c62ba4b7 100644 (file)
@@ -46,6 +46,8 @@ from orchestrator._interface import DaemonDescription
 
 from typing import Dict, List
 
+cephadm_root_ca = """-----BEGIN CERTIFICATE-----\\nMIIE7DCCAtSgAwIBAgIUE8b2zZ64geu2ns3Zfn3/4L+Cf6MwDQYJKoZIhvcNAQEL\\nBQAwFzEVMBMGA1UEAwwMY2VwaGFkbS1yb290MB4XDTI0MDYyNjE0NDA1M1oXDTM0\\nMDYyNzE0NDA1M1owFzEVMBMGA1UEAwwMY2VwaGFkbS1yb290MIICIjANBgkqhkiG\\n9w0BAQEFAAOCAg8AMIICCgKCAgEAsZRJsdtTr9GLG1lWFql5SGc46ldFanNJd1Gl\\nqXq5vgZVKRDTmNgAb/XFuNEEmbDAXYIRZolZeYKMHfn0pouPRSel0OsC6/02ZUOW\\nIuN89Wgo3IYleCFpkVIumD8URP3hwdu85plRxYZTtlruBaTRH38lssyCqxaOdEt7\\nAUhvYhcMPJThB17eOSQ73mb8JEC83vB47fosI7IhZuvXvRSuZwUW30rJanWNhyZq\\neS2B8qw2RSO0+77H6gA4ftBnitfsE1Y8/F9Z/f92JOZuSMQXUB07msznPbRJia3f\\nueO8gOc32vxd1A1/Qzp14uX34yEGY9ko2lW226cZO29IVUtXOX+LueQttwtdlpz8\\ne6Npm09pXhXAHxV/OW3M28MdXmobIqT/m9MfkeAErt5guUeC5y8doz6/3VQRjFEn\\nRpN0WkblgnNAQ3DONPc+Qd9Fi/wZV2X7bXoYpNdoWDsEOiE/eLmhG1A2GqU/mneP\\nzQ6u79nbdwTYpwqHpa+PvusXeLfKauzI8lLUJotdXy9EK8iHUofibB61OljYye6B\\nG3b8C4QfGsw8cDb4APZd/6AZYyMx/V3cGZ+GcOV7WvsC8k7yx5Uqasm/kiGQ3EZo\\nuNenNEYoGYrjb8D/8QzqNUTwlEh27/ps80tO7l2GGTvWVZL0PRZbmLDvO77amtOf\\nOiRXMoUCAwEAAaMwMC4wGwYDVR0RBBQwEocQAAAAAAAAAAAAAAAAAAAAATAPBgNV\\nHRMBAf8EBTADAQH/MA0GCSqGSIb3DQEBCwUAA4ICAQAxwzX5AhYEWhTV4VUwUj5+\\nqPdl4Q2tIxRokqyE+cDxoSd+6JfGUefUbNyBxDt0HaBq8obDqqrbcytxnn7mpnDu\\nhtiauY+I4Amt7hqFOiFA4cCLi2mfok6g2vL53tvhd9IrsfflAU2wy7hL76Ejm5El\\nA+nXlkJwps01Whl9pBkUvIbOn3pXX50LT4hb5zN0PSu957rjd2xb4HdfuySm6nW4\\n4GxtVWfmGA6zbC4XMEwvkuhZ7kD2qjkAguGDF01uMglkrkCJT3OROlNBuSTSBGqt\\ntntp5VytHvb7KTF7GttM3ha8/EU2KYaHM6WImQQTrOfiImAktOk4B3lzUZX3HYIx\\n+sByO4P4dCvAoGz1nlWYB2AvCOGbKf0Tgrh4t4jkiF8FHTXGdfvWmjgi1pddCNAy\\nn65WOCmVmLZPERAHOk1oBwqyReSvgoCFo8FxbZcNxJdlhM0Z6hzKggm3O3Dl88Xl\\n5euqJjh2STkBW8Xuowkg1TOs5XyWvKoDFAUzyzeLOL8YSG+gXV22gPTUaPSVAqdb\\nwd0Fx2kjConuC5bgTzQHs8XWA930U3XWZraj21Vaa8UxlBLH4fUro8H5lMSYlZNE\\nJHRNW8BkznAClaFSDG3dybLsrzrBFAu/Qb5zVkT1xyq0YkepGB7leXwq6vjWA5Pw\\nmZbKSphWfh0qipoqxqhfkw==\\n-----END CERTIFICATE-----\\n"""
+
 ceph_generated_cert = """-----BEGIN CERTIFICATE-----\nMIICxjCCAa4CEQDIZSujNBlKaLJzmvntjukjMA0GCSqGSIb3DQEBDQUAMCExDTAL\nBgNVBAoMBENlcGgxEDAOBgNVBAMMB2NlcGhhZG0wHhcNMjIwNzEzMTE0NzA3WhcN\nMzIwNzEwMTE0NzA3WjAhMQ0wCwYDVQQKDARDZXBoMRAwDgYDVQQDDAdjZXBoYWRt\nMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAyyMe4DMA+MeYK7BHZMHB\nq7zjliEOcNgxomjU8qbf5USF7Mqrf6+/87XWqj4pCyAW8x0WXEr6A56a+cmBVmt+\nqtWDzl020aoId6lL5EgLLn6/kMDCCJLq++Lg9cEofMSvcZh+lY2f+1p+C+00xent\nrLXvXGOilAZWaQfojT2BpRnNWWIFbpFwlcKrlg2G0cFjV5c1m6a0wpsQ9JHOieq0\nSvwCixajwq3CwAYuuiU1wjI4oJO4Io1+g8yB3nH2Mo/25SApCxMXuXh4kHLQr/T4\n4hqisvG4uJYgKMcSIrWj5o25mclByGi1UI/kZkCUES94i7Z/3ihx4Bad0AMs/9tw\nFwIDAQABMA0GCSqGSIb3DQEBDQUAA4IBAQAf+pwz7Gd7mDwU2LY0TQXsK6/8KGzh\nHuX+ErOb8h5cOAbvCnHjyJFWf6gCITG98k9nxU9NToG0WYuNm/max1y/54f0dtxZ\npUo6KSNl3w6iYCfGOeUIj8isi06xMmeTgMNzv8DYhDt+P2igN6LenqWTVztogkiV\nxQ5ZJFFLEw4sN0CXnrZX3t5ruakxLXLTLKeE0I91YJvjClSBGkVJq26wOKQNHMhx\npWxeydQ5EgPZY+Aviz5Dnxe8aB7oSSovpXByzxURSabOuCK21awW5WJCGNpmqhWK\nZzACBDEstccj57c4OGV0eayHJRsluVr2e9NHRINZA3qdB37e6gsI1xHo\n-----END CERTIFICATE-----\n"""
 
 ceph_generated_key = """-----BEGIN PRIVATE KEY-----\nMIIEvAIBADANBgkqhkiG9w0BAQEFAASCBKYwggSiAgEAAoIBAQDLIx7gMwD4x5gr\nsEdkwcGrvOOWIQ5w2DGiaNTypt/lRIXsyqt/r7/ztdaqPikLIBbzHRZcSvoDnpr5\nyYFWa36q1YPOXTbRqgh3qUvkSAsufr+QwMIIkur74uD1wSh8xK9xmH6VjZ/7Wn4L\n7TTF6e2ste9cY6KUBlZpB+iNPYGlGc1ZYgVukXCVwquWDYbRwWNXlzWbprTCmxD0\nkc6J6rRK/AKLFqPCrcLABi66JTXCMjigk7gijX6DzIHecfYyj/blICkLExe5eHiQ\nctCv9PjiGqKy8bi4liAoxxIitaPmjbmZyUHIaLVQj+RmQJQRL3iLtn/eKHHgFp3Q\nAyz/23AXAgMBAAECggEAVoTB3Mm8azlPlaQB9GcV3tiXslSn+uYJ1duCf0sV52dV\nBzKW8s5fGiTjpiTNhGCJhchowqxoaew+o47wmGc2TvqbpeRLuecKrjScD0GkCYyQ\neM2wlshEbz4FhIZdgS6gbuh9WaM1dW/oaZoBNR5aTYo7xYTmNNeyLA/jO2zr7+4W\n5yES1lMSBXpKk7bDGKYY4bsX2b5RLr2Grh2u2bp7hoLABCEvuu8tSQdWXLEXWpXo\njwmV3hc6tabypIa0mj2Dmn2Dmt1ppSO0AZWG/WAizN3f4Z0r/u9HnbVrVmh0IEDw\n3uf2LP5o3msG9qKCbzv3lMgt9mMr70HOKnJ8ohMSKQKBgQDLkNb+0nr152HU9AeJ\nvdz8BeMxcwxCG77iwZphZ1HprmYKvvXgedqWtS6FRU+nV6UuQoPUbQxJBQzrN1Qv\nwKSlOAPCrTJgNgF/RbfxZTrIgCPuK2KM8I89VZv92TSGi362oQA4MazXC8RAWjoJ\nSu1/PHzK3aXOfVNSLrOWvIYeZQKBgQD/dgT6RUXKg0UhmXj7ExevV+c7oOJTDlMl\nvLngrmbjRgPO9VxLnZQGdyaBJeRngU/UXfNgajT/MU8B5fSKInnTMawv/tW7634B\nw3v6n5kNIMIjJmENRsXBVMllDTkT9S7ApV+VoGnXRccbTiDapBThSGd0wri/CuwK\nNWK1YFOeywKBgEDyI/XG114PBUJ43NLQVWm+wx5qszWAPqV/2S5MVXD1qC6zgCSv\nG9NLWN1CIMimCNg6dm7Wn73IM7fzvhNCJgVkWqbItTLG6DFf3/DPODLx1wTMqLOI\nqFqMLqmNm9l1Nec0dKp5BsjRQzq4zp1aX21hsfrTPmwjxeqJZdioqy2VAoGAXR5X\nCCdSHlSlUW8RE2xNOOQw7KJjfWT+WAYoN0c7R+MQplL31rRU7dpm1bLLRBN11vJ8\nMYvlT5RYuVdqQSP6BkrX+hLJNBvOLbRlL+EXOBrVyVxHCkDe+u7+DnC4epbn+N8P\nLYpwqkDMKB7diPVAizIKTBxinXjMu5fkKDs5n+sCgYBbZheYKk5M0sIxiDfZuXGB\nkf4mJdEkTI1KUGRdCwO/O7hXbroGoUVJTwqBLi1tKqLLarwCITje2T200BYOzj82\nqwRkCXGtXPKnxYEEUOiFx9OeDrzsZV00cxsEnX0Zdj+PucQ/J3Cvd0dWUspJfLHJ\n39gnaegswnz9KMQAvzKFdg==\n-----END PRIVATE KEY-----\n"""
@@ -603,18 +605,14 @@ class TestMonitoring:
     @patch("socket.getfqdn")
     @patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '::1')
     @patch("cephadm.services.monitoring.password_hash", lambda password: 'alertmanager_password_hash')
+    @patch('cephadm.cert_mgr.CertMgr.get_root_ca', lambda instance: 'cephadm_root_cert')
+    @patch('cephadm.cert_mgr.CertMgr.generate_cert', lambda instance, fqdn, ip: ('mycert', 'mykey'))
     def test_alertmanager_config_security_enabled(self, _get_fqdn, _run_cephadm, cephadm_module: CephadmOrchestrator):
         _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
 
         fqdn = 'host1.test'
         _get_fqdn.return_value = fqdn
 
-        def gen_cert(host, addr):
-            return ('mycert', 'mykey')
-
-        def get_root_cert():
-            return 'my_root_cert'
-
         with with_host(cephadm_module, 'test'):
             cephadm_module.secure_monitoring_stack = True
             cephadm_module.set_store(AlertmanagerService.USER_CFG_KEY, 'alertmanager_user')
@@ -653,7 +651,8 @@ class TestMonitoring:
                   cert_file: alertmanager.crt
                   key_file: alertmanager.key
                 basic_auth_users:
-                    alertmanager_user: alertmanager_password_hash""").lstrip()
+                    alertmanager_user: alertmanager_password_hash
+                """).lstrip()
 
                 _run_cephadm.assert_called_with(
                     'test',
@@ -684,7 +683,7 @@ class TestMonitoring:
                                 'alertmanager.crt': 'mycert',
                                 'alertmanager.key': 'mykey',
                                 'web.yml': web_config,
-                                'root_cert.pem': 'my_root_cert'
+                                'root_cert.pem': 'cephadm_root_cert'
                             },
                             'peers': [],
                             'web_config': '/etc/alertmanager/web.yml',
@@ -836,6 +835,8 @@ class TestMonitoring:
     @patch("cephadm.serve.CephadmServe._run_cephadm")
     @patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '::1')
     @patch("cephadm.services.monitoring.password_hash", lambda password: 'prometheus_password_hash')
+    @patch('cephadm.cert_mgr.CertMgr.get_root_ca', lambda instance: 'cephadm_root_cert')
+    @patch('cephadm.cert_mgr.CertMgr.generate_cert', lambda instance, fqdn, ip: ('mycert', 'mykey'))
     def test_prometheus_config_security_enabled(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
         _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
         s = RGWSpec(service_id="foo", placement=PlacementSpec(count=1), rgw_frontend_type='beast')
@@ -875,7 +876,8 @@ class TestMonitoring:
                   cert_file: prometheus.crt
                   key_file: prometheus.key
                 basic_auth_users:
-                    prometheus_user: prometheus_password_hash""").lstrip()
+                    prometheus_user: prometheus_password_hash
+                """).lstrip()
 
                 y = dedent("""
                 # This file is generated by cephadm.
@@ -894,6 +896,9 @@ class TestMonitoring:
                         password: alertmanager_plain_password
                       tls_config:
                         ca_file: root_cert.pem
+                        cert_file: prometheus.crt
+                        key_file:  prometheus.key
+                      path_prefix: '/'
                       http_sd_configs:
                         - url: https://[::1]:8765/sd/prometheus/sd-config?service=alertmanager
                           basic_auth:
@@ -906,7 +911,7 @@ class TestMonitoring:
                   - job_name: 'ceph'
                     scheme: https
                     tls_config:
-                      ca_file: mgr_prometheus_cert.pem
+                      ca_file: root_cert.pem
                     honor_labels: true
                     relabel_configs:
                     - source_labels: [instance]
@@ -924,6 +929,8 @@ class TestMonitoring:
                     scheme: https
                     tls_config:
                       ca_file: root_cert.pem
+                      cert_file: prometheus.crt
+                      key_file:  prometheus.key
                     http_sd_configs:
                     - url: https://[::1]:8765/sd/prometheus/sd-config?service=node-exporter
                       basic_auth:
@@ -998,8 +1005,7 @@ class TestMonitoring:
                         "config_blobs": {
                             'files': {
                                 'prometheus.yml': y,
-                                'root_cert.pem': '',
-                                'mgr_prometheus_cert.pem': '',
+                                'root_cert.pem': 'cephadm_root_cert',
                                 'web.yml': web_config,
                                 'prometheus.crt': 'mycert',
                                 'prometheus.key': 'mykey',
@@ -3194,8 +3200,12 @@ class TestSMB:
 class TestMgmtGateway:
     @patch("cephadm.serve.CephadmServe._run_cephadm")
     @patch("cephadm.services.mgmt_gateway.MgmtGatewayService.get_service_endpoints")
+    @patch("cephadm.services.mgmt_gateway.MgmtGatewayService.get_external_certificates",
+           lambda instance, svc_spec, dspec: (ceph_generated_cert, ceph_generated_key))
+    @patch("cephadm.services.mgmt_gateway.MgmtGatewayService.get_internal_certificates",
+           lambda instance, dspec: (ceph_generated_cert, ceph_generated_key))
     @patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '::1')
-    @patch('cephadm.ssl_cert_utils.SSLCerts.generate_cert', lambda instance, fqdn, ip: (ceph_generated_cert, ceph_generated_key))
+    @patch('cephadm.cert_mgr.CertMgr.get_root_ca', lambda instance: cephadm_root_ca)
     @patch("cephadm.services.mgmt_gateway.get_dashboard_endpoints", lambda _: (["ceph-node-2:8443", "ceph-node-2:8443"], "https"))
     def test_mgmt_gateway_config(self, get_service_endpoints_mock: List[str], _run_cephadm, cephadm_module: CephadmOrchestrator):
 
@@ -3311,14 +3321,29 @@ class TestMgmtGateway:
                                                  location /grafana {
                                                      rewrite ^/grafana/(.*) /$1 break;
                                                      proxy_pass https://grafana_servers;
+                                                     # clear any Authorization header as Prometheus and Alertmanager are using basic-auth browser
+                                                     # will send this header if Grafana is running on the same node as one of those services
+                                                     proxy_set_header Authorization "";
                                                  }
 
                                                  location /prometheus {
-                                                     proxy_pass http://prometheus_servers;
+                                                     proxy_pass https://prometheus_servers;
+
+                                                     proxy_ssl_certificate /etc/nginx/ssl/nginx_internal.crt;
+                                                     proxy_ssl_certificate_key /etc/nginx/ssl/nginx_internal.key;
+                                                     proxy_ssl_trusted_certificate /etc/nginx/ssl/ca.crt;
+                                                     proxy_ssl_verify on;
+                                                     proxy_ssl_verify_depth 2;
                                                  }
 
                                                  location /alertmanager {
-                                                     proxy_pass http://alertmanager_servers;
+                                                     proxy_pass https://alertmanager_servers;
+
+                                                     proxy_ssl_certificate /etc/nginx/ssl/nginx_internal.crt;
+                                                     proxy_ssl_certificate_key /etc/nginx/ssl/nginx_internal.key;
+                                                     proxy_ssl_trusted_certificate /etc/nginx/ssl/ca.crt;
+                                                     proxy_ssl_verify on;
+                                                     proxy_ssl_verify_depth 2;
                                                  }
                                              }"""),
                     "nginx_internal_server.conf": dedent("""
@@ -3331,6 +3356,12 @@ class TestMgmtGateway:
                                                  ssl_ciphers         AES128-SHA:AES256-SHA:RC4-SHA:DES-CBC3-SHA:RC4-MD5;
                                                  ssl_prefer_server_ciphers on;
 
+                                                 location /internal/dashboard {
+                                                     rewrite ^/internal/dashboard/(.*) /$1 break;
+                                                     proxy_pass https://dashboard_servers;
+                                                     proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504;
+                                                 }
+
                                                  location /internal/grafana {
                                                      rewrite ^/internal/grafana/(.*) /$1 break;
                                                      proxy_pass https://grafana_servers;
@@ -3338,16 +3369,29 @@ class TestMgmtGateway:
 
                                                  location /internal/prometheus {
                                                      rewrite ^/internal/prometheus/(.*) /prometheus/$1 break;
-                                                     proxy_pass http://prometheus_servers;
+                                                     proxy_pass https://prometheus_servers;
+
+                                                     proxy_ssl_certificate /etc/nginx/ssl/nginx_internal.crt;
+                                                     proxy_ssl_certificate_key /etc/nginx/ssl/nginx_internal.key;
+                                                     proxy_ssl_trusted_certificate /etc/nginx/ssl/ca.crt;
+                                                     proxy_ssl_verify on;
+                                                     proxy_ssl_verify_depth 2;
                                                  }
 
                                                  location /internal/alertmanager {
                                                      rewrite ^/internal/alertmanager/(.*) /alertmanager/$1 break;
-                                                     proxy_pass http://alertmanager_servers;
+                                                     proxy_pass https://alertmanager_servers;
+
+                                                     proxy_ssl_certificate /etc/nginx/ssl/nginx_internal.crt;
+                                                     proxy_ssl_certificate_key /etc/nginx/ssl/nginx_internal.key;
+                                                     proxy_ssl_trusted_certificate /etc/nginx/ssl/ca.crt;
+                                                     proxy_ssl_verify on;
+                                                     proxy_ssl_verify_depth 2;
                                                  }
                                              }"""),
                     "nginx_internal.crt": f"{ceph_generated_cert}",
                     "nginx_internal.key": f"{ceph_generated_key}",
+                    "ca.crt": f"{cephadm_root_ca}",
                     "nginx.crt": f"{ceph_generated_cert}",
                     "nginx.key": f"{ceph_generated_key}",
                 }
index d0ad51c8f7d708b921838dffec101225d9627d0f..c00d8c70e638cfd772daa2a92f6085ef0501695e 100644 (file)
@@ -30,6 +30,14 @@ class PrometheusReceiver(BaseController):
 
 
 class PrometheusRESTController(RESTController):
+
+    def close_unlink_files(self, files):
+        # files: temp-file objects (or None) to close and delete; None entries are skipped
+        valid_entries = [f for f in files if f is not None]
+        for f in valid_entries:
+            f.close()
+            os.unlink(f.name)
+
     def prometheus_proxy(self, method, path, params=None, payload=None):
         # type (str, str, dict, dict)
         user, password, ca_cert_file, cert_file, key_file = self.get_access_info('prometheus')
@@ -39,10 +47,7 @@ class PrometheusRESTController(RESTController):
                                method, path, 'Prometheus', params, payload,
                                user=user, password=password, verify=verify,
                                cert=cert)
-        for f in [ca_cert_file, cert_file, key_file]:
-            if f:
-                f.close()
-                os.unlink(f.name)
+        self.close_unlink_files([ca_cert_file, cert_file, key_file])
         return response
 
     def alert_proxy(self, method, path, params=None, payload=None):
@@ -54,14 +59,21 @@ class PrometheusRESTController(RESTController):
                                method, path, 'Alertmanager', params, payload,
                                user=user, password=password, verify=verify,
                                cert=cert, is_alertmanager=True)
-        for f in [ca_cert_file, cert_file, key_file]:
-            if f:
-                f.close()
-                os.unlink(f.name)
+        self.close_unlink_files([ca_cert_file, cert_file, key_file])
         return response
 
     def get_access_info(self, module_name):
-        # type (str, str, str, str, srt)
+        # type (str, str, str, str, str)
+
+        def write_to_tmp_file(content):
+            # type (str)
+            if content is None:
+                return None
+            tmp_file = tempfile.NamedTemporaryFile(delete=False)
+            tmp_file.write(content.encode('utf-8'))
+            tmp_file.flush()  # tmp_file must not be gc'ed
+            return tmp_file
+
         if module_name not in ['prometheus', 'alertmanager']:
             raise DashboardException(f'Invalid module name {module_name}', component='prometheus')
         user = None
@@ -69,37 +81,18 @@ class PrometheusRESTController(RESTController):
         cert_file = None
         pkey_file = None
         ca_cert_file = None
-
         orch_backend = mgr.get_module_option_ex('orchestrator', 'orchestrator')
         if orch_backend == 'cephadm':
-            secure_monitoring_stack = mgr.get_module_option_ex('cephadm',
-                                                               'secure_monitoring_stack',
-                                                               False)
-            if secure_monitoring_stack:
-                cmd = {'prefix': f'orch {module_name} get-credentials'}
-                ret, out, _ = mgr.mon_command(cmd)
-                if ret == 0 and out is not None:
-                    access_info = json.loads(out)
+            cmd = {'prefix': f'orch {module_name} get-credentials'}
+            ret, out, _ = mgr.mon_command(cmd)
+            if ret == 0 and out is not None:
+                access_info = json.loads(out)
+                if access_info:
                     user = access_info['user']
                     password = access_info['password']
-                    certificate = access_info['certificate']
-                    ca_cert_file = tempfile.NamedTemporaryFile(delete=False)
-                    ca_cert_file.write(certificate.encode('utf-8'))
-                    ca_cert_file.flush()
-
-                    cert_file = None
-                    cert = mgr.get_localized_store("crt")  # type: ignore
-                    if cert is not None:
-                        cert_file = tempfile.NamedTemporaryFile(delete=False)
-                        cert_file.write(cert.encode('utf-8'))
-                        cert_file.flush()  # cert_tmp must not be gc'ed
-
-                    pkey_file = None
-                    pkey = mgr.get_localized_store("key")  # type: ignore
-                    if pkey is not None:
-                        pkey_file = tempfile.NamedTemporaryFile(delete=False)
-                        pkey_file.write(pkey.encode('utf-8'))
-                        pkey_file.flush()
+                    ca_cert_file = write_to_tmp_file(access_info['certificate'])
+                    cert_file = write_to_tmp_file(mgr.get_localized_store("crt"))
+                    pkey_file = write_to_tmp_file(mgr.get_localized_store("key"))
 
         return user, password, ca_cert_file, cert_file, pkey_file
 
index 7584fabec0f80549251474bf507f1835a8c4e4a9..cc389545c45e62ad43e62761e6128eabfc84cec0 100644 (file)
@@ -794,7 +794,7 @@ class Orchestrator(object):
         raise NotImplementedError()
 
     def generate_certificates(self, module_name: str) -> OrchResult[Optional[Dict[str, str]]]:
-        """set prometheus access information"""
+        """generate cert/key for the module with the name module_name"""
         raise NotImplementedError()
 
     def set_custom_prometheus_alerts(self, alerts_file: str) -> OrchResult[str]:
index 7a4bca70fa459e5585bdc5b0db52efc8d9f23955..8b1c09218969e1d24c98a82cd3eaec6936a355a5 100644 (file)
@@ -1765,18 +1765,23 @@ class Module(MgrModule, OrchestratorClientMixin):
         self.get_file_sd_config()
 
     def configure(self, server_addr: str, server_port: int) -> None:
-        # cephadm deployments have a TLS monitoring stack setup option.
-        # If the cephadm module is on and the setting is true (defaults to false)
-        # we should have prometheus be set up to interact with that
-        cephadm_secure_monitoring_stack = self.get_module_option_ex(
-            'cephadm', 'secure_monitoring_stack', False)
-        if cephadm_secure_monitoring_stack:
-            try:
-                self.setup_tls_config(server_addr, server_port)
-                return
-            except Exception as e:
-                self.log.exception(f'Failed to setup cephadm based secure monitoring stack: {e}\n',
-                                   'Falling back to default configuration')
+        # TODO(redo): this new check is hacky, we should provide an explit cmd
+        # from cephadm to get/check the security status
+
+        # if cephadm is configured with security then TLS must be used
+        cmd = {'prefix': 'orch prometheus get-credentials'}
+        ret, out, _ = self.mon_command(cmd)
+        if ret == 0 and out is not None:
+            access_info = json.loads(out)
+            if access_info:
+                try:
+                    self.setup_tls_using_cephadm(server_addr, server_port)
+                    return
+                except Exception as e:
+                    self.log.exception(f'Failed to setup cephadm based secure monitoring stack: {e}\n',
+                                       'Falling back to default configuration')
+
+        # In any error fallback to plain http mode
         self.setup_default_config(server_addr, server_port)
 
     def setup_default_config(self, server_addr: str, server_port: int) -> None:
@@ -1792,7 +1797,7 @@ class Module(MgrModule, OrchestratorClientMixin):
         self.set_uri(build_url(scheme='http', host=self.get_server_addr(),
                      port=server_port, path='/'))
 
-    def setup_tls_config(self, server_addr: str, server_port: int) -> None:
+    def setup_tls_using_cephadm(self, server_addr: str, server_port: int) -> None:
         from mgr_util import verify_tls_files
         cmd = {'prefix': 'orch certmgr generate-certificates',
                'module_name': 'prometheus',