From 862a38e33863d5caf6504d900567f1ac259a70de Mon Sep 17 00:00:00 2001 From: Redouane Kachach Date: Wed, 3 Jul 2024 15:43:16 +0200 Subject: [PATCH] mgr/cephadm: adding mTLS support Signed-off-by: Redouane Kachach --- .../cephadmlib/daemons/mgmt_gateway.py | 38 +++-- src/pybind/mgr/cephadm/cert_mgr.py | 43 ++---- src/pybind/mgr/cephadm/http_server.py | 10 +- src/pybind/mgr/cephadm/inventory.py | 5 +- src/pybind/mgr/cephadm/module.py | 52 +++++-- .../mgr/cephadm/services/cephadmservice.py | 31 ++-- .../mgr/cephadm/services/mgmt_gateway.py | 101 +++++++++---- src/pybind/mgr/cephadm/services/monitoring.py | 140 +++++++++--------- src/pybind/mgr/cephadm/ssl_cert_utils.py | 29 ++-- .../services/alertmanager/alertmanager.yml.j2 | 2 +- .../services/alertmanager/web.yml.j2 | 6 + .../services/grafana/ceph-dashboard.yml.j2 | 2 + .../mgmt-gateway/external_server.conf.j2 | 15 ++ .../mgmt-gateway/internal_server.conf.j2 | 20 +++ .../services/node-exporter/web.yml.j2 | 4 + .../services/prometheus/prometheus.yml.j2 | 23 +-- .../templates/services/prometheus/web.yml.j2 | 6 + src/pybind/mgr/cephadm/tests/fixtures.py | 2 +- src/pybind/mgr/cephadm/tests/test_cephadm.py | 13 +- src/pybind/mgr/cephadm/tests/test_services.py | 78 +++++++--- .../mgr/dashboard/controllers/prometheus.py | 65 ++++---- src/pybind/mgr/orchestrator/_interface.py | 2 +- src/pybind/mgr/prometheus/module.py | 31 ++-- 23 files changed, 440 insertions(+), 278 deletions(-) diff --git a/src/cephadm/cephadmlib/daemons/mgmt_gateway.py b/src/cephadm/cephadmlib/daemons/mgmt_gateway.py index 93dfc275c41..b0a6f0579d2 100644 --- a/src/cephadm/cephadmlib/daemons/mgmt_gateway.py +++ b/src/cephadm/cephadmlib/daemons/mgmt_gateway.py @@ -104,9 +104,22 @@ class MgmtGateway(ContainerDaemonForm): raise OSError('data_dir is not a directory: %s' % (data_dir)) logger.info('Writing mgmt-gateway config...') config_dir = os.path.join(data_dir, 'etc/') - makedirs(config_dir, uid, gid, 0o755) - recursive_chown(config_dir, uid, gid) - 
populate_files(config_dir, self.files, uid, gid) + ssl_dir = os.path.join(data_dir, 'etc/ssl') + for ddir in [config_dir, ssl_dir]: + makedirs(ddir, uid, gid, 0o755) + recursive_chown(ddir, uid, gid) + conf_files = { + fname: content + for fname, content in self.files.items() + if fname.endswith('.conf') + } + cert_files = { + fname: content + for fname, content in self.files.items() + if fname.endswith('.crt') or fname.endswith('.key') + } + populate_files(config_dir, conf_files, uid, gid) + populate_files(ssl_dir, cert_files, uid, gid) def _get_container_mounts(self, data_dir: str) -> Dict[str, str]: mounts: Dict[str, str] = {} @@ -152,23 +165,6 @@ class MgmtGateway(ContainerDaemonForm): os.path.join( data_dir, 'etc/nginx_external_server.conf' ): '/etc/nginx_external_server.conf:Z', - os.path.join( - data_dir, 'etc/nginx_internal.crt' - ): '/etc/nginx/ssl/nginx_internal.crt:Z', - os.path.join( - data_dir, 'etc/nginx_internal.key' - ): '/etc/nginx/ssl/nginx_internal.key:Z', + os.path.join(data_dir, 'etc/ssl'): '/etc/nginx/ssl/', } ) - - if 'nginx.crt' in self.files: - mounts.update( - { - os.path.join( - data_dir, 'etc/nginx.crt' - ): '/etc/nginx/ssl/nginx.crt:Z', - os.path.join( - data_dir, 'etc/nginx.key' - ): '/etc/nginx/ssl/nginx.key:Z', - } - ) diff --git a/src/pybind/mgr/cephadm/cert_mgr.py b/src/pybind/mgr/cephadm/cert_mgr.py index 9b87c4651d8..e1715424a95 100644 --- a/src/pybind/mgr/cephadm/cert_mgr.py +++ b/src/pybind/mgr/cephadm/cert_mgr.py @@ -1,6 +1,5 @@ -from cephadm.ssl_cert_utils import SSLCerts -from threading import Lock +from cephadm.ssl_cert_utils import SSLCerts, SSLConfigException from typing import TYPE_CHECKING, Tuple, Union, List if TYPE_CHECKING: @@ -13,31 +12,21 @@ class CertMgr: CEPHADM_ROOT_CA_KEY = 'cephadm_root_ca_key' def __init__(self, mgr: "CephadmOrchestrator", ip: str) -> None: - self.lock = Lock() - self.initialized = False - with self.lock: - if self.initialized: - return - self.initialized = True - self.mgr = mgr - 
self.ssl_certs: SSLCerts = SSLCerts() - old_cert = self.mgr.cert_key_store.get_cert(self.CEPHADM_ROOT_CA_CERT) - old_key = self.mgr.cert_key_store.get_key(self.CEPHADM_ROOT_CA_KEY) - if old_key and old_cert: + self.ssl_certs: SSLCerts = SSLCerts() + old_cert = mgr.cert_key_store.get_cert(self.CEPHADM_ROOT_CA_CERT) + old_key = mgr.cert_key_store.get_key(self.CEPHADM_ROOT_CA_KEY) + if old_key and old_cert: + try: self.ssl_certs.load_root_credentials(old_cert, old_key) - else: - self.ssl_certs.generate_root_cert(ip) - self.mgr.cert_key_store.save_cert(self.CEPHADM_ROOT_CA_CERT, self.ssl_certs.get_root_cert()) - self.mgr.cert_key_store.save_key(self.CEPHADM_ROOT_CA_KEY, self.ssl_certs.get_root_key()) + except SSLConfigException: + raise Exception("Cannot load cephadm root CA certificates.") + else: + self.ssl_certs.generate_root_cert(ip) + mgr.cert_key_store.save_cert(self.CEPHADM_ROOT_CA_CERT, self.ssl_certs.get_root_cert()) + mgr.cert_key_store.save_key(self.CEPHADM_ROOT_CA_KEY, self.ssl_certs.get_root_key()) def get_root_ca(self) -> str: - with self.lock: - if self.initialized: - return self.ssl_certs.get_root_cert() - raise Exception("Not initialized") - - def generate_cert(self, host_fqdn: Union[str, List[str]], node_ip: str) -> Tuple[str, str]: - with self.lock: - if self.initialized: - return self.ssl_certs.generate_cert(host_fqdn, node_ip) - raise Exception("Not initialized") + return self.ssl_certs.get_root_cert() + + def generate_cert(self, host_fqdn: Union[str, List[str]], node_ip: Union[str, List[str]]) -> Tuple[str, str]: + return self.ssl_certs.generate_cert(host_fqdn, node_ip) diff --git a/src/pybind/mgr/cephadm/http_server.py b/src/pybind/mgr/cephadm/http_server.py index 56a87bdcf64..7ddce2e8be2 100644 --- a/src/pybind/mgr/cephadm/http_server.py +++ b/src/pybind/mgr/cephadm/http_server.py @@ -31,7 +31,8 @@ class CephadmHttpServer(threading.Thread): self.service_discovery = ServiceDiscovery(mgr) self.cherrypy_shutdown_event = threading.Event() 
self._service_discovery_port = self.mgr.service_discovery_port - self.secure_monitoring_stack = self.mgr.secure_monitoring_stack + security_enabled, mgmt_gw_enabled = self.mgr._get_security_config() + self.security_enabled = security_enabled super().__init__(target=self.run) def configure_cherrypy(self) -> None: @@ -45,12 +46,13 @@ class CephadmHttpServer(threading.Thread): self.agent.configure() self.service_discovery.configure(self.mgr.service_discovery_port, self.mgr.get_mgr_ip(), - self.secure_monitoring_stack) + self.security_enabled) def config_update(self) -> None: self.service_discovery_port = self.mgr.service_discovery_port - if self.secure_monitoring_stack != self.mgr.secure_monitoring_stack: - self.secure_monitoring_stack = self.mgr.secure_monitoring_stack + security_enabled, mgmt_gw_enabled = self.mgr._get_security_config() + if self.security_enabled != security_enabled: + self.security_enabled = security_enabled self.restart() @property diff --git a/src/pybind/mgr/cephadm/inventory.py b/src/pybind/mgr/cephadm/inventory.py index 64d286e30ac..5a89b362809 100644 --- a/src/pybind/mgr/cephadm/inventory.py +++ b/src/pybind/mgr/cephadm/inventory.py @@ -1942,9 +1942,7 @@ class CertKeyStore(): 'nvmeof_server_cert': {}, # service-name -> cert 'nvmeof_client_cert': {}, # service-name -> cert 'nvmeof_root_ca_cert': {}, # service-name -> cert - 'agent_endpoint_root_cert': Cert(), # cert - 'mgmt_gw_root_cert': Cert(), # cert - 'service_discovery_root_cert': Cert(), # cert + 'mgmt_gw_cert': Cert(), # cert 'cephadm_root_ca_cert': Cert(), # cert 'grafana_cert': {}, # host -> cert } @@ -1952,6 +1950,7 @@ class CertKeyStore(): # that don't have a key here are probably certs in PEM format # so there is no need to store a separate key self.known_keys = { + 'mgmt_gw_key': PrivKey(), # cert 'cephadm_root_ca_key': PrivKey(), # cert 'grafana_key': {}, # host -> key 'iscsi_ssl_key': {}, # service-name -> key diff --git a/src/pybind/mgr/cephadm/module.py 
b/src/pybind/mgr/cephadm/module.py index 97a9404a31c..51d677ec6cd 100644 --- a/src/pybind/mgr/cephadm/module.py +++ b/src/pybind/mgr/cephadm/module.py @@ -6,6 +6,7 @@ import ipaddress import logging import re import shlex +import socket from collections import defaultdict from configparser import ConfigParser from contextlib import contextmanager @@ -771,6 +772,23 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, assert service_type in ServiceSpec.KNOWN_SERVICE_TYPES return self.cephadm_services[service_type] + def get_fqdn(self, hostname: str) -> str: + """Get a host's FQDN with its hostname. + + If the FQDN can't be resolved, the address from the inventory will + be returned instead. + """ + # TODO(redo): get fqdn from the inventory + addr = self.inventory.get_addr(hostname) + return socket.getfqdn(addr) + + def _get_security_config(self) -> Tuple[bool, bool]: + # TODO(redo): enable when oauth2-proxy code is active + # oauth2_proxy_enabled = len(self.mgr.cache.get_daemons_by_service('oauth2-proxy')) > 0 + mgmt_gw_enabled = len(self.cache.get_daemons_by_service('mgmt-gateway')) > 0 + security_enabled = self.secure_monitoring_stack or mgmt_gw_enabled + return security_enabled, mgmt_gw_enabled + def _get_cephadm_binary_path(self) -> str: import hashlib m = hashlib.sha256() @@ -2611,9 +2629,6 @@ Then run the following: raise OrchestratorError( f'If {service_name} is removed then the following OSDs will remain, --force to proceed anyway\n{msg}') - if service_name == 'mgmt-gateway': - self.set_module_option('secure_monitoring_stack', False) - found = self.spec_store.rm(service_name) if found and service_name.startswith('osd.'): self.spec_store.finally_rm(service_name) @@ -2943,21 +2958,26 @@ Then run the following: # add dependency on ceph-exporter daemons deps += [d.name() for d in self.cache.get_daemons_by_service('ceph-exporter')] deps += [d.name() for d in self.cache.get_daemons_by_service('mgmt-gateway')] - if self.secure_monitoring_stack: + 
security_enabled, _ = self._get_security_config() + if security_enabled: if prometheus_user and prometheus_password: deps.append(f'{hash(prometheus_user + prometheus_password)}') if alertmanager_user and alertmanager_password: deps.append(f'{hash(alertmanager_user + alertmanager_password)}') elif daemon_type == 'grafana': deps += get_daemon_names(['prometheus', 'loki', 'mgmt-gateway']) - if self.secure_monitoring_stack and prometheus_user and prometheus_password: + security_enabled, _ = self._get_security_config() + if security_enabled and prometheus_user and prometheus_password: deps.append(f'{hash(prometheus_user + prometheus_password)}') elif daemon_type == 'alertmanager': deps += get_daemon_names(['mgr', 'alertmanager', 'snmp-gateway', 'mgmt-gateway']) - if self.secure_monitoring_stack and alertmanager_user and alertmanager_password: + security_enabled, _ = self._get_security_config() + if security_enabled and alertmanager_user and alertmanager_password: deps.append(f'{hash(alertmanager_user + alertmanager_password)}') elif daemon_type == 'promtail': deps += get_daemon_names(['loki']) + elif daemon_type in ['ceph-exporter', 'node-exporter']: + deps += get_daemon_names(['mgmt-gateway']) elif daemon_type == JaegerAgentService.TYPE: for dd in self.cache.get_daemons_by_type(JaegerCollectorService.TYPE): assert dd.hostname is not None @@ -2972,7 +2992,7 @@ Then run the following: # this daemon type doesn't need deps mgmt pass - if daemon_type in ['prometheus', 'node-exporter', 'alertmanager', 'grafana', 'mgmt-gateway']: + if daemon_type in ['prometheus', 'node-exporter', 'alertmanager', 'grafana']: deps.append(f'secure_monitoring_stack:{self.secure_monitoring_stack}') return sorted(deps) @@ -3088,10 +3108,17 @@ Then run the following: @handle_orch_error def generate_certificates(self, module_name: str) -> Optional[Dict[str, str]]: + import socket supported_moduels = ['dashboard', 'prometheus'] if module_name not in supported_moduels: raise 
OrchestratorError(f'Unsupported modlue {module_name}. Supported moduels are: {supported_moduels}') - cert, key = self.cert_mgr.generate_cert(self.get_hostname(), self.get_mgr_ip()) + + host_fqdns = [socket.getfqdn(self.get_hostname())] + node_ip = self.get_mgr_ip() + if module_name == 'dashboard': + host_fqdns.append('dashboard_servers') + + cert, key = self.cert_mgr.generate_cert(host_fqdns, node_ip) return {'cert': cert, 'key': key} @handle_orch_error @@ -3148,6 +3175,9 @@ Then run the following: @handle_orch_error def get_prometheus_access_info(self) -> Dict[str, str]: + security_enabled, _ = self._get_security_config() + if not security_enabled: + return {} user, password = self._get_prometheus_credentials() return {'user': user, 'password': password, @@ -3155,6 +3185,9 @@ Then run the following: @handle_orch_error def get_alertmanager_access_info(self) -> Dict[str, str]: + security_enabled, _ = self._get_security_config() + if not security_enabled: + return {} user, password = self._get_alertmanager_credentials() return {'user': user, 'password': password, @@ -3403,9 +3436,6 @@ Then run the following: host_count = len(self.inventory.keys()) max_count = self.max_count_per_host - if spec.service_type == 'mgmt-gateway': - self.set_module_option('secure_monitoring_stack', True) - if spec.placement.count is not None: if spec.service_type in ['mon', 'mgr']: if spec.placement.count > max(5, host_count): diff --git a/src/pybind/mgr/cephadm/services/cephadmservice.py b/src/pybind/mgr/cephadm/services/cephadmservice.py index 4ef62071875..2964a44e2c3 100644 --- a/src/pybind/mgr/cephadm/services/cephadmservice.py +++ b/src/pybind/mgr/cephadm/services/cephadmservice.py @@ -90,7 +90,7 @@ def get_dashboard_endpoints(svc: 'CephadmService') -> Tuple[List[str], Optional[ if not port: continue assert dd.hostname is not None - addr = svc._inventory_get_fqdn(dd.hostname) + addr = svc.mgr.get_fqdn(dd.hostname) dashboard_endpoints.append(f'{addr}:{port}') return dashboard_endpoints, 
protocol @@ -124,7 +124,7 @@ def get_dashboard_urls(svc: 'CephadmService') -> List[str]: if dd.daemon_id == svc.mgr.get_mgr_id(): continue assert dd.hostname is not None - addr = svc._inventory_get_fqdn(dd.hostname) + addr = svc.mgr.get_fqdn(dd.hostname) dashboard_urls.append(build_url(scheme=proto, host=addr, port=port).rstrip('/')) return dashboard_urls @@ -384,15 +384,6 @@ class CephadmService(metaclass=ABCMeta): raise OrchestratorError(f"Unable to fetch keyring for {entity}: {err}") return simplified_keyring(entity, keyring) - def _inventory_get_fqdn(self, hostname: str) -> str: - """Get a host's FQDN with its hostname. - - If the FQDN can't be resolved, the address from the inventory will - be returned instead. - """ - addr = self.mgr.inventory.get_addr(hostname) - return socket.getfqdn(addr) - def _set_value_on_dashboard(self, service_name: str, get_mon_cmd: str, @@ -1282,11 +1273,29 @@ class CephExporterService(CephService): if spec.stats_period: exporter_config.update({'stats-period': f'{spec.stats_period}'}) + security_enabled, mgmt_gw_enabled = self.mgr._get_security_config() + if security_enabled: + exporter_config.update({'https_enabled': True}) + crt, key = self.get_certificates(daemon_spec) + exporter_config['files'] = { + 'ceph-exporter.crt': crt, + 'ceph-exporter.key': key + } daemon_spec.keyring = keyring daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec) daemon_spec.final_config = merge_dicts(daemon_spec.final_config, exporter_config) + + deps = [] + deps += [d.name() for d in self.mgr.cache.get_daemons_by_service('mgmt-gateway')] + deps += [f'secure_monitoring_stack:{self.mgr.secure_monitoring_stack}'] + daemon_spec.deps = deps + return daemon_spec + def get_certificates(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[str, str]: + node_ip = self.mgr.inventory.get_addr(daemon_spec.host) + host_fqdn = self.mgr.get_fqdn(daemon_spec.host) + return self.mgr.cert_mgr.generate_cert(host_fqdn, node_ip) class 
CephfsMirrorService(CephService): TYPE = 'cephfs-mirror' diff --git a/src/pybind/mgr/cephadm/services/mgmt_gateway.py b/src/pybind/mgr/cephadm/services/mgmt_gateway.py index 610634f92d8..2470b7de4cb 100644 --- a/src/pybind/mgr/cephadm/services/mgmt_gateway.py +++ b/src/pybind/mgr/cephadm/services/mgmt_gateway.py @@ -1,13 +1,44 @@ import logging -from typing import List, Any, Tuple, Dict, cast +from typing import TYPE_CHECKING, List, Any, Tuple, Dict, cast, Optional from orchestrator import DaemonDescription from ceph.deployment.service_spec import MgmtGatewaySpec, GrafanaSpec from cephadm.services.cephadmservice import CephadmService, CephadmDaemonDeploySpec, get_dashboard_endpoints +from mgr_util import build_url + +if TYPE_CHECKING: + from cephadm.module import CephadmOrchestrator logger = logging.getLogger(__name__) +def get_mgmt_gw_internal_endpoint(mgr: "CephadmOrchestrator") -> Optional[str]: + mgmt_gw_daemons = mgr.cache.get_daemons_by_service('mgmt-gateway') + if not mgmt_gw_daemons: + return None + + dd = mgmt_gw_daemons[0] + assert dd.hostname is not None + mgmt_gw_addr = mgr.get_fqdn(dd.hostname) + mgmt_gw_internal_endpoint = build_url(scheme='https', host=mgmt_gw_addr, port=MgmtGatewayService.INTERNAL_SERVICE_PORT) + return f'{mgmt_gw_internal_endpoint}/internal' + + +def get_mgmt_gw_external_endpoint(mgr: "CephadmOrchestrator") -> Optional[str]: + mgmt_gw_daemons = mgr.cache.get_daemons_by_service('mgmt-gateway') + if not mgmt_gw_daemons: + return None + + dd = mgmt_gw_daemons[0] + assert dd.hostname is not None + mgmt_gw_port = dd.ports[0] if dd.ports else None + mgmt_gw_addr = mgr.get_fqdn(dd.hostname) + mgmt_gw_spec = cast(MgmtGatewaySpec, mgr.spec_store['mgmt-gateway'].spec) + protocol = 'http' if mgmt_gw_spec.disable_https else 'https' + mgmt_gw_external_endpoint = build_url(scheme=protocol, host=mgmt_gw_addr, port=mgmt_gw_port) + return mgmt_gw_external_endpoint + + class MgmtGatewayService(CephadmService): TYPE = 'mgmt-gateway' SVC_TEMPLATE_PATH 
= 'services/mgmt-gateway/nginx.conf.j2' @@ -40,17 +71,31 @@ class MgmtGatewayService(CephadmService): self.mgr.set_module_option_ex('dashboard', 'standby_error_status_code', '503') self.mgr.set_module_option_ex('dashboard', 'standby_behaviour', 'error') - def get_certificates(self, svc_spec: MgmtGatewaySpec, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[str, str, str, str]: + def get_external_certificates(self, svc_spec: MgmtGatewaySpec, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[str, str]: + cert = self.mgr.cert_key_store.get_cert('mgmt_gw_cert') + key = self.mgr.cert_key_store.get_key('mgmt_gw_key') + if not (cert and key): + # not available on store, check if provided on the spec + if svc_spec.ssl_certificate and svc_spec.ssl_certificate_key: + cert = svc_spec.ssl_certificate + key = svc_spec.ssl_certificate_key + else: + # not provided on the spec, let's generate self-sigend certificates + addr = self.mgr.inventory.get_addr(daemon_spec.host) + host_fqdn = self.mgr.get_fqdn(daemon_spec.host) + cert, key = self.mgr.cert_mgr.generate_cert(host_fqdn, addr) + # save certificates + if cert and key: + self.mgr.cert_key_store.save_cert('mgmt_gw_cert', cert) + self.mgr.cert_key_store.save_key('mgmt_gw_key', key) + else: + logger.error("Failed to obtain certificate and key from mgmt-gateway.") + return cert, key + + def get_internal_certificates(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[str, str]: node_ip = self.mgr.inventory.get_addr(daemon_spec.host) - host_fqdn = self._inventory_get_fqdn(daemon_spec.host) - internal_cert, internal_pkey = self.mgr.cert_mgr.generate_cert(host_fqdn, node_ip) - cert = svc_spec.ssl_certificate - pkey = svc_spec.ssl_certificate_key - if not (cert and pkey): - # In case the user has not provided certificates then we generate self-signed ones - cert, pkey = self.mgr.cert_mgr.generate_cert(host_fqdn, node_ip) - - return internal_cert, internal_pkey, cert, pkey + host_fqdn = self.mgr.get_fqdn(daemon_spec.host) + return 
self.mgr.cert_mgr.generate_cert(host_fqdn, node_ip) def get_mgmt_gateway_deps(self) -> List[str]: # url_prefix for the following services depends on the presence of mgmt-gateway @@ -58,8 +103,6 @@ class MgmtGatewayService(CephadmService): deps += [d.name() for d in self.mgr.cache.get_daemons_by_service('prometheus')] deps += [d.name() for d in self.mgr.cache.get_daemons_by_service('alertmanager')] deps += [d.name() for d in self.mgr.cache.get_daemons_by_service('grafana')] - # secure_monitoring_stack affects the protocol used by monitoring services - deps += [f'secure_monitoring_stack:{self.mgr.secure_monitoring_stack}'] for dd in self.mgr.cache.get_daemons_by_service('mgr'): # we consider mgr a dep even if the dashboard is disabled # in order to be consistent with _calc_daemon_deps(). @@ -70,9 +113,8 @@ class MgmtGatewayService(CephadmService): def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]: assert self.TYPE == daemon_spec.daemon_type svc_spec = cast(MgmtGatewaySpec, self.mgr.spec_store[daemon_spec.service_name].spec) + scheme = 'https' dashboard_endpoints, dashboard_scheme = get_dashboard_endpoints(self) - scheme = 'https' if self.mgr.secure_monitoring_stack else 'http' - prometheus_endpoints = self.get_service_endpoints('prometheus') alertmanager_endpoints = self.get_service_endpoints('alertmanager') grafana_endpoints = self.get_service_endpoints('grafana') @@ -88,20 +130,11 @@ class MgmtGatewayService(CephadmService): 'alertmanager_endpoints': alertmanager_endpoints, 'grafana_endpoints': grafana_endpoints } - external_server_context = { + server_context = { 'spec': svc_spec, + 'internal_port': self.INTERNAL_SERVICE_PORT, 'dashboard_scheme': dashboard_scheme, - 'grafana_scheme': grafana_protocol, - 'prometheus_scheme': scheme, - 'alertmanager_scheme': scheme, 'dashboard_endpoints': dashboard_endpoints, - 'prometheus_endpoints': prometheus_endpoints, - 'alertmanager_endpoints': alertmanager_endpoints, - 
'grafana_endpoints': grafana_endpoints - } - internal_server_context = { - 'spec': svc_spec, - 'internal_port': self.INTERNAL_SERVICE_PORT, 'grafana_scheme': grafana_protocol, 'prometheus_scheme': scheme, 'alertmanager_scheme': scheme, @@ -110,19 +143,21 @@ class MgmtGatewayService(CephadmService): 'grafana_endpoints': grafana_endpoints } - internal_cert, internal_pkey, cert, pkey = self.get_certificates(svc_spec, daemon_spec) + cert, key = self.get_external_certificates(svc_spec, daemon_spec) + internal_cert, internal_pkey = self.get_internal_certificates(daemon_spec) daemon_config = { "files": { "nginx.conf": self.mgr.template.render(self.SVC_TEMPLATE_PATH, main_context), - "nginx_external_server.conf": self.mgr.template.render(self.EXTERNAL_SVC_TEMPLATE_PATH, external_server_context), - "nginx_internal_server.conf": self.mgr.template.render(self.INTERNAL_SVC_TEMPLATE_PATH, internal_server_context), + "nginx_external_server.conf": self.mgr.template.render(self.EXTERNAL_SVC_TEMPLATE_PATH, server_context), + "nginx_internal_server.conf": self.mgr.template.render(self.INTERNAL_SVC_TEMPLATE_PATH, server_context), "nginx_internal.crt": internal_cert, - "nginx_internal.key": internal_pkey + "nginx_internal.key": internal_pkey, + "ca.crt": self.mgr.cert_mgr.get_root_ca() } } if not svc_spec.disable_https: daemon_config["files"]["nginx.crt"] = cert - daemon_config["files"]["nginx.key"] = pkey + daemon_config["files"]["nginx.key"] = key return daemon_config, sorted(self.get_mgmt_gateway_deps()) @@ -133,3 +168,7 @@ class MgmtGatewayService(CephadmService): # reset the standby dashboard redirection behaviour self.mgr.set_module_option_ex('dashboard', 'standby_error_status_code', '500') self.mgr.set_module_option_ex('dashboard', 'standby_behaviour', 'redirect') + if daemon.hostname is not None: + # delete cert/key entires for this mgmt-gateway daemon + self.mgr.cert_key_store.rm_cert('mgmt_gw_cert') + self.mgr.cert_key_store.rm_key('mgmt_gw_key') diff --git 
a/src/pybind/mgr/cephadm/services/monitoring.py b/src/pybind/mgr/cephadm/services/monitoring.py index a20b1202ccd..3a20bbfe485 100644 --- a/src/pybind/mgr/cephadm/services/monitoring.py +++ b/src/pybind/mgr/cephadm/services/monitoring.py @@ -8,10 +8,10 @@ from mgr_module import HandleCommandResult from orchestrator import DaemonDescription from ceph.deployment.service_spec import AlertManagerSpec, GrafanaSpec, ServiceSpec, \ - SNMPGatewaySpec, PrometheusSpec, MgmtGatewaySpec + SNMPGatewaySpec, PrometheusSpec from cephadm.services.cephadmservice import CephadmService, CephadmDaemonDeploySpec, get_dashboard_urls -from cephadm.services.mgmt_gateway import MgmtGatewayService -from mgr_util import verify_tls, ServerConfigException, create_self_signed_cert, build_url, get_cert_issuer_info, password_hash +from cephadm.services.mgmt_gateway import get_mgmt_gw_internal_endpoint, get_mgmt_gw_external_endpoint +from mgr_util import verify_tls, ServerConfigException, build_url, get_cert_issuer_info, password_hash from ceph.deployment.utils import wrap_ipv6 logger = logging.getLogger(__name__) @@ -28,11 +28,12 @@ class GrafanaService(CephadmService): def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]: assert self.TYPE == daemon_spec.daemon_type - prometheus_user, prometheus_password = self.mgr._get_prometheus_credentials() deps = [] # type: List[str] - if self.mgr.secure_monitoring_stack and prometheus_user and prometheus_password: - deps.append(f'{hash(prometheus_user + prometheus_password)}') + security_enabled, mgmt_gw_enabled = self.mgr._get_security_config() deps.append(f'secure_monitoring_stack:{self.mgr.secure_monitoring_stack}') + prometheus_user, prometheus_password = self.mgr._get_prometheus_credentials() + if security_enabled and prometheus_user and prometheus_password: + deps.append(f'{hash(prometheus_user + prometheus_password)}') # add a dependency since url_prefix depends on the existence of mgmt-gateway deps += 
[d.name() for d in self.mgr.cache.get_daemons_by_service('mgmt-gateway')] @@ -40,31 +41,40 @@ class GrafanaService(CephadmService): prom_services = [] # type: List[str] for dd in self.mgr.cache.get_daemons_by_service('prometheus'): assert dd.hostname is not None - addr = dd.ip if dd.ip else self._inventory_get_fqdn(dd.hostname) + addr = dd.ip if dd.ip else self.mgr.get_fqdn(dd.hostname) port = dd.ports[0] if dd.ports else 9095 - protocol = 'https' if self.mgr.secure_monitoring_stack else 'http' + protocol = 'https' if security_enabled else 'http' prom_services.append(build_url(scheme=protocol, host=addr, port=port)) - deps.append(dd.name()) + # in case mgmt-gw is enabeld we only use one url pointing to the internal + # mgmt gw for dashboard which will take care of HA in this case + if mgmt_gw_enabled: + prom_services = [f'{get_mgmt_gw_internal_endpoint(self.mgr)}/prometheus'] + daemons = self.mgr.cache.get_daemons_by_service('loki') loki_host = '' for i, dd in enumerate(daemons): assert dd.hostname is not None if i == 0: - addr = dd.ip if dd.ip else self._inventory_get_fqdn(dd.hostname) + addr = dd.ip if dd.ip else self.mgr.get_fqdn(dd.hostname) loki_host = build_url(scheme='http', host=addr, port=3100) deps.append(dd.name()) - root_cert = self.mgr.http_server.service_discovery.ssl_certs.get_root_cert() + root_cert = self.mgr.cert_mgr.get_root_ca() + cert, pkey = self.prepare_certificates(daemon_spec) oneline_root_cert = '\\n'.join([line.strip() for line in root_cert.splitlines()]) + oneline_cert = '\\n'.join([line.strip() for line in cert.splitlines()]) + oneline_key = '\\n'.join([line.strip() for line in pkey.splitlines()]) grafana_data_sources = self.mgr.template.render('services/grafana/ceph-dashboard.yml.j2', {'hosts': prom_services, 'prometheus_user': prometheus_user, 'prometheus_password': prometheus_password, 'cephadm_root_ca': oneline_root_cert, - 'security_enabled': self.mgr.secure_monitoring_stack, + 'cert': oneline_cert, + 'key': oneline_key, + 
'security_enabled': security_enabled, 'loki_host': loki_host}) spec: GrafanaSpec = cast( @@ -80,7 +90,6 @@ class GrafanaService(CephadmService): daemon_spec.port_ips = {str(grafana_port): ip_to_bind_to} grafana_ip = ip_to_bind_to - mgmt_gw_enabled = len(self.mgr.cache.get_daemons_by_service('mgmt-gateway')) > 0 grafana_ini = self.mgr.template.render( 'services/grafana/grafana.ini.j2', { 'anonymous_access': spec.anonymous_access, @@ -103,7 +112,6 @@ class GrafanaService(CephadmService): } ) - cert, pkey = self.prepare_certificates(daemon_spec) config_file = { 'files': { "grafana.ini": grafana_ini, @@ -190,19 +198,12 @@ class GrafanaService(CephadmService): # TODO: signed cert dd = self.get_active_daemon(daemon_descrs) assert dd.hostname is not None - addr = dd.ip if dd.ip else self._inventory_get_fqdn(dd.hostname) + addr = dd.ip if dd.ip else self.mgr.get_fqdn(dd.hostname) port = dd.ports[0] if dd.ports else self.DEFAULT_SERVICE_PORT spec = cast(GrafanaSpec, self.mgr.spec_store[dd.service_name()].spec) - mgmt_gw_daemons = self.mgr.cache.get_daemons_by_service('mgmt-gateway') - if mgmt_gw_daemons: - dd = mgmt_gw_daemons[0] - assert dd.hostname is not None - mgmt_gw_spec = cast(MgmtGatewaySpec, self.mgr.spec_store['mgmt-gateway'].spec) - mgmt_gw_port = dd.ports[0] if dd.ports else None - mgmt_gw_addr = self._inventory_get_fqdn(dd.hostname) - protocol = 'http' if mgmt_gw_spec.disable_https else 'https' - mgmt_gw_external_endpoint = build_url(scheme=protocol, host=mgmt_gw_addr, port=mgmt_gw_port) + mgmt_gw_external_endpoint = get_mgmt_gw_external_endpoint(self.mgr) + if mgmt_gw_external_endpoint is not None: self._set_value_on_dashboard( 'Grafana', 'dashboard get-grafana-api-url', @@ -256,7 +257,7 @@ class AlertmanagerService(CephadmService): def get_alertmanager_certificates(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[str, str]: node_ip = self.mgr.inventory.get_addr(daemon_spec.host) - host_fqdn = self._inventory_get_fqdn(daemon_spec.host) + host_fqdn = 
self.mgr.get_fqdn(daemon_spec.host) cert, key = self.mgr.cert_mgr.generate_cert([host_fqdn, "alertmanager_servers"], node_ip) return cert, key @@ -283,19 +284,25 @@ class AlertmanagerService(CephadmService): # in order to be consistent with _calc_daemon_deps(). deps.append(dd.name()) + security_enabled, mgmt_gw_enabled = self.mgr._get_security_config() + if mgmt_gw_enabled: + dashboard_urls = [f'{get_mgmt_gw_internal_endpoint(self.mgr)}/dashboard'] + else: + dashboard_urls = get_dashboard_urls(self) + snmp_gateway_urls: List[str] = [] for dd in self.mgr.cache.get_daemons_by_service('snmp-gateway'): assert dd.hostname is not None assert dd.ports - addr = dd.ip if dd.ip else self._inventory_get_fqdn(dd.hostname) + addr = dd.ip if dd.ip else self.mgr.get_fqdn(dd.hostname) deps.append(dd.name()) snmp_gateway_urls.append(build_url(scheme='http', host=addr, port=dd.ports[0], path='/alerts')) context = { - 'secure_monitoring_stack': self.mgr.secure_monitoring_stack, - 'dashboard_urls': get_dashboard_urls(self), + 'security_enabled': security_enabled, + 'dashboard_urls': dashboard_urls, 'default_webhook_urls': default_webhook_urls, 'snmp_gateway_urls': snmp_gateway_urls, 'secure': secure, @@ -307,17 +314,18 @@ class AlertmanagerService(CephadmService): for dd in self.mgr.cache.get_daemons_by_service('alertmanager'): assert dd.hostname is not None deps.append(dd.name()) - addr = self._inventory_get_fqdn(dd.hostname) + addr = self.mgr.get_fqdn(dd.hostname) peers.append(build_url(host=addr, port=port).lstrip('/')) - mgmt_gw_enabled = len(self.mgr.cache.get_daemons_by_service('mgmt-gateway')) > 0 deps.append(f'secure_monitoring_stack:{self.mgr.secure_monitoring_stack}') - if self.mgr.secure_monitoring_stack: + if security_enabled: alertmanager_user, alertmanager_password = self.mgr._get_alertmanager_credentials() if alertmanager_user and alertmanager_password: deps.append(f'{hash(alertmanager_user + alertmanager_password)}') cert, key = 
self.get_alertmanager_certificates(daemon_spec) context = { + 'enable_mtls': mgmt_gw_enabled, + 'enable_basic_auth': True, # TODO(redo): disable when ouath2-proxy is enabled 'alertmanager_web_user': alertmanager_user, 'alertmanager_web_password': password_hash(alertmanager_password), } @@ -327,7 +335,7 @@ class AlertmanagerService(CephadmService): 'alertmanager.crt': cert, 'alertmanager.key': key, 'web.yml': self.mgr.template.render('services/alertmanager/web.yml.j2', context), - 'root_cert.pem': self.mgr.http_server.service_discovery.ssl_certs.get_root_cert() + 'root_cert.pem': self.mgr.cert_mgr.get_root_ca() }, 'peers': peers, 'web_config': '/etc/alertmanager/web.yml', @@ -352,21 +360,16 @@ class AlertmanagerService(CephadmService): def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None: dd = self.get_active_daemon(daemon_descrs) assert dd.hostname is not None - addr = dd.ip if dd.ip else self._inventory_get_fqdn(dd.hostname) + addr = dd.ip if dd.ip else self.mgr.get_fqdn(dd.hostname) port = dd.ports[0] if dd.ports else self.DEFAULT_SERVICE_PORT - protocol = 'https' if self.mgr.secure_monitoring_stack else 'http' - - mgmt_gw_daemons = self.mgr.cache.get_daemons_by_service('mgmt-gateway') - if mgmt_gw_daemons: - dd = mgmt_gw_daemons[0] - assert dd.hostname is not None - mgmt_gw_addr = self._inventory_get_fqdn(dd.hostname) - mgmt_gw_internal_endpoint = build_url(scheme='https', host=mgmt_gw_addr, port=MgmtGatewayService.INTERNAL_SERVICE_PORT) + security_enabled, mgmt_gw_enabled = self.mgr._get_security_config() + protocol = 'https' if security_enabled else 'http' + if mgmt_gw_enabled: self._set_value_on_dashboard( 'AlertManager', 'dashboard get-alertmanager-api-host', 'dashboard set-alertmanager-api-host', - f'{mgmt_gw_internal_endpoint}/internal/alertmanager' + f'{get_mgmt_gw_internal_endpoint(self.mgr)}/alertmanager' ) self._set_value_on_dashboard( 'Alertmanager', @@ -413,8 +416,8 @@ class PrometheusService(CephadmService): def 
get_mgr_prometheus_certificates(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[str, str]: node_ip = self.mgr.inventory.get_addr(daemon_spec.host) - host_fqdn = self._inventory_get_fqdn(daemon_spec.host) - cert, key = self.mgr.cert_mgr.generate_cert([host_fqdn, "prometheus_servers"], node_ip) + host_fqdn = self.mgr.get_fqdn(daemon_spec.host) + cert, key = self.mgr.cert_mgr.generate_cert([host_fqdn, 'prometheus_servers'], node_ip) return cert, key def prepare_create( @@ -450,9 +453,10 @@ class PrometheusService(CephadmService): retention_size = '0' # build service discovery end-point + security_enabled, mgmt_gw_enabled = self.mgr._get_security_config() port = self.mgr.service_discovery_port mgr_addr = wrap_ipv6(self.mgr.get_mgr_ip()) - protocol = 'https' if self.mgr.secure_monitoring_stack else 'http' + protocol = 'https' if security_enabled else 'http' srv_end_point = f'{protocol}://{mgr_addr}:{port}/sd/prometheus/sd-config?' node_exporter_cnt = len(self.mgr.cache.get_daemons_by_service('node-exporter')) @@ -464,6 +468,7 @@ class PrometheusService(CephadmService): mgr_prometheus_sd_url = f'{srv_end_point}service=mgr-prometheus' # always included ceph_exporter_sd_url = f'{srv_end_point}service=ceph-exporter' # always included nvmeof_sd_url = f'{srv_end_point}service=nvmeof' # always included + mgmt_gw_enabled = len(self.mgr.cache.get_daemons_by_service('mgmt-gateway')) > 0 alertmanager_user, alertmanager_password = self.mgr._get_alertmanager_credentials() prometheus_user, prometheus_password = self.mgr._get_prometheus_credentials() @@ -471,9 +476,10 @@ class PrometheusService(CephadmService): # generate the prometheus configuration context = { + 'alertmanager_url_prefix': '/alertmanager' if mgmt_gw_enabled else '/', 'alertmanager_web_user': alertmanager_user, 'alertmanager_web_password': alertmanager_password, - 'secure_monitoring_stack': self.mgr.secure_monitoring_stack, + 'security_enabled': security_enabled, 'service_discovery_username': 
self.mgr.http_server.service_discovery.username, 'service_discovery_password': self.mgr.http_server.service_discovery.password, 'mgr_prometheus_sd_url': mgr_prometheus_sd_url, @@ -494,12 +500,13 @@ class PrometheusService(CephadmService): daemon_spec.port_ips = {str(port): ip_to_bind_to} web_context = { + 'enable_mtls': mgmt_gw_enabled, + 'enable_basic_auth': True, # TODO(redo): disable when oauth2-proxy is enabled 'prometheus_web_user': prometheus_user, 'prometheus_web_password': password_hash(prometheus_password), } - mgmt_gw_enabled = len(self.mgr.cache.get_daemons_by_service('mgmt-gateway')) > 0 - if self.mgr.secure_monitoring_stack: + if security_enabled: cert, key = self.get_mgr_prometheus_certificates(daemon_spec) r: Dict[str, Any] = { 'files': { @@ -559,14 +566,15 @@ class PrometheusService(CephadmService): # add an explicit dependency on the active manager. This will force to # re-deploy prometheus if the mgr has changed (due to a fail-over i.e). deps.append(self.mgr.get_active_mgr().name()) - if self.mgr.secure_monitoring_stack: + deps.append(f'secure_monitoring_stack:{self.mgr.secure_monitoring_stack}') + security_enabled, mgmt_gw_enabled = self.mgr._get_security_config() + if security_enabled: alertmanager_user, alertmanager_password = self.mgr._get_alertmanager_credentials() prometheus_user, prometheus_password = self.mgr._get_prometheus_credentials() if prometheus_user and prometheus_password: deps.append(f'{hash(prometheus_user + prometheus_password)}') if alertmanager_user and alertmanager_password: deps.append(f'{hash(alertmanager_user + alertmanager_password)}') - deps.append(f'secure_monitoring_stack:{self.mgr.secure_monitoring_stack}') # add a dependency since url_prefix depends on the existence of mgmt-gateway deps += [d.name() for d in self.mgr.cache.get_daemons_by_service('mgmt-gateway')] @@ -588,21 +596,16 @@ class PrometheusService(CephadmService): def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None: dd =
self.get_active_daemon(daemon_descrs) assert dd.hostname is not None - addr = dd.ip if dd.ip else self._inventory_get_fqdn(dd.hostname) + addr = dd.ip if dd.ip else self.mgr.get_fqdn(dd.hostname) port = dd.ports[0] if dd.ports else self.DEFAULT_SERVICE_PORT - protocol = 'https' if self.mgr.secure_monitoring_stack else 'http' - - mgmt_gw_daemons = self.mgr.cache.get_daemons_by_service('mgmt-gateway') - if mgmt_gw_daemons: - dd = mgmt_gw_daemons[0] - assert dd.hostname is not None - mgmt_gw_addr = self._inventory_get_fqdn(dd.hostname) - mgmt_gw_internal_endpoint = build_url(scheme='https', host=mgmt_gw_addr, port=MgmtGatewayService.INTERNAL_SERVICE_PORT) + security_enabled, mgmt_gw_enabled = self.mgr._get_security_config() + protocol = 'https' if security_enabled else 'http' + if mgmt_gw_enabled: self._set_value_on_dashboard( 'Prometheus', 'dashboard get-prometheus-api-host', 'dashboard set-prometheus-api-host', - f'{mgmt_gw_internal_endpoint}/internal/prometheus' + f'{get_mgmt_gw_internal_endpoint(self.mgr)}/prometheus' ) self._set_value_on_dashboard( 'Prometheus', @@ -640,20 +643,23 @@ class NodeExporterService(CephadmService): def get_node_exporter_certificates(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[str, str]: node_ip = self.mgr.inventory.get_addr(daemon_spec.host) - host_fqdn = self._inventory_get_fqdn(daemon_spec.host) + host_fqdn = self.mgr.get_fqdn(daemon_spec.host) cert, key = self.mgr.cert_mgr.generate_cert(host_fqdn, node_ip) return cert, key def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]: assert self.TYPE == daemon_spec.daemon_type - deps = [f'secure_monitoring_stack:{self.mgr.secure_monitoring_stack}'] - if self.mgr.secure_monitoring_stack: + deps = [] + deps += [d.name() for d in self.mgr.cache.get_daemons_by_service('mgmt-gateway')] + deps += [f'secure_monitoring_stack:{self.mgr.secure_monitoring_stack}'] + security_enabled, mgmt_gw_enabled = self.mgr._get_security_config() + if 
security_enabled: cert, key = self.get_node_exporter_certificates(daemon_spec) - mgmt_gw_enabled = len(self.mgr.cache.get_daemons_by_service('mgmt-gateway')) > 0 r = { 'files': { - 'web.yml': self.mgr.template.render('services/node-exporter/web.yml.j2', {}), - 'root_cert.pem': self.mgr.http_server.service_discovery.ssl_certs.get_root_cert(), + 'web.yml': self.mgr.template.render('services/node-exporter/web.yml.j2', + {'enable_mtls': mgmt_gw_enabled}), + 'root_cert.pem': self.mgr.cert_mgr.get_root_ca(), 'node_exporter.crt': cert, 'node_exporter.key': key, }, @@ -713,7 +719,7 @@ class PromtailService(CephadmService): for i, dd in enumerate(daemons): assert dd.hostname is not None if i == 0: - loki_host = dd.ip if dd.ip else self._inventory_get_fqdn(dd.hostname) + loki_host = dd.ip if dd.ip else self.mgr.get_fqdn(dd.hostname) deps.append(dd.name()) diff --git a/src/pybind/mgr/cephadm/ssl_cert_utils.py b/src/pybind/mgr/cephadm/ssl_cert_utils.py index 8724b4d3755..2a8d6fe4e3d 100644 --- a/src/pybind/mgr/cephadm/ssl_cert_utils.py +++ b/src/pybind/mgr/cephadm/ssl_cert_utils.py @@ -1,5 +1,5 @@ -from typing import Any, Tuple, IO, List +from typing import Any, Tuple, IO, List, Union import ipaddress from datetime import datetime, timedelta @@ -9,11 +9,6 @@ from cryptography.hazmat.primitives.asymmetric import rsa from cryptography.hazmat.primitives import hashes, serialization from cryptography.hazmat.backends import default_backend -from orchestrator import OrchestratorError - - -logger = logging.getLogger(__name__) - class SSLConfigException(Exception): pass @@ -64,19 +59,23 @@ class SSLCerts: return (cert_str, key_str) - def generate_cert(self, hosts: Any, addr: str) -> Tuple[str, str]: - have_ip = True + def generate_cert(self, _hosts: Union[str, List[str]], _addrs: Union[str, List[str]]) -> Tuple[str, str]: + + addrs = [_addrs] if isinstance(_addrs, str) else _addrs + hosts = [_hosts] if isinstance(_hosts, str) else _hosts + + valid_ips = True try: - ip = 
x509.IPAddress(ipaddress.ip_address(addr)) + ips = [x509.IPAddress(ipaddress.ip_address(addr)) for addr in addrs] except Exception: - have_ip = False + valid_ips = False private_key = rsa.generate_private_key( public_exponent=65537, key_size=4096, backend=default_backend()) public_key = private_key.public_key() builder = x509.CertificateBuilder() - builder = builder.subject_name(x509.Name([x509.NameAttribute(NameOID.COMMON_NAME, addr), ])) + builder = builder.subject_name(x509.Name([x509.NameAttribute(NameOID.COMMON_NAME, addrs[0]), ])) builder = builder.issuer_name( x509.Name([x509.NameAttribute(NameOID.COMMON_NAME, u'cephadm-root'), ])) builder = builder.not_valid_before(datetime.now()) @@ -84,11 +83,9 @@ class SSLCerts: builder = builder.serial_number(x509.random_serial_number()) builder = builder.public_key(public_key) - if isinstance(hosts, str): - hosts = [hosts] san_list: List[x509.GeneralName] = [x509.DNSName(host) for host in hosts] - if have_ip: - san_list.append(ip) + if valid_ips: + san_list.extend(ips) builder = builder.add_extension( x509.SubjectAlternativeName( @@ -129,7 +126,7 @@ class SSLCerts: given_cert = x509.load_pem_x509_certificate(cert.encode('utf-8'), backend=default_backend()) tz = given_cert.not_valid_after.tzinfo if datetime.now(tz) >= given_cert.not_valid_after: - raise OrchestratorError('Given cert is expired') + raise SSLConfigException('Given cert is expired') self.root_cert = given_cert self.root_key = serialization.load_pem_private_key( data=priv_key.encode('utf-8'), backend=default_backend(), password=None) diff --git a/src/pybind/mgr/cephadm/templates/services/alertmanager/alertmanager.yml.j2 b/src/pybind/mgr/cephadm/templates/services/alertmanager/alertmanager.yml.j2 index b34a1fc17e2..de993cb6ce3 100644 --- a/src/pybind/mgr/cephadm/templates/services/alertmanager/alertmanager.yml.j2 +++ b/src/pybind/mgr/cephadm/templates/services/alertmanager/alertmanager.yml.j2 @@ -6,7 +6,7 @@ global: {% if not secure %} http_config: 
tls_config: -{% if secure_monitoring_stack %} +{% if security_enabled %} ca_file: root_cert.pem {% else %} insecure_skip_verify: true diff --git a/src/pybind/mgr/cephadm/templates/services/alertmanager/web.yml.j2 b/src/pybind/mgr/cephadm/templates/services/alertmanager/web.yml.j2 index ef4f0b4c750..47bcc5a0f65 100644 --- a/src/pybind/mgr/cephadm/templates/services/alertmanager/web.yml.j2 +++ b/src/pybind/mgr/cephadm/templates/services/alertmanager/web.yml.j2 @@ -1,5 +1,11 @@ tls_server_config: cert_file: alertmanager.crt key_file: alertmanager.key +{% if enable_mtls %} + client_auth_type: RequireAndVerifyClientCert + client_ca_file: root_cert.pem +{% endif %} +{% if enable_basic_auth %} basic_auth_users: {{ alertmanager_web_user }}: {{ alertmanager_web_password }} +{% endif %} diff --git a/src/pybind/mgr/cephadm/templates/services/grafana/ceph-dashboard.yml.j2 b/src/pybind/mgr/cephadm/templates/services/grafana/ceph-dashboard.yml.j2 index 46aea864f53..4b2c05c38af 100644 --- a/src/pybind/mgr/cephadm/templates/services/grafana/ceph-dashboard.yml.j2 +++ b/src/pybind/mgr/cephadm/templates/services/grafana/ceph-dashboard.yml.j2 @@ -27,6 +27,8 @@ datasources: secureJsonData: basicAuthPassword: {{ prometheus_password }} tlsCACert: "{{ cephadm_root_ca }}" + tlsClientCert: "{{ cert }}" + tlsClientKey: "{{ key }}" {% endif %} {% endfor %} diff --git a/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/external_server.conf.j2 b/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/external_server.conf.j2 index 2220e8e4759..29da8954ccc 100644 --- a/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/external_server.conf.j2 +++ b/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/external_server.conf.j2 @@ -58,18 +58,33 @@ server { location /grafana { rewrite ^/grafana/(.*) /$1 break; proxy_pass {{ grafana_scheme }}://grafana_servers; + # clear any Authorization header as Prometheus and Alertmanager are using basic-auth; the browser + # will send this header if Grafana
is running on the same node as one of those services + proxy_set_header Authorization ""; } {% endif %} {% if prometheus_endpoints %} location /prometheus { proxy_pass {{ prometheus_scheme }}://prometheus_servers; + + proxy_ssl_certificate /etc/nginx/ssl/nginx_internal.crt; + proxy_ssl_certificate_key /etc/nginx/ssl/nginx_internal.key; + proxy_ssl_trusted_certificate /etc/nginx/ssl/ca.crt; + proxy_ssl_verify on; + proxy_ssl_verify_depth 2; } {% endif %} {% if alertmanager_endpoints %} location /alertmanager { proxy_pass {{ alertmanager_scheme }}://alertmanager_servers; + + proxy_ssl_certificate /etc/nginx/ssl/nginx_internal.crt; + proxy_ssl_certificate_key /etc/nginx/ssl/nginx_internal.key; + proxy_ssl_trusted_certificate /etc/nginx/ssl/ca.crt; + proxy_ssl_verify on; + proxy_ssl_verify_depth 2; } {% endif %} } diff --git a/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/internal_server.conf.j2 b/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/internal_server.conf.j2 index 6848c04ebe8..f48582c2ce1 100644 --- a/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/internal_server.conf.j2 +++ b/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/internal_server.conf.j2 @@ -8,6 +8,14 @@ server { ssl_ciphers AES128-SHA:AES256-SHA:RC4-SHA:DES-CBC3-SHA:RC4-MD5; ssl_prefer_server_ciphers on; +{% if dashboard_endpoints %} + location /internal/dashboard { + rewrite ^/internal/dashboard/(.*) /$1 break; + proxy_pass {{ dashboard_scheme }}://dashboard_servers; + proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504; + } +{% endif %} + {% if grafana_endpoints %} location /internal/grafana { rewrite ^/internal/grafana/(.*) /$1 break; @@ -19,6 +27,12 @@ server { location /internal/prometheus { rewrite ^/internal/prometheus/(.*) /prometheus/$1 break; proxy_pass {{ prometheus_scheme }}://prometheus_servers; + + proxy_ssl_certificate /etc/nginx/ssl/nginx_internal.crt; + proxy_ssl_certificate_key /etc/nginx/ssl/nginx_internal.key; + 
proxy_ssl_trusted_certificate /etc/nginx/ssl/ca.crt; + proxy_ssl_verify on; + proxy_ssl_verify_depth 2; } {% endif %} @@ -26,6 +40,12 @@ server { location /internal/alertmanager { rewrite ^/internal/alertmanager/(.*) /alertmanager/$1 break; proxy_pass {{ alertmanager_scheme }}://alertmanager_servers; + + proxy_ssl_certificate /etc/nginx/ssl/nginx_internal.crt; + proxy_ssl_certificate_key /etc/nginx/ssl/nginx_internal.key; + proxy_ssl_trusted_certificate /etc/nginx/ssl/ca.crt; + proxy_ssl_verify on; + proxy_ssl_verify_depth 2; } {% endif %} } diff --git a/src/pybind/mgr/cephadm/templates/services/node-exporter/web.yml.j2 b/src/pybind/mgr/cephadm/templates/services/node-exporter/web.yml.j2 index 1c122034518..594ad575130 100644 --- a/src/pybind/mgr/cephadm/templates/services/node-exporter/web.yml.j2 +++ b/src/pybind/mgr/cephadm/templates/services/node-exporter/web.yml.j2 @@ -1,3 +1,7 @@ tls_server_config: cert_file: node_exporter.crt key_file: node_exporter.key +{% if enable_mtls %} + client_auth_type: RequireAndVerifyClientCert + client_ca_file: root_cert.pem +{% endif %} diff --git a/src/pybind/mgr/cephadm/templates/services/prometheus/prometheus.yml.j2 b/src/pybind/mgr/cephadm/templates/services/prometheus/prometheus.yml.j2 index faccc8f6de2..ac1ceb54f21 100644 --- a/src/pybind/mgr/cephadm/templates/services/prometheus/prometheus.yml.j2 +++ b/src/pybind/mgr/cephadm/templates/services/prometheus/prometheus.yml.j2 @@ -2,7 +2,7 @@ global: scrape_interval: 10s evaluation_interval: 10s -{% if not secure_monitoring_stack %} +{% if not security_enabled %} external_labels: cluster: {{ cluster_fsid }} {% endif %} @@ -13,13 +13,16 @@ rule_files: {% if alertmanager_sd_url %} alerting: alertmanagers: -{% if secure_monitoring_stack %} +{% if security_enabled %} - scheme: https basic_auth: username: {{ alertmanager_web_user }} password: {{ alertmanager_web_password }} tls_config: ca_file: root_cert.pem + cert_file: prometheus.crt + key_file: prometheus.key + path_prefix: '{{ 
alertmanager_url_prefix }}' http_sd_configs: - url: {{ alertmanager_sd_url }} basic_auth: @@ -36,10 +39,10 @@ alerting: scrape_configs: - job_name: 'ceph' -{% if secure_monitoring_stack %} +{% if security_enabled %} scheme: https tls_config: - ca_file: mgr_prometheus_cert.pem + ca_file: root_cert.pem honor_labels: true relabel_configs: - source_labels: [instance] @@ -67,10 +70,12 @@ scrape_configs: {% if node_exporter_sd_url %} - job_name: 'node' -{% if secure_monitoring_stack %} +{% if security_enabled %} scheme: https tls_config: ca_file: root_cert.pem + cert_file: prometheus.crt + key_file: prometheus.key http_sd_configs: - url: {{ node_exporter_sd_url }} basic_auth: @@ -90,7 +95,7 @@ scrape_configs: {% if haproxy_sd_url %} - job_name: 'haproxy' -{% if secure_monitoring_stack %} +{% if security_enabled %} scheme: https tls_config: ca_file: root_cert.pem @@ -113,7 +118,7 @@ scrape_configs: {% if ceph_exporter_sd_url %} - job_name: 'ceph-exporter' -{% if secure_monitoring_stack %} +{% if security_enabled %} honor_labels: true scheme: https tls_config: @@ -138,7 +143,7 @@ scrape_configs: {% if nvmeof_sd_url %} - job_name: 'nvmeof' -{% if secure_monitoring_stack %} +{% if security_enabled %} honor_labels: true scheme: https tls_config: @@ -156,7 +161,7 @@ scrape_configs: {% endif %} {% endif %} -{% if not secure_monitoring_stack %} +{% if not security_enabled %} - job_name: 'federate' scrape_interval: 15s honor_labels: true diff --git a/src/pybind/mgr/cephadm/templates/services/prometheus/web.yml.j2 b/src/pybind/mgr/cephadm/templates/services/prometheus/web.yml.j2 index da3c3d724e8..c58c580e60e 100644 --- a/src/pybind/mgr/cephadm/templates/services/prometheus/web.yml.j2 +++ b/src/pybind/mgr/cephadm/templates/services/prometheus/web.yml.j2 @@ -1,5 +1,11 @@ tls_server_config: cert_file: prometheus.crt key_file: prometheus.key +{% if enable_mtls %} + client_auth_type: RequireAndVerifyClientCert + client_ca_file: root_cert.pem +{% endif %} +{% if enable_basic_auth %} 
basic_auth_users: {{ prometheus_web_user }}: {{ prometheus_web_password }} +{% endif %} diff --git a/src/pybind/mgr/cephadm/tests/fixtures.py b/src/pybind/mgr/cephadm/tests/fixtures.py index 5b50d4baf29..c49c637e6ed 100644 --- a/src/pybind/mgr/cephadm/tests/fixtures.py +++ b/src/pybind/mgr/cephadm/tests/fixtures.py @@ -95,7 +95,7 @@ def with_cephadm_module(module_options=None, store=None): mock.patch('cephadm.module.CephadmOrchestrator.get_module_option_ex', get_module_option_ex), \ mock.patch("cephadm.module.CephadmOrchestrator.get_osdmap"), \ mock.patch("cephadm.module.CephadmOrchestrator.remote"), \ - mock.patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '::1'),\ + mock.patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '::1'), \ mock.patch("cephadm.agent.CephadmAgentHelpers._request_agent_acks"), \ mock.patch("cephadm.agent.CephadmAgentHelpers._apply_agent", return_value=False), \ mock.patch("cephadm.agent.CephadmAgentHelpers._agent_down", return_value=False), \ diff --git a/src/pybind/mgr/cephadm/tests/test_cephadm.py b/src/pybind/mgr/cephadm/tests/test_cephadm.py index 50009cbce1f..9774e107bce 100644 --- a/src/pybind/mgr/cephadm/tests/test_cephadm.py +++ b/src/pybind/mgr/cephadm/tests/test_cephadm.py @@ -346,8 +346,8 @@ class TestCephadm(object): )) def test_list_daemons(self, cephadm_module: CephadmOrchestrator): cephadm_module.service_cache_timeout = 10 - with with_host(cephadm_module, 'test'): - CephadmServe(cephadm_module)._refresh_host_daemons('test') + with with_host(cephadm_module, 'myhost'): + CephadmServe(cephadm_module)._refresh_host_daemons('myhost') dds = wait(cephadm_module, cephadm_module.list_daemons()) assert {d.name() for d in dds} == {'rgw.myrgw.foobar', 'haproxy.test.bar'} @@ -1705,8 +1705,6 @@ class TestCephadm(object): nvmeof_client_cert = 'fake-nvmeof-client-cert' nvmeof_server_cert = 'fake-nvmeof-server-cert' nvmeof_root_ca_cert = 'fake-nvmeof-root-ca-cert' - 
cephadm_module.cert_key_store.save_cert('agent_endpoint_root_cert', agent_endpoint_root_cert) - cephadm_module.cert_key_store.save_cert('alertmanager_cert', alertmanager_host1_cert, host='host1') cephadm_module.cert_key_store.save_cert('rgw_frontend_ssl_cert', rgw_frontend_rgw_foo_host2_cert, service_name='rgw.foo', user_made=True) cephadm_module.cert_key_store.save_cert('nvmeof_server_cert', nvmeof_server_cert, service_name='nvmeof.foo', user_made=True) cephadm_module.cert_key_store.save_cert('nvmeof_client_cert', nvmeof_client_cert, service_name='nvmeof.foo', user_made=True) @@ -1728,12 +1726,9 @@ class TestCephadm(object): 'rgw_frontend_ssl_cert': False, 'iscsi_ssl_cert': False, 'ingress_ssl_cert': False, - 'mgmt_gw_root_cert': False, + 'mgmt_gw_cert': False, 'cephadm_root_ca_cert': False, 'grafana_cert': False, - 'alertmanager_cert': False, - 'prometheus_cert': False, - 'node_exporter_cert': False, 'nvmeof_client_cert': False, 'nvmeof_server_cert': False, 'nvmeof_root_ca_cert': False, @@ -1783,7 +1778,7 @@ class TestCephadm(object): expected_ls = { 'grafana_key': False, - 'mgmt_gw_root_key': False, + 'mgmt_gw_key': False, 'cephadm_root_ca_key': False, 'iscsi_ssl_key': False, 'ingress_ssl_key': False, diff --git a/src/pybind/mgr/cephadm/tests/test_services.py b/src/pybind/mgr/cephadm/tests/test_services.py index 4a2aae9c6ad..93768ff1f8f 100644 --- a/src/pybind/mgr/cephadm/tests/test_services.py +++ b/src/pybind/mgr/cephadm/tests/test_services.py @@ -46,6 +46,8 @@ from orchestrator._interface import DaemonDescription from typing import Dict, List +cephadm_root_ca = """-----BEGIN 
CERTIFICATE-----\\nMIIE7DCCAtSgAwIBAgIUE8b2zZ64geu2ns3Zfn3/4L+Cf6MwDQYJKoZIhvcNAQEL\\nBQAwFzEVMBMGA1UEAwwMY2VwaGFkbS1yb290MB4XDTI0MDYyNjE0NDA1M1oXDTM0\\nMDYyNzE0NDA1M1owFzEVMBMGA1UEAwwMY2VwaGFkbS1yb290MIICIjANBgkqhkiG\\n9w0BAQEFAAOCAg8AMIICCgKCAgEAsZRJsdtTr9GLG1lWFql5SGc46ldFanNJd1Gl\\nqXq5vgZVKRDTmNgAb/XFuNEEmbDAXYIRZolZeYKMHfn0pouPRSel0OsC6/02ZUOW\\nIuN89Wgo3IYleCFpkVIumD8URP3hwdu85plRxYZTtlruBaTRH38lssyCqxaOdEt7\\nAUhvYhcMPJThB17eOSQ73mb8JEC83vB47fosI7IhZuvXvRSuZwUW30rJanWNhyZq\\neS2B8qw2RSO0+77H6gA4ftBnitfsE1Y8/F9Z/f92JOZuSMQXUB07msznPbRJia3f\\nueO8gOc32vxd1A1/Qzp14uX34yEGY9ko2lW226cZO29IVUtXOX+LueQttwtdlpz8\\ne6Npm09pXhXAHxV/OW3M28MdXmobIqT/m9MfkeAErt5guUeC5y8doz6/3VQRjFEn\\nRpN0WkblgnNAQ3DONPc+Qd9Fi/wZV2X7bXoYpNdoWDsEOiE/eLmhG1A2GqU/mneP\\nzQ6u79nbdwTYpwqHpa+PvusXeLfKauzI8lLUJotdXy9EK8iHUofibB61OljYye6B\\nG3b8C4QfGsw8cDb4APZd/6AZYyMx/V3cGZ+GcOV7WvsC8k7yx5Uqasm/kiGQ3EZo\\nuNenNEYoGYrjb8D/8QzqNUTwlEh27/ps80tO7l2GGTvWVZL0PRZbmLDvO77amtOf\\nOiRXMoUCAwEAAaMwMC4wGwYDVR0RBBQwEocQAAAAAAAAAAAAAAAAAAAAATAPBgNV\\nHRMBAf8EBTADAQH/MA0GCSqGSIb3DQEBCwUAA4ICAQAxwzX5AhYEWhTV4VUwUj5+\\nqPdl4Q2tIxRokqyE+cDxoSd+6JfGUefUbNyBxDt0HaBq8obDqqrbcytxnn7mpnDu\\nhtiauY+I4Amt7hqFOiFA4cCLi2mfok6g2vL53tvhd9IrsfflAU2wy7hL76Ejm5El\\nA+nXlkJwps01Whl9pBkUvIbOn3pXX50LT4hb5zN0PSu957rjd2xb4HdfuySm6nW4\\n4GxtVWfmGA6zbC4XMEwvkuhZ7kD2qjkAguGDF01uMglkrkCJT3OROlNBuSTSBGqt\\ntntp5VytHvb7KTF7GttM3ha8/EU2KYaHM6WImQQTrOfiImAktOk4B3lzUZX3HYIx\\n+sByO4P4dCvAoGz1nlWYB2AvCOGbKf0Tgrh4t4jkiF8FHTXGdfvWmjgi1pddCNAy\\nn65WOCmVmLZPERAHOk1oBwqyReSvgoCFo8FxbZcNxJdlhM0Z6hzKggm3O3Dl88Xl\\n5euqJjh2STkBW8Xuowkg1TOs5XyWvKoDFAUzyzeLOL8YSG+gXV22gPTUaPSVAqdb\\nwd0Fx2kjConuC5bgTzQHs8XWA930U3XWZraj21Vaa8UxlBLH4fUro8H5lMSYlZNE\\nJHRNW8BkznAClaFSDG3dybLsrzrBFAu/Qb5zVkT1xyq0YkepGB7leXwq6vjWA5Pw\\nmZbKSphWfh0qipoqxqhfkw==\\n-----END CERTIFICATE-----\\n""" + ceph_generated_cert = """-----BEGIN 
CERTIFICATE-----\nMIICxjCCAa4CEQDIZSujNBlKaLJzmvntjukjMA0GCSqGSIb3DQEBDQUAMCExDTAL\nBgNVBAoMBENlcGgxEDAOBgNVBAMMB2NlcGhhZG0wHhcNMjIwNzEzMTE0NzA3WhcN\nMzIwNzEwMTE0NzA3WjAhMQ0wCwYDVQQKDARDZXBoMRAwDgYDVQQDDAdjZXBoYWRt\nMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAyyMe4DMA+MeYK7BHZMHB\nq7zjliEOcNgxomjU8qbf5USF7Mqrf6+/87XWqj4pCyAW8x0WXEr6A56a+cmBVmt+\nqtWDzl020aoId6lL5EgLLn6/kMDCCJLq++Lg9cEofMSvcZh+lY2f+1p+C+00xent\nrLXvXGOilAZWaQfojT2BpRnNWWIFbpFwlcKrlg2G0cFjV5c1m6a0wpsQ9JHOieq0\nSvwCixajwq3CwAYuuiU1wjI4oJO4Io1+g8yB3nH2Mo/25SApCxMXuXh4kHLQr/T4\n4hqisvG4uJYgKMcSIrWj5o25mclByGi1UI/kZkCUES94i7Z/3ihx4Bad0AMs/9tw\nFwIDAQABMA0GCSqGSIb3DQEBDQUAA4IBAQAf+pwz7Gd7mDwU2LY0TQXsK6/8KGzh\nHuX+ErOb8h5cOAbvCnHjyJFWf6gCITG98k9nxU9NToG0WYuNm/max1y/54f0dtxZ\npUo6KSNl3w6iYCfGOeUIj8isi06xMmeTgMNzv8DYhDt+P2igN6LenqWTVztogkiV\nxQ5ZJFFLEw4sN0CXnrZX3t5ruakxLXLTLKeE0I91YJvjClSBGkVJq26wOKQNHMhx\npWxeydQ5EgPZY+Aviz5Dnxe8aB7oSSovpXByzxURSabOuCK21awW5WJCGNpmqhWK\nZzACBDEstccj57c4OGV0eayHJRsluVr2e9NHRINZA3qdB37e6gsI1xHo\n-----END CERTIFICATE-----\n""" ceph_generated_key = """-----BEGIN PRIVATE 
KEY-----\nMIIEvAIBADANBgkqhkiG9w0BAQEFAASCBKYwggSiAgEAAoIBAQDLIx7gMwD4x5gr\nsEdkwcGrvOOWIQ5w2DGiaNTypt/lRIXsyqt/r7/ztdaqPikLIBbzHRZcSvoDnpr5\nyYFWa36q1YPOXTbRqgh3qUvkSAsufr+QwMIIkur74uD1wSh8xK9xmH6VjZ/7Wn4L\n7TTF6e2ste9cY6KUBlZpB+iNPYGlGc1ZYgVukXCVwquWDYbRwWNXlzWbprTCmxD0\nkc6J6rRK/AKLFqPCrcLABi66JTXCMjigk7gijX6DzIHecfYyj/blICkLExe5eHiQ\nctCv9PjiGqKy8bi4liAoxxIitaPmjbmZyUHIaLVQj+RmQJQRL3iLtn/eKHHgFp3Q\nAyz/23AXAgMBAAECggEAVoTB3Mm8azlPlaQB9GcV3tiXslSn+uYJ1duCf0sV52dV\nBzKW8s5fGiTjpiTNhGCJhchowqxoaew+o47wmGc2TvqbpeRLuecKrjScD0GkCYyQ\neM2wlshEbz4FhIZdgS6gbuh9WaM1dW/oaZoBNR5aTYo7xYTmNNeyLA/jO2zr7+4W\n5yES1lMSBXpKk7bDGKYY4bsX2b5RLr2Grh2u2bp7hoLABCEvuu8tSQdWXLEXWpXo\njwmV3hc6tabypIa0mj2Dmn2Dmt1ppSO0AZWG/WAizN3f4Z0r/u9HnbVrVmh0IEDw\n3uf2LP5o3msG9qKCbzv3lMgt9mMr70HOKnJ8ohMSKQKBgQDLkNb+0nr152HU9AeJ\nvdz8BeMxcwxCG77iwZphZ1HprmYKvvXgedqWtS6FRU+nV6UuQoPUbQxJBQzrN1Qv\nwKSlOAPCrTJgNgF/RbfxZTrIgCPuK2KM8I89VZv92TSGi362oQA4MazXC8RAWjoJ\nSu1/PHzK3aXOfVNSLrOWvIYeZQKBgQD/dgT6RUXKg0UhmXj7ExevV+c7oOJTDlMl\nvLngrmbjRgPO9VxLnZQGdyaBJeRngU/UXfNgajT/MU8B5fSKInnTMawv/tW7634B\nw3v6n5kNIMIjJmENRsXBVMllDTkT9S7ApV+VoGnXRccbTiDapBThSGd0wri/CuwK\nNWK1YFOeywKBgEDyI/XG114PBUJ43NLQVWm+wx5qszWAPqV/2S5MVXD1qC6zgCSv\nG9NLWN1CIMimCNg6dm7Wn73IM7fzvhNCJgVkWqbItTLG6DFf3/DPODLx1wTMqLOI\nqFqMLqmNm9l1Nec0dKp5BsjRQzq4zp1aX21hsfrTPmwjxeqJZdioqy2VAoGAXR5X\nCCdSHlSlUW8RE2xNOOQw7KJjfWT+WAYoN0c7R+MQplL31rRU7dpm1bLLRBN11vJ8\nMYvlT5RYuVdqQSP6BkrX+hLJNBvOLbRlL+EXOBrVyVxHCkDe+u7+DnC4epbn+N8P\nLYpwqkDMKB7diPVAizIKTBxinXjMu5fkKDs5n+sCgYBbZheYKk5M0sIxiDfZuXGB\nkf4mJdEkTI1KUGRdCwO/O7hXbroGoUVJTwqBLi1tKqLLarwCITje2T200BYOzj82\nqwRkCXGtXPKnxYEEUOiFx9OeDrzsZV00cxsEnX0Zdj+PucQ/J3Cvd0dWUspJfLHJ\n39gnaegswnz9KMQAvzKFdg==\n-----END PRIVATE KEY-----\n""" @@ -603,18 +605,14 @@ class TestMonitoring: @patch("socket.getfqdn") @patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '::1') @patch("cephadm.services.monitoring.password_hash", lambda password: 'alertmanager_password_hash') + 
@patch('cephadm.cert_mgr.CertMgr.get_root_ca', lambda instance: 'cephadm_root_cert') + @patch('cephadm.cert_mgr.CertMgr.generate_cert', lambda instance, fqdn, ip: ('mycert', 'mykey')) def test_alertmanager_config_security_enabled(self, _get_fqdn, _run_cephadm, cephadm_module: CephadmOrchestrator): _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) fqdn = 'host1.test' _get_fqdn.return_value = fqdn - def gen_cert(host, addr): - return ('mycert', 'mykey') - - def get_root_cert(): - return 'my_root_cert' - with with_host(cephadm_module, 'test'): cephadm_module.secure_monitoring_stack = True cephadm_module.set_store(AlertmanagerService.USER_CFG_KEY, 'alertmanager_user') @@ -653,7 +651,8 @@ class TestMonitoring: cert_file: alertmanager.crt key_file: alertmanager.key basic_auth_users: - alertmanager_user: alertmanager_password_hash""").lstrip() + alertmanager_user: alertmanager_password_hash + """).lstrip() _run_cephadm.assert_called_with( 'test', @@ -684,7 +683,7 @@ class TestMonitoring: 'alertmanager.crt': 'mycert', 'alertmanager.key': 'mykey', 'web.yml': web_config, - 'root_cert.pem': 'my_root_cert' + 'root_cert.pem': 'cephadm_root_cert' }, 'peers': [], 'web_config': '/etc/alertmanager/web.yml', @@ -836,6 +835,8 @@ class TestMonitoring: @patch("cephadm.serve.CephadmServe._run_cephadm") @patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '::1') @patch("cephadm.services.monitoring.password_hash", lambda password: 'prometheus_password_hash') + @patch('cephadm.cert_mgr.CertMgr.get_root_ca', lambda instance: 'cephadm_root_cert') + @patch('cephadm.cert_mgr.CertMgr.generate_cert', lambda instance, fqdn, ip: ('mycert', 'mykey')) def test_prometheus_config_security_enabled(self, _run_cephadm, cephadm_module: CephadmOrchestrator): _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) s = RGWSpec(service_id="foo", placement=PlacementSpec(count=1), rgw_frontend_type='beast') @@ -875,7 +876,8 @@ class TestMonitoring: cert_file: prometheus.crt key_file: 
prometheus.key basic_auth_users: - prometheus_user: prometheus_password_hash""").lstrip() + prometheus_user: prometheus_password_hash + """).lstrip() y = dedent(""" # This file is generated by cephadm. @@ -894,6 +896,9 @@ class TestMonitoring: password: alertmanager_plain_password tls_config: ca_file: root_cert.pem + cert_file: prometheus.crt + key_file: prometheus.key + path_prefix: '/' http_sd_configs: - url: https://[::1]:8765/sd/prometheus/sd-config?service=alertmanager basic_auth: @@ -906,7 +911,7 @@ class TestMonitoring: - job_name: 'ceph' scheme: https tls_config: - ca_file: mgr_prometheus_cert.pem + ca_file: root_cert.pem honor_labels: true relabel_configs: - source_labels: [instance] @@ -924,6 +929,8 @@ class TestMonitoring: scheme: https tls_config: ca_file: root_cert.pem + cert_file: prometheus.crt + key_file: prometheus.key http_sd_configs: - url: https://[::1]:8765/sd/prometheus/sd-config?service=node-exporter basic_auth: @@ -998,8 +1005,7 @@ class TestMonitoring: "config_blobs": { 'files': { 'prometheus.yml': y, - 'root_cert.pem': '', - 'mgr_prometheus_cert.pem': '', + 'root_cert.pem': 'cephadm_root_cert', 'web.yml': web_config, 'prometheus.crt': 'mycert', 'prometheus.key': 'mykey', @@ -3194,8 +3200,12 @@ class TestSMB: class TestMgmtGateway: @patch("cephadm.serve.CephadmServe._run_cephadm") @patch("cephadm.services.mgmt_gateway.MgmtGatewayService.get_service_endpoints") + @patch("cephadm.services.mgmt_gateway.MgmtGatewayService.get_external_certificates", + lambda instance, svc_spec, dspec: (ceph_generated_cert, ceph_generated_key)) + @patch("cephadm.services.mgmt_gateway.MgmtGatewayService.get_internal_certificates", + lambda instance, dspec: (ceph_generated_cert, ceph_generated_key)) @patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '::1') - @patch('cephadm.ssl_cert_utils.SSLCerts.generate_cert', lambda instance, fqdn, ip: (ceph_generated_cert, ceph_generated_key)) + @patch('cephadm.cert_mgr.CertMgr.get_root_ca', lambda instance: 
cephadm_root_ca) @patch("cephadm.services.mgmt_gateway.get_dashboard_endpoints", lambda _: (["ceph-node-2:8443", "ceph-node-2:8443"], "https")) def test_mgmt_gateway_config(self, get_service_endpoints_mock: List[str], _run_cephadm, cephadm_module: CephadmOrchestrator): @@ -3311,14 +3321,29 @@ class TestMgmtGateway: location /grafana { rewrite ^/grafana/(.*) /$1 break; proxy_pass https://grafana_servers; + # clear any Authorization header as Prometheus and Alertmanager are using basic-auth browser + # will send this header if Grafana is running on the same node as one of those services + proxy_set_header Authorization ""; } location /prometheus { - proxy_pass http://prometheus_servers; + proxy_pass https://prometheus_servers; + + proxy_ssl_certificate /etc/nginx/ssl/nginx_internal.crt; + proxy_ssl_certificate_key /etc/nginx/ssl/nginx_internal.key; + proxy_ssl_trusted_certificate /etc/nginx/ssl/ca.crt; + proxy_ssl_verify on; + proxy_ssl_verify_depth 2; } location /alertmanager { - proxy_pass http://alertmanager_servers; + proxy_pass https://alertmanager_servers; + + proxy_ssl_certificate /etc/nginx/ssl/nginx_internal.crt; + proxy_ssl_certificate_key /etc/nginx/ssl/nginx_internal.key; + proxy_ssl_trusted_certificate /etc/nginx/ssl/ca.crt; + proxy_ssl_verify on; + proxy_ssl_verify_depth 2; } }"""), "nginx_internal_server.conf": dedent(""" @@ -3331,6 +3356,12 @@ class TestMgmtGateway: ssl_ciphers AES128-SHA:AES256-SHA:RC4-SHA:DES-CBC3-SHA:RC4-MD5; ssl_prefer_server_ciphers on; + location /internal/dashboard { + rewrite ^/internal/dashboard/(.*) /$1 break; + proxy_pass https://dashboard_servers; + proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504; + } + location /internal/grafana { rewrite ^/internal/grafana/(.*) /$1 break; proxy_pass https://grafana_servers; @@ -3338,16 +3369,29 @@ class TestMgmtGateway: location /internal/prometheus { rewrite ^/internal/prometheus/(.*) /prometheus/$1 break; - proxy_pass http://prometheus_servers; + 
proxy_pass https://prometheus_servers; + + proxy_ssl_certificate /etc/nginx/ssl/nginx_internal.crt; + proxy_ssl_certificate_key /etc/nginx/ssl/nginx_internal.key; + proxy_ssl_trusted_certificate /etc/nginx/ssl/ca.crt; + proxy_ssl_verify on; + proxy_ssl_verify_depth 2; } location /internal/alertmanager { rewrite ^/internal/alertmanager/(.*) /alertmanager/$1 break; - proxy_pass http://alertmanager_servers; + proxy_pass https://alertmanager_servers; + + proxy_ssl_certificate /etc/nginx/ssl/nginx_internal.crt; + proxy_ssl_certificate_key /etc/nginx/ssl/nginx_internal.key; + proxy_ssl_trusted_certificate /etc/nginx/ssl/ca.crt; + proxy_ssl_verify on; + proxy_ssl_verify_depth 2; } }"""), "nginx_internal.crt": f"{ceph_generated_cert}", "nginx_internal.key": f"{ceph_generated_key}", + "ca.crt": f"{cephadm_root_ca}", "nginx.crt": f"{ceph_generated_cert}", "nginx.key": f"{ceph_generated_key}", } diff --git a/src/pybind/mgr/dashboard/controllers/prometheus.py b/src/pybind/mgr/dashboard/controllers/prometheus.py index d0ad51c8f7d..c00d8c70e63 100644 --- a/src/pybind/mgr/dashboard/controllers/prometheus.py +++ b/src/pybind/mgr/dashboard/controllers/prometheus.py @@ -30,6 +30,14 @@ class PrometheusReceiver(BaseController): class PrometheusRESTController(RESTController): + + def close_unlink_files(self, files): + # type (List[str]) + valid_entries = [f for f in files if f is not None] + for f in valid_entries: + f.close() + os.unlink(f.name) + def prometheus_proxy(self, method, path, params=None, payload=None): # type (str, str, dict, dict) user, password, ca_cert_file, cert_file, key_file = self.get_access_info('prometheus') @@ -39,10 +47,7 @@ class PrometheusRESTController(RESTController): method, path, 'Prometheus', params, payload, user=user, password=password, verify=verify, cert=cert) - for f in [ca_cert_file, cert_file, key_file]: - if f: - f.close() - os.unlink(f.name) + self.close_unlink_files([ca_cert_file, cert_file, key_file]) return response def alert_proxy(self, 
method, path, params=None, payload=None): @@ -54,14 +59,21 @@ class PrometheusRESTController(RESTController): method, path, 'Alertmanager', params, payload, user=user, password=password, verify=verify, cert=cert, is_alertmanager=True) - for f in [ca_cert_file, cert_file, key_file]: - if f: - f.close() - os.unlink(f.name) + self.close_unlink_files([ca_cert_file, cert_file, key_file]) return response def get_access_info(self, module_name): - # type (str, str, str, str, srt) + # type (str, str, str, str, str) + + def write_to_tmp_file(content): + # type (str) + if content is None: + return None + tmp_file = tempfile.NamedTemporaryFile(delete=False) + tmp_file.write(content.encode('utf-8')) + tmp_file.flush() # tmp_file must not be gc'ed + return tmp_file + if module_name not in ['prometheus', 'alertmanager']: raise DashboardException(f'Invalid module name {module_name}', component='prometheus') user = None @@ -69,37 +81,18 @@ class PrometheusRESTController(RESTController): cert_file = None pkey_file = None ca_cert_file = None - orch_backend = mgr.get_module_option_ex('orchestrator', 'orchestrator') if orch_backend == 'cephadm': - secure_monitoring_stack = mgr.get_module_option_ex('cephadm', - 'secure_monitoring_stack', - False) - if secure_monitoring_stack: - cmd = {'prefix': f'orch {module_name} get-credentials'} - ret, out, _ = mgr.mon_command(cmd) - if ret == 0 and out is not None: - access_info = json.loads(out) + cmd = {'prefix': f'orch {module_name} get-credentials'} + ret, out, _ = mgr.mon_command(cmd) + if ret == 0 and out is not None: + access_info = json.loads(out) + if access_info: user = access_info['user'] password = access_info['password'] - certificate = access_info['certificate'] - ca_cert_file = tempfile.NamedTemporaryFile(delete=False) - ca_cert_file.write(certificate.encode('utf-8')) - ca_cert_file.flush() - - cert_file = None - cert = mgr.get_localized_store("crt") # type: ignore - if cert is not None: - cert_file = 
tempfile.NamedTemporaryFile(delete=False) - cert_file.write(cert.encode('utf-8')) - cert_file.flush() # cert_tmp must not be gc'ed - - pkey_file = None - pkey = mgr.get_localized_store("key") # type: ignore - if pkey is not None: - pkey_file = tempfile.NamedTemporaryFile(delete=False) - pkey_file.write(pkey.encode('utf-8')) - pkey_file.flush() + ca_cert_file = write_to_tmp_file(access_info['certificate']) + cert_file = write_to_tmp_file(mgr.get_localized_store("crt")) + pkey_file = write_to_tmp_file(mgr.get_localized_store("key")) return user, password, ca_cert_file, cert_file, pkey_file diff --git a/src/pybind/mgr/orchestrator/_interface.py b/src/pybind/mgr/orchestrator/_interface.py index 7584fabec0f..cc389545c45 100644 --- a/src/pybind/mgr/orchestrator/_interface.py +++ b/src/pybind/mgr/orchestrator/_interface.py @@ -794,7 +794,7 @@ class Orchestrator(object): raise NotImplementedError() def generate_certificates(self, module_name: str) -> OrchResult[Optional[Dict[str, str]]]: - """set prometheus access information""" + """generate cert/key for the module with the name module_name""" raise NotImplementedError() def set_custom_prometheus_alerts(self, alerts_file: str) -> OrchResult[str]: diff --git a/src/pybind/mgr/prometheus/module.py b/src/pybind/mgr/prometheus/module.py index 7a4bca70fa4..8b1c0921896 100644 --- a/src/pybind/mgr/prometheus/module.py +++ b/src/pybind/mgr/prometheus/module.py @@ -1765,18 +1765,23 @@ class Module(MgrModule, OrchestratorClientMixin): self.get_file_sd_config() def configure(self, server_addr: str, server_port: int) -> None: - # cephadm deployments have a TLS monitoring stack setup option. 
- # If the cephadm module is on and the setting is true (defaults to false) - # we should have prometheus be set up to interact with that - cephadm_secure_monitoring_stack = self.get_module_option_ex( - 'cephadm', 'secure_monitoring_stack', False) - if cephadm_secure_monitoring_stack: - try: - self.setup_tls_config(server_addr, server_port) - return - except Exception as e: - self.log.exception(f'Failed to setup cephadm based secure monitoring stack: {e}\n', - 'Falling back to default configuration') + # TODO(redo): this new check is hacky, we should provide an explicit cmd + # from cephadm to get/check the security status + + # if cephadm is configured with security then TLS must be used + cmd = {'prefix': 'orch prometheus get-credentials'} + ret, out, _ = self.mon_command(cmd) + if ret == 0 and out is not None: + access_info = json.loads(out) + if access_info: + try: + self.setup_tls_using_cephadm(server_addr, server_port) + return + except Exception as e: + self.log.exception(f'Failed to setup cephadm based secure monitoring stack: {e}\n', + 'Falling back to default configuration') + + # On any error, fall back to plain http mode self.setup_default_config(server_addr, server_port) def setup_default_config(self, server_addr: str, server_port: int) -> None: @@ -1792,7 +1797,7 @@ class Module(MgrModule, OrchestratorClientMixin): self.set_uri(build_url(scheme='http', host=self.get_server_addr(), port=server_port, path='/')) - def setup_tls_config(self, server_addr: str, server_port: int) -> None: + def setup_tls_using_cephadm(self, server_addr: str, server_port: int) -> None: from mgr_util import verify_tls_files cmd = {'prefix': 'orch certmgr generate-certificates', 'module_name': 'prometheus', -- 2.39.5