git-server-git.apps.pok.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
mgr/cephadm: enabling security on prometheus monitoring stack
author Redouane Kachach <rkachach@redhat.com>
Wed, 25 May 2022 10:27:23 +0000 (12:27 +0200)
committer Redouane Kachach <rkachach@redhat.com>
Mon, 20 Feb 2023 12:02:08 +0000 (13:02 +0100)
Signed-off-by: Redouane Kachach <rkachach@redhat.com>
22 files changed:
src/cephadm/cephadm.py
src/pybind/mgr/cephadm/http_server.py
src/pybind/mgr/cephadm/module.py
src/pybind/mgr/cephadm/serve.py
src/pybind/mgr/cephadm/service_discovery.py
src/pybind/mgr/cephadm/services/monitoring.py
src/pybind/mgr/cephadm/templates/services/alertmanager/alertmanager.yml.j2
src/pybind/mgr/cephadm/templates/services/alertmanager/web.yml.j2 [new file with mode: 0644]
src/pybind/mgr/cephadm/templates/services/grafana/ceph-dashboard.yml.j2
src/pybind/mgr/cephadm/templates/services/node-exporter/web.yml.j2 [new file with mode: 0644]
src/pybind/mgr/cephadm/templates/services/prometheus/prometheus.yml.j2
src/pybind/mgr/cephadm/templates/services/prometheus/web.yml.j2 [new file with mode: 0644]
src/pybind/mgr/cephadm/tests/test_service_discovery.py
src/pybind/mgr/cephadm/tests/test_services.py
src/pybind/mgr/dashboard/controllers/prometheus.py
src/pybind/mgr/dashboard/services/access_control.py
src/pybind/mgr/dashboard/tests/test_prometheus.py
src/pybind/mgr/mgr_util.py
src/pybind/mgr/orchestrator/_interface.py
src/pybind/mgr/orchestrator/module.py
src/pybind/mgr/prometheus/module.py
src/pybind/mgr/requirements-required.txt

index 07dcecd06524513e1183bb6e3b097ae56f8377f1..ec67567b67e56508e74751b4849a757e3db4a3e3 100755 (executable)
@@ -589,7 +589,7 @@ class Monitoring(object):
             'cpus': '1',
             'memory': '1GB',
             'args': [
-                '--no-collector.timex',
+                '--no-collector.timex'
             ],
         },
         'grafana': {
@@ -2711,11 +2711,26 @@ def get_daemon_args(ctx, fsid, daemon_type, daemon_id):
             peers = config.get('peers', list())  # type: ignore
             for peer in peers:
                 r += ['--cluster.peer={}'.format(peer)]
+            try:
+                r += [f'--web.config.file={config["web_config"]}']
+            except KeyError:
+                pass
             # some alertmanager, by default, look elsewhere for a config
             r += ['--config.file=/etc/alertmanager/alertmanager.yml']
         if daemon_type == 'promtail':
             r += ['--config.expand-env']
+        if daemon_type == 'prometheus':
+            config = get_parm(ctx.config_json)
+            try:
+                r += [f'--web.config.file={config["web_config"]}']
+            except KeyError:
+                pass
         if daemon_type == 'node-exporter':
+            config = get_parm(ctx.config_json)
+            try:
+                r += [f'--web.config={config["web_config"]}']
+            except KeyError:
+                pass
             r += ['--path.procfs=/host/proc',
                   '--path.sysfs=/host/sys',
                   '--path.rootfs=/rootfs']
@@ -2806,6 +2821,12 @@ def create_daemon_dirs(ctx, fsid, daemon_type, daemon_id, uid, gid,
             config_dir = 'etc/loki'
             makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
             makedirs(os.path.join(data_dir_root, 'data'), uid, gid, 0o755)
+        elif daemon_type == 'node-exporter':
+            data_dir_root = get_data_dir(fsid, ctx.data_dir,
+                                         daemon_type, daemon_id)
+            config_dir = 'etc/node-exporter'
+            makedirs(os.path.join(data_dir_root, config_dir), uid, gid, 0o755)
+            recursive_chown(os.path.join(data_dir_root, 'etc'), uid, gid)
 
         # populate the config directory for the component from the config-json
         if 'files' in config_json:
@@ -3039,6 +3060,7 @@ def get_container_mounts(ctx, fsid, daemon_type, daemon_id,
             mounts[log_dir] = '/var/log/ceph:z'
             mounts[os.path.join(data_dir, 'data')] = '/promtail:Z'
         elif daemon_type == 'node-exporter':
+            mounts[os.path.join(data_dir, 'etc/node-exporter')] = '/etc/node-exporter:Z'
             mounts['/proc'] = '/host/proc:ro'
             mounts['/sys'] = '/host/sys:ro'
             mounts['/'] = '/rootfs:ro'
index 0c0b940aa94d9c18600511abd23f8866794e944e..ef29d3b4e7535618ee355d5fa2bb7ccb5365a371 100644 (file)
@@ -31,6 +31,7 @@ class CephadmHttpServer(threading.Thread):
         self.service_discovery = ServiceDiscovery(mgr)
         self.cherrypy_shutdown_event = threading.Event()
         self._service_discovery_port = self.mgr.service_discovery_port
+        self.secure_monitoring_stack = self.mgr.secure_monitoring_stack
         super().__init__(target=self.run)
 
     def configure_cherrypy(self) -> None:
@@ -42,10 +43,15 @@ class CephadmHttpServer(threading.Thread):
     def configure(self) -> None:
         self.configure_cherrypy()
         self.agent.configure()
-        self.service_discovery.configure(self.mgr.service_discovery_port, self.mgr.get_mgr_ip())
+        self.service_discovery.configure(self.mgr.service_discovery_port,
+                                         self.mgr.get_mgr_ip(),
+                                         self.secure_monitoring_stack)
 
     def config_update(self) -> None:
         self.service_discovery_port = self.mgr.service_discovery_port
+        if self.secure_monitoring_stack != self.mgr.secure_monitoring_stack:
+            self.secure_monitoring_stack = self.mgr.secure_monitoring_stack
+            self.restart()
 
     @property
     def service_discovery_port(self) -> int:
index ca4c1a482cf30af26fb2052a4b2aac8c60ac9695..2f646cabbb15d7c1eed455a527f3f71124eda127 100644 (file)
@@ -442,6 +442,36 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule,
             default=False,
             desc='Log all refresh metadata. Includes daemon, device, and host info collected regularly. Only has effect if logging at debug level'
         ),
+        Option(
+            'prometheus_web_user',
+            type='str',
+            default='admin',
+            desc='Prometheus web user'
+        ),
+        Option(
+            'prometheus_web_password',
+            type='str',
+            default='admin',
+            desc='Prometheus web password'
+        ),
+        Option(
+            'alertmanager_web_user',
+            type='str',
+            default='admin',
+            desc='Alertmanager web user'
+        ),
+        Option(
+            'alertmanager_web_password',
+            type='str',
+            default='admin',
+            desc='Alertmanager web password'
+        ),
+        Option(
+            'secure_monitoring_stack',
+            type='bool',
+            default=False,
+            desc='Enable TLS security for all the monitoring stack daemons'
+        ),
     ]
 
     def __init__(self, *args: Any, **kwargs: Any):
@@ -514,6 +544,11 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule,
             self.agent_down_multiplier = 0.0
             self.agent_starting_port = 0
             self.service_discovery_port = 0
+            self.secure_monitoring_stack = False
+            self.prometheus_web_password: Optional[str] = None
+            self.prometheus_web_user: Optional[str] = None
+            self.alertmanager_web_password: Optional[str] = None
+            self.alertmanager_web_user: Optional[str] = None
             self.apply_spec_fails: List[Tuple[str, str]] = []
             self.max_osd_draining_count = 10
             self.device_enhanced_scan = False
@@ -2462,6 +2497,14 @@ Then run the following:
                           spec: Optional[ServiceSpec],
                           daemon_type: str,
                           daemon_id: str) -> List[str]:
+
+        def get_daemon_names(daemons: List[str]) -> List[str]:
+            daemon_names = []
+            for daemon_type in daemons:
+                for dd in self.cache.get_daemons_by_type(daemon_type):
+                    daemon_names.append(dd.name())
+            return daemon_names
+
         deps = []
         if daemon_type == 'haproxy':
             # because cephadm creates new daemon instances whenever
@@ -2514,15 +2557,28 @@ Then run the following:
                 deps.append('ingress')
             # add dependency on ceph-exporter daemons
             deps += [d.name() for d in self.cache.get_daemons_by_service('ceph-exporter')]
+            if self.secure_monitoring_stack:
+                if self.prometheus_web_user and self.prometheus_web_password:
+                    deps.append(f'{hash(self.prometheus_web_user + self.prometheus_web_password)}')
+                if self.alertmanager_web_user and self.alertmanager_web_password:
+                    deps.append(f'{hash(self.alertmanager_web_user + self.alertmanager_web_password)}')
+        elif daemon_type == 'grafana':
+            deps += get_daemon_names(['prometheus', 'loki'])
+            if self.secure_monitoring_stack and self.prometheus_web_user and self.prometheus_web_password:
+                deps.append(f'{hash(self.prometheus_web_user + self.prometheus_web_password)}')
+        elif daemon_type == 'alertmanager':
+            deps += get_daemon_names(['mgr', 'alertmanager', 'snmp-gateway'])
+            if self.secure_monitoring_stack and self.alertmanager_web_user and self.alertmanager_web_password:
+                deps.append(f'{hash(self.alertmanager_web_user + self.alertmanager_web_password)}')
+        elif daemon_type == 'promtail':
+            deps += get_daemon_names(['loki'])
         else:
-            need = {
-                'grafana': ['prometheus', 'loki'],
-                'alertmanager': ['mgr', 'alertmanager', 'snmp-gateway'],
-                'promtail': ['loki'],
-            }
-            for dep_type in need.get(daemon_type, []):
-                for dd in self.cache.get_daemons_by_type(dep_type):
-                    deps.append(dd.name())
+            # TODO(redo): some error message!
+            pass
+
+        if daemon_type in ['prometheus', 'node-exporter', 'alertmanager', 'grafana']:
+            deps.append(f'secure_monitoring_stack:{self.secure_monitoring_stack}')
+
         return sorted(deps)
 
     @forall_hosts
@@ -2613,6 +2669,18 @@ Then run the following:
             self.events.from_orch_error(e)
             raise
 
+    @handle_orch_error
+    def get_prometheus_access_info(self) -> Dict[str, str]:
+        return {'user': self.prometheus_web_user or '',
+                'password': self.prometheus_web_password or '',
+                'certificate': self.http_server.service_discovery.ssl_certs.get_root_cert()}
+
+    @handle_orch_error
+    def get_alertmanager_access_info(self) -> Dict[str, str]:
+        return {'user': self.alertmanager_web_user or '',
+                'password': self.alertmanager_web_password or '',
+                'certificate': self.http_server.service_discovery.ssl_certs.get_root_cert()}
+
     @handle_orch_error
     def apply_mon(self, spec: ServiceSpec) -> str:
         return self._apply(spec)
index 0d92f014bba3e89081959f498def2d3a9053356b..b5ae1677c5d392857c821ce8f0ddd7476ae2bc46 100644 (file)
@@ -971,11 +971,15 @@ class CephadmServe:
                     dd.name()))
                 action = 'reconfig'
             elif last_deps != deps:
-                self.log.debug('%s deps %s -> %s' % (dd.name(), last_deps,
-                                                     deps))
-                self.log.info('Reconfiguring %s (dependencies changed)...' % (
-                    dd.name()))
+                self.log.debug(f'{dd.name()} deps {last_deps} -> {deps}')
+                self.log.info(f'Reconfiguring {dd.name()} (dependencies changed)...')
                 action = 'reconfig'
+                # we need only redeploy if secure_monitoring_stack value has changed:
+                if dd.daemon_type in ['prometheus', 'node-exporter', 'alertmanager']:
+                    diff = list(set(last_deps) - set(deps))
+                    if any('secure_monitoring_stack' in e for e in diff):
+                        action = 'redeploy'
+
             elif spec is not None and hasattr(spec, 'extra_container_args') and dd.extra_container_args != spec.extra_container_args:
                 self.log.debug(
                     f'{dd.name()} container cli args {dd.extra_container_args} -> {spec.extra_container_args}')
index 83d0f52a0732376da8e7adaed9354b01378696a4..ddc0574e2b12c4ae8cd36bd7cc8114b7d4cce986 100644 (file)
@@ -7,11 +7,14 @@ except ImportError:
         pass
 
 import logging
+import socket
+
 import orchestrator  # noqa
 from mgr_module import ServiceInfoT
 from mgr_util import build_url
-from typing import Dict, List, TYPE_CHECKING, cast, Collection, Callable, NamedTuple
+from typing import Dict, List, TYPE_CHECKING, cast, Collection, Callable, NamedTuple, Optional
 from cephadm.services.monitoring import AlertmanagerService, NodeExporterService, PrometheusService
+import secrets
 
 from cephadm.services.ingress import IngressSpec
 from cephadm.ssl_cert_utils import SSLCerts
@@ -47,8 +50,13 @@ class ServiceDiscovery:
     def __init__(self, mgr: "CephadmOrchestrator") -> None:
         self.mgr = mgr
         self.ssl_certs = SSLCerts()
+        self.username: Optional[str] = None
+        self.password: Optional[str] = None
+
+    def validate_password(self, realm: str, username: str, password: str) -> bool:
+        return (password == self.password and username == self.username)
 
-    def configure_routes(self, server: Server) -> None:
+    def configure_routes(self, server: Server, enable_auth: bool) -> None:
         ROUTES = [
             Route('index', '/', server.index),
             Route('sd-config', '/prometheus/sd-config', server.get_sd_config),
@@ -57,9 +65,28 @@ class ServiceDiscovery:
         d = cherrypy.dispatch.RoutesDispatcher()
         for route in ROUTES:
             d.connect(**route._asdict())
-        conf = {'/': {'request.dispatch': d}}
+        if enable_auth:
+            conf = {
+                '/': {
+                    'request.dispatch': d,
+                    'tools.auth_basic.on': True,
+                    'tools.auth_basic.realm': 'localhost',
+                    'tools.auth_basic.checkpassword': self.validate_password
+                }
+            }
+        else:
+            conf = {'/': {'request.dispatch': d}}
         cherrypy.tree.mount(None, '/sd', config=conf)
 
+    def enable_auth(self) -> None:
+        self.username = self.mgr.get_store('service_discovery/root/username')
+        self.password = self.mgr.get_store('service_discovery/root/password')
+        if not self.password or not self.username:
+            self.username = 'admin'  # TODO(redo): what should be the default username
+            self.password = secrets.token_urlsafe(20)
+            self.mgr.set_store('service_discovery/root/password', self.password)
+            self.mgr.set_store('service_discovery/root/username', self.username)
+
     def configure_tls(self, server: Server) -> None:
         old_cert = self.mgr.get_store(self.KV_STORE_SD_ROOT_CERT)
         old_key = self.mgr.get_store(self.KV_STORE_SD_ROOT_KEY)
@@ -69,16 +96,20 @@ class ServiceDiscovery:
             self.ssl_certs.generate_root_cert(self.mgr.get_mgr_ip())
             self.mgr.set_store(self.KV_STORE_SD_ROOT_CERT, self.ssl_certs.get_root_cert())
             self.mgr.set_store(self.KV_STORE_SD_ROOT_KEY, self.ssl_certs.get_root_key())
-
-        host = self.mgr.get_hostname()
         addr = self.mgr.get_mgr_ip()
-        server.ssl_certificate, server.ssl_private_key = self.ssl_certs.generate_cert_files(host, addr)
+        host_fqdn = socket.getfqdn(addr)
+        server.ssl_certificate, server.ssl_private_key = self.ssl_certs.generate_cert_files(
+            host_fqdn, addr)
 
-    def configure(self, port: int, addr: str) -> None:
+    def configure(self, port: int, addr: str, enable_security: bool) -> None:
         # we create a new server to enforce TLS/SSL config refresh
         self.root_server = Root(self.mgr, port, addr)
-        self.configure_tls(self.root_server)
-        self.configure_routes(self.root_server)
+        self.root_server.ssl_certificate = None
+        self.root_server.ssl_private_key = None
+        if enable_security:
+            self.enable_auth()
+            self.configure_tls(self.root_server)
+        self.configure_routes(self.root_server, enable_security)
 
 
 class Root(Server):
@@ -95,7 +126,7 @@ class Root(Server):
         self.unsubscribe()
         super().stop()
 
-    def __init__(self, mgr: "CephadmOrchestrator", port: int, host: str):
+    def __init__(self, mgr: "CephadmOrchestrator", port: int = 0, host: str = ''):
         self.mgr = mgr
         super().__init__()
         self.socket_port = port
@@ -142,7 +173,7 @@ class Root(Server):
         for server in servers:
             hostname = server.get('hostname', '')
             for service in cast(List[ServiceInfoT], server.get('services', [])):
-                if service['type'] != 'mgr':
+                if service['type'] != 'mgr' or service['id'] != self.mgr.get_mgr_id():
                     continue
                 port = self.mgr.get_module_option_ex(
                     'prometheus', 'server_port', PrometheusService.DEFAULT_MGR_PROMETHEUS_PORT)
index c709e5f5e9bcbde94c4892062acc277fc4fe7504..6c63ef6436a105f8307511c05ab38e2113778785 100644 (file)
@@ -12,7 +12,7 @@ from orchestrator import DaemonDescription
 from ceph.deployment.service_spec import AlertManagerSpec, GrafanaSpec, ServiceSpec, \
     SNMPGatewaySpec, PrometheusSpec
 from cephadm.services.cephadmservice import CephadmService, CephadmDaemonDeploySpec
-from mgr_util import verify_tls, ServerConfigException, create_self_signed_cert, build_url, get_cert_issuer_info
+from mgr_util import verify_tls, ServerConfigException, create_self_signed_cert, build_url, get_cert_issuer_info, password_hash
 from ceph.deployment.utils import wrap_ipv6
 
 logger = logging.getLogger(__name__)
@@ -30,13 +30,17 @@ class GrafanaService(CephadmService):
     def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]:
         assert self.TYPE == daemon_spec.daemon_type
         deps = []  # type: List[str]
+        if self.mgr.secure_monitoring_stack and self.mgr.prometheus_web_user and self.mgr.prometheus_web_password:
+            deps.append(f'{hash(self.mgr.prometheus_web_user + self.mgr.prometheus_web_password)}')
+        deps.append(f'secure_monitoring_stack:{self.mgr.secure_monitoring_stack}')
 
         prom_services = []  # type: List[str]
         for dd in self.mgr.cache.get_daemons_by_service('prometheus'):
             assert dd.hostname is not None
             addr = dd.ip if dd.ip else self._inventory_get_fqdn(dd.hostname)
             port = dd.ports[0] if dd.ports else 9095
-            prom_services.append(build_url(scheme='http', host=addr, port=port))
+            protocol = 'https' if self.mgr.secure_monitoring_stack else 'http'
+            prom_services.append(build_url(scheme=protocol, host=addr, port=port))
 
             deps.append(dd.name())
 
@@ -50,10 +54,18 @@ class GrafanaService(CephadmService):
 
             deps.append(dd.name())
 
-        grafana_data_sources = self.mgr.template.render(
-            'services/grafana/ceph-dashboard.yml.j2', {'hosts': prom_services, 'loki_host': loki_host})
-
-        spec: GrafanaSpec = cast(GrafanaSpec, self.mgr.spec_store.active_specs[daemon_spec.service_name])
+        root_cert = self.mgr.http_server.service_discovery.ssl_certs.get_root_cert()
+        oneline_root_cert = '\\n'.join([line.strip() for line in root_cert.splitlines()])
+        grafana_data_sources = self.mgr.template.render('services/grafana/ceph-dashboard.yml.j2',
+                                                        {'hosts': prom_services,
+                                                         'prometheus_user': self.mgr.prometheus_web_user,
+                                                         'prometheus_password': self.mgr.prometheus_web_password,
+                                                         'cephadm_root_ca': oneline_root_cert,
+                                                         'security_enabled': self.mgr.secure_monitoring_stack,
+                                                         'loki_host': loki_host})
+
+        spec: GrafanaSpec = cast(
+            GrafanaSpec, self.mgr.spec_store.active_specs[daemon_spec.service_name])
         grafana_ini = self.mgr.template.render(
             'services/grafana/grafana.ini.j2', {
                 'initial_admin_password': spec.initial_admin_password,
@@ -103,7 +115,8 @@ class GrafanaService(CephadmService):
         # that were originally generated by cephadm or in case cert/key are empty.
         if not certs_present or (org == 'Ceph' and cn == 'cephadm'):
             logger.info('Regenerating cephadm self-signed grafana TLS certificates')
-            cert, pkey = create_self_signed_cert('Ceph', daemon_spec.host)
+            host_fqdn = socket.getfqdn(daemon_spec.host)
+            cert, pkey = create_self_signed_cert('Ceph', host_fqdn)
             self.mgr.set_store(cert_path, cert)
             self.mgr.set_store(key_path, pkey)
             if 'dashboard' in self.mgr.get('mgr_map')['modules']:
@@ -126,7 +139,8 @@ class GrafanaService(CephadmService):
                > ceph orch daemon reconfig <grafana-daemon>
 
             """
-            self.mgr.set_health_warning('CEPHADM_CERT_ERROR', 'Invalid grafana certificate: ', 1, [err_msg])
+            self.mgr.set_health_warning(
+                'CEPHADM_CERT_ERROR', 'Invalid grafana certificate: ', 1, [err_msg])
 
         return cert, pkey
 
@@ -220,6 +234,7 @@ class AlertmanagerService(CephadmService):
                 f'{p_result.scheme}://{hostname}:{p_result.port}{p_result.path}')
             proto = p_result.scheme
             port = p_result.port
+
         # scan all mgrs to generate deps and to get standbys too.
         # assume that they are all on the same port as the active mgr.
         for dd in self.mgr.cache.get_daemons_by_service('mgr'):
@@ -244,6 +259,7 @@ class AlertmanagerService(CephadmService):
                                      port=dd.ports[0], path='/alerts'))
 
         context = {
+            'secure_monitoring_stack': self.mgr.secure_monitoring_stack,
             'dashboard_urls': dashboard_urls,
             'default_webhook_urls': default_webhook_urls,
             'snmp_gateway_urls': snmp_gateway_urls,
@@ -259,12 +275,37 @@ class AlertmanagerService(CephadmService):
             addr = self._inventory_get_fqdn(dd.hostname)
             peers.append(build_url(host=addr, port=port).lstrip('/'))
 
-        return {
-            "files": {
-                "alertmanager.yml": yml
-            },
-            "peers": peers
-        }, sorted(deps)
+        deps.append(f'secure_monitoring_stack:{self.mgr.secure_monitoring_stack}')
+
+        if self.mgr.secure_monitoring_stack:
+            if self.mgr.alertmanager_web_user and self.mgr.alertmanager_web_password:
+                deps.append(f'{hash(self.mgr.alertmanager_web_user + self.mgr.alertmanager_web_password)}')
+            node_ip = self.mgr.inventory.get_addr(daemon_spec.host)
+            host_fqdn = self._inventory_get_fqdn(daemon_spec.host)
+            cert, key = self.mgr.http_server.service_discovery.ssl_certs.generate_cert(
+                host_fqdn, node_ip)
+            context = {
+                'alertmanager_web_user': self.mgr.alertmanager_web_user,
+                'alertmanager_web_password': password_hash(self.mgr.alertmanager_web_password),
+            }
+            return {
+                "files": {
+                    "alertmanager.yml": yml,
+                    'alertmanager.crt': cert,
+                    'alertmanager.key': key,
+                    'web.yml': self.mgr.template.render('services/alertmanager/web.yml.j2', context),
+                    'root_cert.pem': self.mgr.http_server.service_discovery.ssl_certs.get_root_cert()
+                },
+                'peers': peers,
+                'web_config': '/etc/alertmanager/web.yml'
+            }, sorted(deps)
+        else:
+            return {
+                "files": {
+                    "alertmanager.yml": yml
+                },
+                "peers": peers
+            }, sorted(deps)
 
     def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription:
         # TODO: if there are multiple daemons, who is the active one?
@@ -278,7 +319,8 @@ class AlertmanagerService(CephadmService):
         assert dd.hostname is not None
         addr = dd.ip if dd.ip else self._inventory_get_fqdn(dd.hostname)
         port = dd.ports[0] if dd.ports else self.DEFAULT_SERVICE_PORT
-        service_url = build_url(scheme='http', host=addr, port=port)
+        protocol = 'https' if self.mgr.secure_monitoring_stack else 'http'
+        service_url = build_url(scheme=protocol, host=addr, port=port)
         self._set_service_url_on_dashboard(
             'AlertManager',
             'dashboard get-alertmanager-api-host',
@@ -326,7 +368,6 @@ class PrometheusService(CephadmService):
     ) -> Tuple[Dict[str, Any], List[str]]:
 
         assert self.TYPE == daemon_spec.daemon_type
-
         spec = cast(PrometheusSpec, self.mgr.spec_store[daemon_spec.service_name].spec)
 
         try:
@@ -343,7 +384,8 @@ class PrometheusService(CephadmService):
         # build service discovery end-point
         port = self.mgr.service_discovery_port
         mgr_addr = wrap_ipv6(self.mgr.get_mgr_ip())
-        srv_end_point = f'https://{mgr_addr}:{port}/sd/prometheus/sd-config?'
+        protocol = 'https' if self.mgr.secure_monitoring_stack else 'http'
+        srv_end_point = f'{protocol}://{mgr_addr}:{port}/sd/prometheus/sd-config?'
 
         node_exporter_cnt = len(self.mgr.cache.get_daemons_by_service('node-exporter'))
         alertmgr_cnt = len(self.mgr.cache.get_daemons_by_service('alertmanager'))
@@ -356,6 +398,11 @@ class PrometheusService(CephadmService):
 
         # generate the prometheus configuration
         context = {
+            'alertmanager_web_user': self.mgr.alertmanager_web_user,
+            'alertmanager_web_password': self.mgr.alertmanager_web_password,
+            'secure_monitoring_stack': self.mgr.secure_monitoring_stack,
+            'service_discovery_username': self.mgr.http_server.service_discovery.username,
+            'service_discovery_password': self.mgr.http_server.service_discovery.password,
             'mgr_prometheus_sd_url': mgr_prometheus_sd_url,
             'node_exporter_sd_url': node_exporter_sd_url,
             'alertmanager_sd_url': alertmanager_sd_url,
@@ -363,15 +410,43 @@ class PrometheusService(CephadmService):
             'ceph_exporter_sd_url': ceph_exporter_sd_url
         }
 
-        r: Dict[str, Any] = {
-            'files': {
-                'prometheus.yml': self.mgr.template.render('services/prometheus/prometheus.yml.j2', context),
-                'root_cert.pem': self.mgr.http_server.service_discovery.ssl_certs.get_root_cert()
-            },
-            'retention_time': retention_time,
-            'retention_size': retention_size
+        web_context = {
+            'prometheus_web_user': self.mgr.prometheus_web_user,
+            'prometheus_web_password': password_hash(self.mgr.prometheus_web_password),
         }
 
+        if self.mgr.secure_monitoring_stack:
+            cfg_key = 'mgr/prometheus/root/cert'
+            cmd = {'prefix': 'config-key get', 'key': cfg_key}
+            ret, mgr_prometheus_rootca, err = self.mgr.mon_command(cmd)
+            if ret != 0:
+                logger.error(f'mon command to get config-key {cfg_key} failed: {err}')
+            else:
+                node_ip = self.mgr.inventory.get_addr(daemon_spec.host)
+                host_fqdn = self._inventory_get_fqdn(daemon_spec.host)
+                cert, key = self.mgr.http_server.service_discovery.ssl_certs.generate_cert(host_fqdn, node_ip)
+                r: Dict[str, Any] = {
+                    'files': {
+                        'prometheus.yml': self.mgr.template.render('services/prometheus/prometheus.yml.j2', context),
+                        'root_cert.pem': self.mgr.http_server.service_discovery.ssl_certs.get_root_cert(),
+                        'mgr_prometheus_cert.pem': mgr_prometheus_rootca,
+                        'web.yml': self.mgr.template.render('services/prometheus/web.yml.j2', web_context),
+                        'prometheus.crt': cert,
+                        'prometheus.key': key,
+                    },
+                    'retention_time': retention_time,
+                    'retention_size': retention_size,
+                    'web_config': '/etc/prometheus/web.yml'
+                }
+        else:
+            r = {
+                'files': {
+                    'prometheus.yml': self.mgr.template.render('services/prometheus/prometheus.yml.j2', context)
+                },
+                'retention_time': retention_time,
+                'retention_size': retention_size
+            }
+
         # include alerts, if present in the container
         if os.path.exists(self.mgr.prometheus_alerts_path):
             with open(self.mgr.prometheus_alerts_path, 'r', encoding='utf-8') as f:
@@ -399,13 +474,18 @@ class PrometheusService(CephadmService):
 
     def calculate_deps(self) -> List[str]:
         deps = []  # type: List[str]
-        port = cast(int, self.mgr.get_module_option_ex(
-            'prometheus', 'server_port', self.DEFAULT_MGR_PROMETHEUS_PORT))
+        port = cast(int, self.mgr.get_module_option_ex('prometheus', 'server_port', self.DEFAULT_MGR_PROMETHEUS_PORT))
         deps.append(str(port))
         deps.append(str(self.mgr.service_discovery_port))
         # add an explicit dependency on the active manager. This will force to
         # re-deploy prometheus if the mgr has changed (due to a fail-over i.e).
         deps.append(self.mgr.get_active_mgr().name())
+        if self.mgr.secure_monitoring_stack:
+            if self.mgr.prometheus_web_user and self.mgr.prometheus_web_password:
+                deps.append(f'{hash(self.mgr.prometheus_web_user + self.mgr.prometheus_web_password)}')
+            if self.mgr.alertmanager_web_user and self.mgr.alertmanager_web_password:
+                deps.append(f'{hash(self.mgr.alertmanager_web_user + self.mgr.alertmanager_web_password)}')
+        deps.append(f'secure_monitoring_stack:{self.mgr.secure_monitoring_stack}')
         # add dependency on ceph-exporter daemons
         deps += [d.name() for d in self.mgr.cache.get_daemons_by_service('ceph-exporter')]
         deps += [s for s in ['node-exporter', 'alertmanager'] if self.mgr.cache.get_daemons_by_service(s)]
@@ -425,7 +505,8 @@ class PrometheusService(CephadmService):
         assert dd.hostname is not None
         addr = dd.ip if dd.ip else self._inventory_get_fqdn(dd.hostname)
         port = dd.ports[0] if dd.ports else self.DEFAULT_SERVICE_PORT
-        service_url = build_url(scheme='http', host=addr, port=port)
+        protocol = 'https' if self.mgr.secure_monitoring_stack else 'http'
+        service_url = build_url(scheme=protocol, host=addr, port=port)
         self._set_service_url_on_dashboard(
             'Prometheus',
             'dashboard get-prometheus-api-host',
@@ -454,7 +535,25 @@ class NodeExporterService(CephadmService):
 
     def generate_config(self, daemon_spec: CephadmDaemonDeploySpec) -> Tuple[Dict[str, Any], List[str]]:
         assert self.TYPE == daemon_spec.daemon_type
-        return {}, []
+        deps = [f'secure_monitoring_stack:{self.mgr.secure_monitoring_stack}']
+        if self.mgr.secure_monitoring_stack:
+            node_ip = self.mgr.inventory.get_addr(daemon_spec.host)
+            host_fqdn = self._inventory_get_fqdn(daemon_spec.host)
+            cert, key = self.mgr.http_server.service_discovery.ssl_certs.generate_cert(
+                host_fqdn, node_ip)
+            r = {
+                'files': {
+                    'web.yml': self.mgr.template.render('services/node-exporter/web.yml.j2', {}),
+                    'root_cert.pem': self.mgr.http_server.service_discovery.ssl_certs.get_root_cert(),
+                    'node_exporter.crt': cert,
+                    'node_exporter.key': key,
+                },
+                'web_config': '/etc/node-exporter/web.yml'
+            }
+        else:
+            r = {}
+
+        return r, deps
 
     def ok_to_stop(self,
                    daemon_ids: List[str],
index 4e394106f05a42a8fda2ebd65b3656625760bf22..b34a1fc17e2811bb626670c0508c0b6ffdc92012 100644 (file)
@@ -6,8 +6,12 @@ global:
 {% if not secure %}
   http_config:
     tls_config:
+{% if secure_monitoring_stack %}
+      ca_file: root_cert.pem
+{% else %}
       insecure_skip_verify: true
 {% endif %}
+{% endif %}
 
 route:
   receiver: 'default'
diff --git a/src/pybind/mgr/cephadm/templates/services/alertmanager/web.yml.j2 b/src/pybind/mgr/cephadm/templates/services/alertmanager/web.yml.j2
new file mode 100644 (file)
index 0000000..ef4f0b4
--- /dev/null
@@ -0,0 +1,5 @@
+tls_server_config:
+  cert_file: alertmanager.crt
+  key_file: alertmanager.key
+basic_auth_users:
+    {{ alertmanager_web_user }}: {{ alertmanager_web_password }}
index 7e5ffe5eaa88da1f6cc9142608010287a8e4fb7d..46aea864f536d69e41709f05b9c877cf275faef7 100644 (file)
@@ -1,13 +1,12 @@
 # {{ cephadm_managed }}
+apiVersion: 1
+
 deleteDatasources:
 {% for host in hosts %}
   - name: 'Dashboard{{ loop.index }}'
     orgId: 1
 {% endfor %}
 
-  - name: 'Loki'
-    orgId: 2
-
 datasources:
 {% for host in hosts %}
   - name: 'Dashboard{{ loop.index }}'
@@ -15,16 +14,26 @@ datasources:
     access: 'proxy'
     orgId: 1
     url: '{{ host }}'
-    basicAuth: false
+    basicAuth: {{ 'true' if security_enabled else 'false' }}
     isDefault: {{ 'true' if loop.first else 'false' }}
     editable: false
+{% if security_enabled %}
+    basicAuthUser: {{ prometheus_user }}
+    jsonData:
+       graphiteVersion: "1.1"
+       tlsAuth: false
+       tlsAuthWithCACert: true
+       tlsSkipVerify: false
+    secureJsonData:
+      basicAuthPassword: {{ prometheus_password }}
+      tlsCACert: "{{ cephadm_root_ca }}"
+{% endif %}
 {% endfor %}
 
   - name: 'Loki'
     type: 'loki'
     access: 'proxy'
-    orgId: 2
     url: '{{ loki_host }}'
     basicAuth: false
-    isDefault: true
+    isDefault: false
     editable: false
diff --git a/src/pybind/mgr/cephadm/templates/services/node-exporter/web.yml.j2 b/src/pybind/mgr/cephadm/templates/services/node-exporter/web.yml.j2
new file mode 100644 (file)
index 0000000..1c12203
--- /dev/null
@@ -0,0 +1,3 @@
+tls_server_config:
+  cert_file: node_exporter.crt
+  key_file: node_exporter.key
index 65d1cc18f96cd7e564735634a27af8d15a579f09..acbda6b9911661ede5af28f3a92672027cf13b56 100644 (file)
@@ -8,35 +8,83 @@ rule_files:
 {% if alertmanager_sd_url %}
 alerting:
   alertmanagers:
-    - scheme: http
+{% if secure_monitoring_stack %}
+    - scheme: https
+      basic_auth:
+        username: {{ alertmanager_web_user }}
+        password: {{ alertmanager_web_password }}
+      tls_config:
+        ca_file: root_cert.pem
       http_sd_configs:
         - url: {{ alertmanager_sd_url }}
+          basic_auth:
+            username: {{ service_discovery_username }}
+            password: {{ service_discovery_password }}
           tls_config:
             ca_file: root_cert.pem
+{% else %}
+    - scheme: http
+      http_sd_configs:
+        - url: {{ alertmanager_sd_url }}
+{% endif %}
 {% endif %}
 
 scrape_configs:
   - job_name: 'ceph'
+{% if secure_monitoring_stack %}
+    scheme: https
+    tls_config:
+      ca_file: mgr_prometheus_cert.pem
     honor_labels: true
     http_sd_configs:
     - url: {{ mgr_prometheus_sd_url }}
+      basic_auth:
+        username: {{ service_discovery_username }}
+        password: {{ service_discovery_password }}
       tls_config:
         ca_file: root_cert.pem
+{% else %}
+    honor_labels: true
+    http_sd_configs:
+    - url: {{ mgr_prometheus_sd_url }}
+{% endif %}
 
 {% if node_exporter_sd_url %}
   - job_name: 'node'
+{% if secure_monitoring_stack %}
+    scheme: https
+    tls_config:
+      ca_file: root_cert.pem
     http_sd_configs:
     - url: {{ node_exporter_sd_url }}
+      basic_auth:
+        username: {{ service_discovery_username }}
+        password: {{ service_discovery_password }}
       tls_config:
         ca_file: root_cert.pem
+{% else %}
+    http_sd_configs:
+    - url: {{ node_exporter_sd_url }}
+{% endif %}
 {% endif %}
 
 {% if haproxy_sd_url %}
   - job_name: 'haproxy'
+{% if secure_monitoring_stack %}
+    scheme: https
+    tls_config:
+      ca_file: root_cert.pem
     http_sd_configs:
     - url: {{ haproxy_sd_url }}
+      basic_auth:
+        username: {{ service_discovery_username }}
+        password: {{ service_discovery_password }}
       tls_config:
         ca_file: root_cert.pem
+{% else %}
+    http_sd_configs:
+    - url: {{ haproxy_sd_url }}
+{% endif %}
 {% endif %}
 
 {% if ceph_exporter_sd_url %}
diff --git a/src/pybind/mgr/cephadm/templates/services/prometheus/web.yml.j2 b/src/pybind/mgr/cephadm/templates/services/prometheus/web.yml.j2
new file mode 100644 (file)
index 0000000..da3c3d7
--- /dev/null
@@ -0,0 +1,5 @@
+tls_server_config:
+  cert_file: prometheus.crt
+  key_file: prometheus.key
+basic_auth_users:
+    {{ prometheus_web_user }}: {{ prometheus_web_password }}
index 630218b549f37f76520361887788f5527ba9e61e..ff98a13885f8097094c5d0b22b7fdb69e953d1ff 100644 (file)
@@ -66,15 +66,18 @@ class FakeMgr:
         self.cache = FakeCache()
         self.spec_store = FakeSpecStore(self)
 
+    def get_mgr_id(self):
+        return 'mgr-1'
+
     def list_servers(self):
 
         servers = [
             {'hostname': 'node0',
              'ceph_version': '16.2',
-             'services': [{'type': 'mgr'}, {'type': 'mon'}]},
+             'services': [{'type': 'mgr', 'id': 'mgr-1'}, {'type': 'mon'}]},
             {'hostname': 'node1',
              'ceph_version': '16.2',
-             'services': [{'type': 'mgr'}, {'type': 'mon'}]}
+             'services': [{'type': 'mgr', 'id': 'mgr-2'}, {'type': 'mon'}]}
         ]
 
         return servers
@@ -106,7 +109,7 @@ class TestServiceDiscovery:
             assert 'targets' in entry
 
         # check content
-        assert cfg[0]['targets'] == ['node0:9283', 'node1:9283']
+        assert cfg[0]['targets'] == ['node0:9283']
 
     def test_get_sd_config_node_exporter(self):
         mgr = FakeMgr()
index aa94a79e6d09c6c0f2470d91134953aad6d4e59d..eae1bd9b77b1d4d36bdbe52b290e08111b96c6ba 100644 (file)
@@ -395,12 +395,98 @@ class TestMonitoring:
 
     @patch("cephadm.serve.CephadmServe._run_cephadm")
     @patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '::1')
-    def test_prometheus_config(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+    @patch("cephadm.services.monitoring.password_hash", lambda password: 'fake_password')
+    def test_alertmanager_config_security_enabled(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
         _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
 
+        def gen_cert(host, addr):
+            return ('mycert', 'mykey')
+
+        def get_root_cert():
+            return 'my_root_cert'
+
+        with with_host(cephadm_module, 'test'):
+            cephadm_module.secure_monitoring_stack = True
+            cephadm_module.alertmanager_web_password = 'fake_password'
+            cephadm_module.alertmanager_web_user = 'admin'
+            cephadm_module.http_server.service_discovery.ssl_certs.generate_cert = MagicMock(side_effect=gen_cert)
+            cephadm_module.http_server.service_discovery.ssl_certs.get_root_cert = MagicMock(side_effect=get_root_cert)
+            with with_service(cephadm_module, AlertManagerSpec()):
+
+                y = dedent("""
+                # This file is generated by cephadm.
+                # See https://prometheus.io/docs/alerting/configuration/ for documentation.
+
+                global:
+                  resolve_timeout: 5m
+                  http_config:
+                    tls_config:
+                      ca_file: root_cert.pem
+
+                route:
+                  receiver: 'default'
+                  routes:
+                    - group_by: ['alertname']
+                      group_wait: 10s
+                      group_interval: 10s
+                      repeat_interval: 1h
+                      receiver: 'ceph-dashboard'
+
+                receivers:
+                - name: 'default'
+                  webhook_configs:
+                - name: 'ceph-dashboard'
+                  webhook_configs:
+                  - url: 'http://localhost:8080/api/prometheus_receiver'
+                """).lstrip()
+
+                web_config = dedent("""
+                tls_server_config:
+                  cert_file: alertmanager.crt
+                  key_file: alertmanager.key
+                basic_auth_users:
+                    admin: fake_password""").lstrip()
+
+                _run_cephadm.assert_called_with(
+                    'test',
+                    'alertmanager.test',
+                    'deploy',
+                    [
+                        '--name', 'alertmanager.test',
+                        '--meta-json', '{"service_name": "alertmanager", "ports": [9093, 9094], "ip": null, "deployed_by": [], "rank": null, "rank_generation": null, "extra_container_args": null, "extra_entrypoint_args": null}',
+                        '--config-json', '-', '--tcp-ports', '9093 9094'
+                    ],
+                    stdin=json.dumps({
+                        "files": {
+                            "alertmanager.yml": y,
+                            'alertmanager.crt': 'mycert',
+                            'alertmanager.key': 'mykey',
+                            'web.yml': web_config,
+                            'root_cert.pem': 'my_root_cert'
+                        },
+                        'peers': [],
+                        'web_config': '/etc/alertmanager/web.yml'
+                    }),
+                    image='')
+
+    @patch("cephadm.serve.CephadmServe._run_cephadm")
+    @patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '::1')
+    def test_prometheus_config_security_disabled(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+        _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+        s = RGWSpec(service_id="foo", placement=PlacementSpec(count=1), rgw_frontend_type='beast')
         with with_host(cephadm_module, 'test'):
             with with_service(cephadm_module, MonitoringSpec('node-exporter')) as _, \
                     with_service(cephadm_module, CephExporterSpec('ceph-exporter')) as _, \
+                    with_service(cephadm_module, s) as _, \
+                    with_service(cephadm_module, AlertManagerSpec('alertmanager')) as _, \
+                    with_service(cephadm_module, IngressSpec(service_id='ingress',
+                                                             frontend_port=8089,
+                                                             monitor_port=8999,
+                                                             monitor_user='admin',
+                                                             monitor_password='12345',
+                                                             keepalived_password='12345',
+                                                             virtual_ip="1.2.3.4/32",
+                                                             backend_service='rgw.foo')) as _, \
                     with_service(cephadm_module, PrometheusSpec('prometheus')) as _:
 
                 y = dedent("""
@@ -411,21 +497,149 @@ class TestMonitoring:
                 rule_files:
                   - /etc/prometheus/alerting/*
 
+                alerting:
+                  alertmanagers:
+                    - scheme: http
+                      http_sd_configs:
+                        - url: http://[::1]:8765/sd/prometheus/sd-config?service=alertmanager
 
                 scrape_configs:
                   - job_name: 'ceph'
                     honor_labels: true
                     http_sd_configs:
+                    - url: http://[::1]:8765/sd/prometheus/sd-config?service=mgr-prometheus
+
+                  - job_name: 'node'
+                    http_sd_configs:
+                    - url: http://[::1]:8765/sd/prometheus/sd-config?service=node-exporter
+
+                  - job_name: 'haproxy'
+                    http_sd_configs:
+                    - url: http://[::1]:8765/sd/prometheus/sd-config?service=haproxy
+
+                  - job_name: 'ceph-exporter'
+                    honor_labels: true
+                    http_sd_configs:
+                    - url: http://[::1]:8765/sd/prometheus/sd-config?service=ceph-exporter
+                      tls_config:
+                        ca_file: root_cert.pem
+                """).lstrip()
+
+                _run_cephadm.assert_called_with(
+                    'test',
+                    'prometheus.test',
+                    'deploy',
+                    [
+                        '--name', 'prometheus.test',
+                        '--meta-json',
+                        ('{"service_name": "prometheus", "ports": [9095], "ip": null, "deployed_by": [], "rank": null, '
+                         '"rank_generation": null, "extra_container_args": null, "extra_entrypoint_args": null}'),
+                        '--config-json', '-',
+                        '--tcp-ports', '9095'
+                    ],
+                    stdin=json.dumps({"files": {"prometheus.yml": y,
+                                                "/etc/prometheus/alerting/custom_alerts.yml": ""},
+                                      'retention_time': '15d',
+                                      'retention_size': '0'}),
+                    image='')
+
+    @patch("cephadm.serve.CephadmServe._run_cephadm")
+    @patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '::1')
+    @patch("cephadm.services.monitoring.password_hash", lambda password: 'fake_password')
+    def test_prometheus_config_security_enabled(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+        _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+        s = RGWSpec(service_id="foo", placement=PlacementSpec(count=1), rgw_frontend_type='beast')
+
+        def gen_cert(host, addr):
+            return ('mycert', 'mykey')
+
+        with with_host(cephadm_module, 'test'):
+            cephadm_module.secure_monitoring_stack = True
+            cephadm_module.http_server.service_discovery.username = 'admin'
+            cephadm_module.http_server.service_discovery.password = 'fake_password'
+            cephadm_module.http_server.service_discovery.ssl_certs.generate_cert = MagicMock(
+                side_effect=gen_cert)
+            with with_service(cephadm_module, MonitoringSpec('node-exporter')) as _, \
+                    with_service(cephadm_module, s) as _, \
+                    with_service(cephadm_module, AlertManagerSpec('alertmanager')) as _, \
+                    with_service(cephadm_module, IngressSpec(service_id='ingress',
+                                                             frontend_port=8089,
+                                                             monitor_port=8999,
+                                                             monitor_user='admin',
+                                                             monitor_password='12345',
+                                                             keepalived_password='12345',
+                                                             virtual_ip="1.2.3.4/32",
+                                                             backend_service='rgw.foo')) as _, \
+                    with_service(cephadm_module, PrometheusSpec('prometheus')) as _:
+
+                web_config = dedent("""
+                tls_server_config:
+                  cert_file: prometheus.crt
+                  key_file: prometheus.key
+                basic_auth_users:
+                    admin: fake_password""").lstrip()
+
+                y = dedent("""
+                # This file is generated by cephadm.
+                global:
+                  scrape_interval: 10s
+                  evaluation_interval: 10s
+                rule_files:
+                  - /etc/prometheus/alerting/*
+
+                alerting:
+                  alertmanagers:
+                    - scheme: https
+                      basic_auth:
+                        username: admin
+                        password: admin
+                      tls_config:
+                        ca_file: root_cert.pem
+                      http_sd_configs:
+                        - url: https://[::1]:8765/sd/prometheus/sd-config?service=alertmanager
+                          basic_auth:
+                            username: admin
+                            password: fake_password
+                          tls_config:
+                            ca_file: root_cert.pem
+
+                scrape_configs:
+                  - job_name: 'ceph'
+                    scheme: https
+                    tls_config:
+                      ca_file: mgr_prometheus_cert.pem
+                    honor_labels: true
+                    http_sd_configs:
                     - url: https://[::1]:8765/sd/prometheus/sd-config?service=mgr-prometheus
+                      basic_auth:
+                        username: admin
+                        password: fake_password
                       tls_config:
                         ca_file: root_cert.pem
 
                   - job_name: 'node'
+                    scheme: https
+                    tls_config:
+                      ca_file: root_cert.pem
                     http_sd_configs:
                     - url: https://[::1]:8765/sd/prometheus/sd-config?service=node-exporter
+                      basic_auth:
+                        username: admin
+                        password: fake_password
                       tls_config:
                         ca_file: root_cert.pem
 
+                  - job_name: 'haproxy'
+                    scheme: https
+                    tls_config:
+                      ca_file: root_cert.pem
+                    http_sd_configs:
+                    - url: https://[::1]:8765/sd/prometheus/sd-config?service=haproxy
+                      basic_auth:
+                        username: admin
+                        password: fake_password
+                      tls_config:
+                        ca_file: root_cert.pem
 
                   - job_name: 'ceph-exporter'
                     honor_labels: true
@@ -442,16 +656,25 @@ class TestMonitoring:
                     [
                         '--name', 'prometheus.test',
                         '--meta-json',
-                        ('{"service_name": "prometheus", "ports": [9095], "ip": null, "deployed_by": [], "rank": null, '
-                         '"rank_generation": null, "extra_container_args": null, "extra_entrypoint_args": null}'),
+                        '{"service_name": "prometheus", "ports": [9095], "ip": null, "deployed_by": [], "rank": null, "rank_generation": null, "extra_container_args": null, "extra_entrypoint_args": null}',
                         '--config-json', '-',
                         '--tcp-ports', '9095'
                     ],
-                    stdin=json.dumps({"files": {"prometheus.yml": y, "root_cert.pem": '',
-                                                "/etc/prometheus/alerting/custom_alerts.yml": ""},
-                                      'retention_time': '15d',
-                                      'retention_size': '0'}),
-                    image='')
+                    stdin=json.dumps({
+                        'files': {
+                            'prometheus.yml': y,
+                            'root_cert.pem': '',
+                            'mgr_prometheus_cert.pem': '',
+                            'web.yml': web_config,
+                            'prometheus.crt': 'mycert',
+                            'prometheus.key': 'mykey',
+                            "/etc/prometheus/alerting/custom_alerts.yml": "",
+                        },
+                        'retention_time': '15d',
+                        'retention_size': '0',
+                        'web_config': '/etc/prometheus/web.yml'}),
+                    image=''
+                )
 
     @patch("cephadm.serve.CephadmServe._run_cephadm")
     def test_loki_config(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
@@ -586,13 +809,12 @@ class TestMonitoring:
                           allow_embedding = true""").lstrip(),  # noqa: W291
                     'provisioning/datasources/ceph-dashboard.yml': dedent("""
                         # This file is generated by cephadm.
+                        apiVersion: 1
+
                         deleteDatasources:
                           - name: 'Dashboard1'
                             orgId: 1
 
-                          - name: 'Loki'
-                            orgId: 2
-
                         datasources:
                           - name: 'Dashboard1'
                             type: 'prometheus'
@@ -606,10 +828,9 @@ class TestMonitoring:
                           - name: 'Loki'
                             type: 'loki'
                             access: 'proxy'
-                            orgId: 2
                             url: ''
                             basicAuth: false
-                            isDefault: true
+                            isDefault: false
                             editable: false""").lstrip(),
                     'certs/cert_file': dedent(f"""
                         # generated by cephadm\n{grafana_cert}""").lstrip(),
@@ -666,20 +887,18 @@ class TestMonitoring:
                                     '  allow_embedding = true',
                                 'provisioning/datasources/ceph-dashboard.yml':
                                     "# This file is generated by cephadm.\n"
+                                    "apiVersion: 1\n\n"
                                     'deleteDatasources:\n\n'
-                                    "  - name: 'Loki'\n"
-                                    '    orgId: 2\n\n'
                                     'datasources:\n\n'
                                     "  - name: 'Loki'\n"
                                     "    type: 'loki'\n"
                                     "    access: 'proxy'\n"
-                                    '    orgId: 2\n'
                                     "    url: ''\n"
                                     '    basicAuth: false\n'
-                                    '    isDefault: true\n'
+                                    '    isDefault: false\n'
                                     '    editable: false',
                                 'certs/cert_file': ANY,
-                                'certs/cert_key': ANY}}, [])
+                                'certs/cert_key': ANY}}, ['secure_monitoring_stack:False'])
 
     @patch("cephadm.serve.CephadmServe._run_cephadm")
     def test_monitoring_ports(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
index ae4abfc1668859529111e9b4cc526981b39ca077..e183656b77c8d30387b8cc303291d522b21e7a86 100644 (file)
@@ -1,10 +1,13 @@
 # -*- coding: utf-8 -*-
 
 import json
+import os
+import tempfile
 from datetime import datetime
 
 import requests
 
+from .. import mgr
 from ..exceptions import DashboardException
 from ..security import Scope
 from ..services import ceph_service
@@ -29,15 +32,50 @@ class PrometheusReceiver(BaseController):
 class PrometheusRESTController(RESTController):
     def prometheus_proxy(self, method, path, params=None, payload=None):
         # type (str, str, dict, dict)
-        return self._proxy(self._get_api_url(Settings.PROMETHEUS_API_HOST),
-                           method, path, 'Prometheus', params, payload,
-                           verify=Settings.PROMETHEUS_API_SSL_VERIFY)
+        user, password, cert_file = self.get_access_info('prometheus')
+        verify = cert_file.name if cert_file else Settings.PROMETHEUS_API_SSL_VERIFY
+        response = self._proxy(self._get_api_url(Settings.PROMETHEUS_API_HOST),
+                               method, path, 'Prometheus', params, payload,
+                               user=user, password=password, verify=verify)
+        if cert_file:
+            cert_file.close()
+            os.unlink(cert_file.name)
+        return response
 
     def alert_proxy(self, method, path, params=None, payload=None):
         # type (str, str, dict, dict)
-        return self._proxy(self._get_api_url(Settings.ALERTMANAGER_API_HOST),
-                           method, path, 'Alertmanager', params, payload,
-                           verify=Settings.ALERTMANAGER_API_SSL_VERIFY)
+        user, password, cert_file = self.get_access_info('alertmanager')
+        verify = cert_file.name if cert_file else Settings.ALERTMANAGER_API_SSL_VERIFY
+        response = self._proxy(self._get_api_url(Settings.ALERTMANAGER_API_HOST),
+                               method, path, 'Alertmanager', params, payload,
+                               user=user, password=password, verify=verify)
+        if cert_file:
+            cert_file.close()
+            os.unlink(cert_file.name)
+        return response
+
+    def get_access_info(self, module_name):
+        # type (str, str, str)
+        if module_name not in ['prometheus', 'alertmanager']:
+            raise DashboardException(f'Invalid module name {module_name}', component='prometheus')
+        user = None
+        password = None
+        cert_file = None
+        secure_monitoring_stack = bool(mgr.get_module_option_ex('cephadm',
+                                                                'secure_monitoring_stack',
+                                                                'false'))
+        if secure_monitoring_stack:
+            cmd = {'prefix': f'orch {module_name} access info'}
+            ret, out, _ = mgr.mon_command(cmd)
+            if ret == 0 and out is not None:
+                access_info = json.loads(out)
+                user = access_info['user']
+                password = access_info['password']
+                certificate = access_info['certificate']
+                cert_file = tempfile.NamedTemporaryFile(delete=False)
+                cert_file.write(certificate.encode('utf-8'))
+                cert_file.flush()
+        return user, password, cert_file
 
     def _get_api_url(self, host):
         return host.rstrip('/') + '/api/v1'
@@ -45,11 +83,15 @@ class PrometheusRESTController(RESTController):
     def balancer_status(self):
         return ceph_service.CephService.send_command('mon', 'balancer status')
 
-    def _proxy(self, base_url, method, path, api_name, params=None, payload=None, verify=True):
+    def _proxy(self, base_url, method, path, api_name, params=None, payload=None, verify=True,
+               user=None, password=None):
         # type (str, str, str, str, dict, dict, bool)
         try:
+            from requests.auth import HTTPBasicAuth
+            auth = HTTPBasicAuth(user, password) if user and password else None
             response = requests.request(method, base_url + path, params=params,
-                                        json=payload, verify=verify)
+                                        json=payload, verify=verify,
+                                        auth=auth)
         except Exception:
             raise DashboardException(
                 "Could not reach {}'s API on {}".format(api_name, base_url),
index d379d6e2146557df9dc707afcdec54678326f75f..789ccf670207314e065517e887e2738dd76e6fbe 100644 (file)
@@ -14,6 +14,7 @@ from typing import List, Optional, Sequence
 
 import bcrypt
 from mgr_module import CLICheckNonemptyFileInput, CLIReadCommand, CLIWriteCommand
+from mgr_util import password_hash
 
 from .. import mgr
 from ..exceptions import PasswordPolicyException, PermissionNotValid, \
@@ -27,17 +28,6 @@ logger = logging.getLogger('access_control')
 DEFAULT_FILE_DESC = 'password/secret'
 
 
-# password hashing algorithm
-def password_hash(password, salt_password=None):
-    if not password:
-        return None
-    if not salt_password:
-        salt_password = bcrypt.gensalt()
-    else:
-        salt_password = salt_password.encode('utf8')
-    return bcrypt.hashpw(password.encode('utf8'), salt_password).decode('utf8')
-
-
 _P = Permission  # short alias
 
 
index cd2fb3e8dd3994ce6a5430b7c70b90d959a18018..21c4a0b10e9ce4ebec709a8d185895d0e5c82118 100644 (file)
@@ -26,43 +26,49 @@ class PrometheusControllerTest(ControllerTestCase):
         mgr.get_module_option.side_effect = settings.get
         cls.setup_controllers([Prometheus, PrometheusNotifications, PrometheusReceiver])
 
+    @patch("dashboard.controllers.prometheus.mgr.get_module_option_ex", lambda a, b, c: False)
     def test_rules(self):
         with patch('requests.request') as mock_request:
             self._get('/api/prometheus/rules')
             mock_request.assert_called_with('GET', self.prometheus_host_api + '/rules',
-                                            json=None, params={}, verify=True)
+                                            json=None, params={}, verify=True, auth=None)
 
+    @patch("dashboard.controllers.prometheus.mgr.get_module_option_ex", lambda a, b, c: False)
     def test_list(self):
         with patch('requests.request') as mock_request:
             self._get('/api/prometheus')
             mock_request.assert_called_with('GET', self.alert_host_api + '/alerts',
-                                            json=None, params={}, verify=True)
+                                            json=None, params={}, verify=True, auth=None)
 
+    @patch("dashboard.controllers.prometheus.mgr.get_module_option_ex", lambda a, b, c: False)
     def test_get_silences(self):
         with patch('requests.request') as mock_request:
             self._get('/api/prometheus/silences')
             mock_request.assert_called_with('GET', self.alert_host_api + '/silences',
-                                            json=None, params={}, verify=True)
+                                            json=None, params={}, verify=True, auth=None)
 
+    @patch("dashboard.controllers.prometheus.mgr.get_module_option_ex", lambda a, b, c: False)
     def test_add_silence(self):
         with patch('requests.request') as mock_request:
             self._post('/api/prometheus/silence', {'id': 'new-silence'})
             mock_request.assert_called_with('POST', self.alert_host_api + '/silences',
                                             params=None, json={'id': 'new-silence'},
-                                            verify=True)
+                                            verify=True, auth=None)
 
+    @patch("dashboard.controllers.prometheus.mgr.get_module_option_ex", lambda a, b, c: False)
     def test_update_silence(self):
         with patch('requests.request') as mock_request:
             self._post('/api/prometheus/silence', {'id': 'update-silence'})
             mock_request.assert_called_with('POST', self.alert_host_api + '/silences',
                                             params=None, json={'id': 'update-silence'},
-                                            verify=True)
+                                            verify=True, auth=None)
 
+    @patch("dashboard.controllers.prometheus.mgr.get_module_option_ex", lambda a, b, c: False)
     def test_expire_silence(self):
         with patch('requests.request') as mock_request:
             self._delete('/api/prometheus/silence/0')
             mock_request.assert_called_with('DELETE', self.alert_host_api + '/silence/0',
-                                            json=None, params=None, verify=True)
+                                            json=None, params=None, verify=True, auth=None)
 
     def test_silences_empty_delete(self):
         with patch('requests.request') as mock_request:
index 8c1e5be44161b74da8647592ab62703019349c77..721660ec2a0f6d2ed09c91c934ab26fd105088b4 100644 (file)
@@ -3,6 +3,7 @@ import os
 if 'UNITTEST' in os.environ:
     import tests
 
+import bcrypt
 import cephfs
 import contextlib
 import datetime
@@ -873,3 +874,13 @@ def profile_method(skip_attribute: bool = False) -> Callable[[Callable[..., T]],
             return result
         return wrapper
     return outer
+
+
+def password_hash(password: Optional[str], salt_password: Optional[str] = None) -> Optional[str]:
+    """Return the bcrypt hash of *password* as text.
+
+    :param password: clear-text password; ``None`` or an empty string yields ``None``.
+    :param salt_password: salt to hash with; when it is missing or empty a
+        new salt is obtained from ``bcrypt.gensalt()``.
+    :return: the utf8-decoded result of ``bcrypt.hashpw``, or ``None``.
+    """
+    if not password:
+        return None
+    salt = salt_password.encode('utf8') if salt_password else bcrypt.gensalt()
+    hashed = bcrypt.hashpw(password.encode('utf8'), salt)
+    return hashed.decode('utf8')
index 2972d92aa188863263f7497c2284ab6cfb9468c5..402355532275b1b4fc4ba6f35bb16445a25e6cd2 100644 (file)
@@ -660,6 +660,14 @@ class Orchestrator(object):
         """Update prometheus cluster"""
         raise NotImplementedError()
 
+    def get_prometheus_access_info(self) -> OrchResult[Dict[str, str]]:
+        """Return prometheus access information (not implemented by this base class)."""
+        raise NotImplementedError
+
+    def get_alertmanager_access_info(self) -> OrchResult[Dict[str, str]]:
+        """Return alertmanager access information (not implemented by this base class)."""
+        raise NotImplementedError
+
     def apply_node_exporter(self, spec: ServiceSpec) -> OrchResult[str]:
         """Update existing a Node-Exporter daemon(s)"""
         raise NotImplementedError()
index cc97612ad6a8e4052f1f8d874306e85929ea8757..9c98736b64acecf08847ef8efa72bee31639e4c5 100644 (file)
@@ -869,6 +869,18 @@ class OrchestratorCli(OrchestratorClientMixin, MgrModule,
 
             return HandleCommandResult(stdout=table.get_string())
 
+    @_cli_write_command('orch prometheus access info')
+    def _get_prometheus_access_info(self) -> HandleCommandResult:
+        """Show the prometheus access information (JSON)"""
+        # raise_if_exception() surfaces a failed orchestrator call instead of
+        # printing an empty result.
+        access_info = raise_if_exception(self.get_prometheus_access_info())
+        return HandleCommandResult(stdout=json.dumps(access_info))
+
+    @_cli_write_command('orch alertmanager access info')
+    def _get_alertmanager_access_info(self) -> HandleCommandResult:
+        """Show the alertmanager access information (JSON)"""
+        # raise_if_exception() surfaces a failed orchestrator call instead of
+        # printing an empty result.
+        access_info = raise_if_exception(self.get_alertmanager_access_info())
+        return HandleCommandResult(stdout=json.dumps(access_info))
+
     @_cli_write_command('orch apply osd')
     def _apply_osd(self,
                    all_available_devices: bool = False,
index 069330c392c7d01af89cb230ad2e53bbde14df97..b34b865e32096b65ab53512b0a683d42c6457e3f 100644 (file)
@@ -1,4 +1,5 @@
 import cherrypy
+import yaml
 from collections import defaultdict
 from pkg_resources import packaging  # type: ignore
 import json
@@ -8,11 +9,12 @@ import re
 import threading
 import time
 import enum
+from collections import namedtuple
+
 from mgr_module import CLIReadCommand, MgrModule, MgrStandbyModule, PG_STATES, Option, ServiceInfoT, HandleCommandResult, CLIWriteCommand
 from mgr_util import get_default_addr, profile_method, build_url
 from rbd import RBD
-from collections import namedtuple
-import yaml
+from cephadm.ssl_cert_utils import SSLCerts
 
 from typing import DefaultDict, Optional, Dict, Any, Set, cast, Tuple, Union, List, Callable
 
@@ -635,6 +637,7 @@ class Module(MgrModule):
         _global_instance = self
         self.metrics_thread = MetricCollectionThread(_global_instance)
         self.health_history = HealthHistory(self)
+        self.ssl_certs = SSLCerts()
 
     def _setup_static_metrics(self) -> Dict[str, Metric]:
         metrics = {}
@@ -856,9 +859,9 @@ class Module(MgrModule):
         self.log.info('Restarting engine...')
         cherrypy.engine.stop()
         cherrypy.server.httpserver = None
+        server_addr = cast(str, self.get_localized_module_option('server_addr', get_default_addr()))
         server_port = cast(int, self.get_localized_module_option('server_port', DEFAULT_PORT))
-        self.set_uri(build_url(scheme='http', host=self.get_server_addr(), port=server_port, path='/'))
-        cherrypy.config.update({'server.socket_port': server_port})
+        self.configure(server_addr, server_port)
         cherrypy.engine.start()
         self.log.info('Engine started.')
 
@@ -1722,6 +1725,50 @@ class Module(MgrModule):
         self.collect()
         self.get_file_sd_config()
 
+    def configure(self, server_addr: str, server_port: int) -> None:
+        """Apply the cherrypy server settings and publish this module's URI.
+
+        When the cephadm ``secure_monitoring_stack`` option is set, TLS
+        certificates are generated first, the SSL settings point at them and
+        an https URI is published. Otherwise the SSL settings are reset to
+        ``None`` and an http URI is published.
+
+        :param server_addr: value for cherrypy's ``server.socket_host``.
+        :param server_port: value for cherrypy's ``server.socket_port``; also
+            used as the port of the published URI.
+        """
+        settings: Dict[str, Any] = {
+            'server.socket_host': server_addr,
+            'server.socket_port': server_port,
+            'engine.autoreload.on': False,
+        }
+        if self.get_module_option_ex('cephadm', 'secure_monitoring_stack', False):
+            # The certificate files must exist before cherrypy is pointed at them.
+            self.generate_tls_certificates(self.get_mgr_ip())
+            scheme = 'https'
+            settings['server.ssl_module'] = 'builtin'
+            settings['server.ssl_certificate'] = self.cert_file
+            settings['server.ssl_private_key'] = self.key_file
+        else:
+            scheme = 'http'
+            # Explicitly clear SSL settings left over from a previous secure run.
+            settings['server.ssl_module'] = None
+            settings['server.ssl_certificate'] = None
+            settings['server.ssl_private_key'] = None
+        cherrypy.config.update(settings)
+        # Publish the URI that others may use to access the service we're about to start serving
+        uri = build_url(scheme=scheme, host=self.get_server_addr(), port=server_port, path='/')
+        self.set_uri(uri)
+
+    def generate_tls_certificates(self, host: str) -> None:
+        """Prepare the TLS certificate and key used by this endpoint.
+
+        The root credentials kept in the module store under ``root/cert`` and
+        ``root/key`` are reused when both are present and can be loaded.
+        Otherwise a new root certificate is generated for *host* and stored
+        under the same keys. The values returned by
+        ``ssl_certs.generate_cert_files`` are saved in ``self.cert_file`` and
+        ``self.key_file``.
+
+        :param host: passed to ``generate_root_cert`` and, together with the
+            local hostname, to ``generate_cert_files``.
+        """
+        root_loaded = False
+        try:
+            old_cert = self.get_store('root/cert')
+            old_key = self.get_store('root/key')
+            if old_cert and old_key:
+                self.ssl_certs.load_root_credentials(old_cert, old_key)
+                root_loaded = True
+            else:
+                self.log.info('No old credentials for mgr-prometheus endpoint')
+        except Exception:
+            # Best effort: fall back to a fresh root certificate, but leave a
+            # trace so that an unexpected change of the root can be explained.
+            self.log.exception('Failed to load old credentials for mgr-prometheus endpoint')
+
+        if not root_loaded:
+            self.ssl_certs.generate_root_cert(host)
+            self.set_store('root/cert', self.ssl_certs.get_root_cert())
+            self.set_store('root/key', self.ssl_certs.get_root_key())
+
+        self.cert_file, self.key_file = self.ssl_certs.generate_cert_files(
+            self.get_hostname(), host)
+
     def serve(self) -> None:
 
         class Root(object):
@@ -1802,10 +1849,8 @@ class Module(MgrModule):
                                              self.STALE_CACHE_RETURN]:
             self.stale_cache_strategy = self.STALE_CACHE_FAIL
 
-        server_addr = cast(str, self.get_localized_module_option(
-            'server_addr', get_default_addr()))
-        server_port = cast(int, self.get_localized_module_option(
-            'server_port', DEFAULT_PORT))
+        server_addr = cast(str, self.get_localized_module_option('server_addr', get_default_addr()))
+        server_port = cast(int, self.get_localized_module_option('server_port', DEFAULT_PORT))
         self.log.info(
             "server_addr: %s server_port: %s" %
             (server_addr, server_port)
@@ -1818,19 +1863,13 @@ class Module(MgrModule):
         else:
             self.log.info('Cache disabled')
 
-        cherrypy.config.update({
-            'server.socket_host': server_addr,
-            'server.socket_port': server_port,
-            'engine.autoreload.on': False
-        })
-        # Publish the URI that others may use to access the service we're
-        # about to start serving
-        self.set_uri(build_url(scheme='http', host=self.get_server_addr(), port=server_port, path='/'))
+        self.configure(server_addr, server_port)
 
         cherrypy.tree.mount(Root(), "/")
         self.log.info('Starting engine...')
         cherrypy.engine.start()
         self.log.info('Engine started.')
+
         # wait for the shutdown event
         self.shutdown_event.wait()
         self.shutdown_event.clear()
index 1fad65c626f8661d04e5238d9f06714a6bd27922..76fef65dbe430dd49bc18091f06839ebcb32c3da 100644 (file)
@@ -15,3 +15,4 @@ scipy
 setuptools
 werkzeug
 natsort
+bcrypt