git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
mgr/cephadm: adding automation for Prometheus config generation
author: Redouane Kachach <rkachach@ibm.com>
Thu, 19 Jun 2025 07:55:03 +0000 (09:55 +0200)
committer: Redouane Kachach <rkachach@ibm.com>
Thu, 7 Aug 2025 14:39:48 +0000 (16:39 +0200)
Signed-off-by: Redouane Kachach <rkachach@ibm.com>
src/pybind/mgr/cephadm/module.py
src/pybind/mgr/cephadm/services/monitoring.py
src/pybind/mgr/cephadm/services/service_discovery.py
src/pybind/mgr/cephadm/templates/services/prometheus/prometheus.yml.j2
src/pybind/mgr/cephadm/tests/test_services.py

index 0e53c2e9f0ac56499181b5a3f025ee9961f4ca72..f6b3c49355a514c5f42537c65ac9bf92ab3c8fe6 100644 (file)
@@ -743,6 +743,11 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule,
 
         self.cert_mgr.init_tlsobject_store()
 
+    def _get_mgr_ips(self) -> List[str]:
+        return [self.inventory.get_addr(d.hostname)
+                for d in self.cache.get_daemons_by_service('mgr')
+                if d.hostname is not None]
+
     def _get_security_config(self) -> Tuple[bool, bool, bool]:
         oauth2_proxy_enabled = len(self.cache.get_daemons_by_service('oauth2-proxy')) > 0
         mgmt_gw_enabled = len(self.cache.get_daemons_by_service('mgmt-gateway')) > 0
index b2fddd01da7103e4781c4757a95c2e13c206bba0..ce21fd027788e127271c226ac162cb76ccbb57ee 100644 (file)
@@ -9,6 +9,7 @@ import requests
 
 from mgr_module import HandleCommandResult
 from .service_registry import register_cephadm_service
+from cephadm.services.service_registry import service_registry
 
 from orchestrator import DaemonDescription
 from ceph.deployment.service_spec import AlertManagerSpec, GrafanaSpec, ServiceSpec, \
@@ -24,6 +25,14 @@ if TYPE_CHECKING:
 logger = logging.getLogger(__name__)
 
 
+def get_field_from_spec(spec: ServiceSpec, attr: str, default: Any) -> Any:
+    try:
+        value = getattr(spec, attr)
+        return value if value else default
+    except AttributeError:
+        return default
+
+
 @register_cephadm_service
 class GrafanaService(CephadmService):
     TYPE = 'grafana'
@@ -484,6 +493,14 @@ class PrometheusService(CephadmService):
     USER_CFG_KEY = 'prometheus/web_user'
     PASS_CFG_KEY = 'prometheus/web_password'
 
+    def prepare_create(
+            self,
+            daemon_spec: CephadmDaemonDeploySpec,
+    ) -> CephadmDaemonDeploySpec:
+        assert self.TYPE == daemon_spec.daemon_type
+        daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec)
+        return daemon_spec
+
     def config(self, spec: ServiceSpec) -> None:
         # make sure module is enabled
         mgr_map = self.mgr.get('mgr_map')
@@ -501,13 +518,52 @@ class PrometheusService(CephadmService):
         cert, key = self.mgr.cert_mgr.generate_cert([host_fqdn, 'prometheus_servers'], node_ip)
         return cert, key
 
-    def prepare_create(
-            self,
-            daemon_spec: CephadmDaemonDeploySpec,
-    ) -> CephadmDaemonDeploySpec:
-        assert self.TYPE == daemon_spec.daemon_type
-        daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec)
-        return daemon_spec
+    def get_service_discovery_cfg(self, security_enabled: bool, mgmt_gw_enabled: bool) -> Dict[str, List[str]]:
+        """
+        Retrieves the service discovery URLs for the services that require monitoring
+
+        Returns:
+            Dict[str, List[str]]: A dictionary where the keys represent service categories (e.g., "nfs", "node-exporter") and
+                                  the values are a list of service-discovery URLs used to get the corresponding service targets.
+        """
+        if mgmt_gw_enabled:
+            service_discovery_url_prefixes = [f'{self.mgr.get_mgmt_gw_internal_endpoint()}']
+        else:
+            port = self.mgr.service_discovery_port
+            protocol = 'https' if security_enabled else 'http'
+            service_discovery_url_prefixes = [f'{protocol}://{wrap_ipv6(ip)}:{port}'
+                                              for ip in self.mgr._get_mgr_ips()]
+        return {
+            service: [f'{prefix}/sd/prometheus/sd-config?service={service}' for prefix in service_discovery_url_prefixes]
+            for service in service_registry.get_services_requiring_monitoring()
+            if service == 'ceph'
+            or bool(self.mgr.cache.get_daemons_by_service(service))
+            or bool(self.mgr.cache.get_daemons_by_type(service))
+        }
+
+    def configure_alerts(self, r: Dict) -> None:
+        # include alerts, if present in the container
+        if os.path.exists(self.mgr.prometheus_alerts_path):
+            with open(self.mgr.prometheus_alerts_path, 'r', encoding='utf-8') as f:
+                alerts = f.read()
+            r['files']['/etc/prometheus/alerting/ceph_alerts.yml'] = alerts
+
+        # Include custom alerts if present in key value store. This enables the
+        # users to add custom alerts. Write the file in any case, so that if the
+        # content of the key value store changed, that file is overwritten
+        # (emptied in case the value has been removed from the key value
+        # store). This prevents the necessity to adapt `cephadm` binary to
+        # remove the file.
+        #
+        # Don't use the template engine for it as
+        #
+        #   1. the alerts are always static and
+        #   2. they are a template themselves for the Go template engine, which
+        #      use curly braces and escaping that is cumbersome and unnecessary
+        #      for the user.
+        #
+        r['files']['/etc/prometheus/alerting/custom_alerts.yml'] = \
+            self.mgr.get_store('services/prometheus/alerting/custom_alerts.yml', '')
 
     def generate_config(
             self,
@@ -515,56 +571,18 @@ class PrometheusService(CephadmService):
     ) -> Tuple[Dict[str, Any], List[str]]:
 
         assert self.TYPE == daemon_spec.daemon_type
-        spec = cast(PrometheusSpec, self.mgr.spec_store[daemon_spec.service_name].spec)
-        try:
-            retention_time = spec.retention_time if spec.retention_time else '15d'
-        except AttributeError:
-            retention_time = '15d'
 
-        try:
-            targets = spec.targets
-        except AttributeError:
-            logger.warning('Prometheus targets not found in the spec. Using empty list.')
-            targets = []
-
-        try:
-            retention_size = spec.retention_size if spec.retention_size else '0'
-        except AttributeError:
-            # default to disabled
-            retention_size = '0'
+        spec = cast(PrometheusSpec, self.mgr.spec_store[daemon_spec.service_name].spec)
+        retention_time = get_field_from_spec(spec, 'retention_time', '15d')
+        retention_size = get_field_from_spec(spec, 'retention_size', '0')
+        targets = get_field_from_spec(spec, 'targets', [])
 
         # build service discovery end-point
         security_enabled, mgmt_gw_enabled, oauth2_enabled = self.mgr._get_security_config()
-        port = self.mgr.service_discovery_port
-        mgr_addr = wrap_ipv6(self.mgr.get_mgr_ip())
-
-        protocol = 'https' if security_enabled else 'http'
-        self.mgr.get_mgmt_gw_internal_endpoint()
-        if mgmt_gw_enabled:
-            service_discovery_url_prefix = f'{self.mgr.get_mgmt_gw_internal_endpoint()}'
-        else:
-            service_discovery_url_prefix = f'{protocol}://{mgr_addr}:{port}'
-        srv_end_point = f'{service_discovery_url_prefix}/sd/prometheus/sd-config?'
-
-        node_exporter_cnt = len(self.mgr.cache.get_daemons_by_service('node-exporter'))
-        alertmgr_cnt = len(self.mgr.cache.get_daemons_by_service('alertmanager'))
-        haproxy_cnt = len(self.mgr.cache.get_daemons_by_type('ingress'))
-        node_exporter_sd_url = f'{srv_end_point}service=node-exporter' if node_exporter_cnt > 0 else None
-        alertmanager_sd_url = f'{srv_end_point}service=alertmanager' if alertmgr_cnt > 0 else None
-        haproxy_sd_url = f'{srv_end_point}service=haproxy' if haproxy_cnt > 0 else None
-        mgr_prometheus_sd_url = f'{srv_end_point}service=mgr-prometheus'  # always included
-        ceph_exporter_sd_url = f'{srv_end_point}service=ceph-exporter'  # always included
-        nvmeof_sd_url = f'{srv_end_point}service=nvmeof'  # always included
-        mgmt_gw_enabled = len(self.mgr.cache.get_daemons_by_service('mgmt-gateway')) > 0
-        nfs_sd_url = f'{srv_end_point}service=nfs'  # always included
-        smb_sd_url = f'{srv_end_point}service=smb'  # always included
-
         alertmanager_user, alertmanager_password = self.mgr._get_alertmanager_credentials()
-        prometheus_user, prometheus_password = self.mgr._get_prometheus_credentials()
         federate_path = self.get_target_cluster_federate_path(targets)
         cluster_credentials: Dict[str, Any] = {}
         cluster_credentials_files: Dict[str, Any] = {'files': {}}
-        FSID = self.mgr._cluster_fsid
         if targets:
             if 'dashboard' in self.mgr.get('mgr_map')['modules']:
                 cluster_credentials_files, cluster_credentials = self.mgr.remote(
@@ -576,21 +594,14 @@ class PrometheusService(CephadmService):
         # generate the prometheus configuration
         context = {
             'alertmanager_url_prefix': '/alertmanager' if mgmt_gw_enabled else '/',
+            'security_enabled': security_enabled,
             'alertmanager_web_user': alertmanager_user,
             'alertmanager_web_password': alertmanager_password,
-            'security_enabled': security_enabled,
             'service_discovery_username': self.mgr.http_server.service_discovery.username,
             'service_discovery_password': self.mgr.http_server.service_discovery.password,
-            'mgr_prometheus_sd_url': mgr_prometheus_sd_url,
-            'node_exporter_sd_url': node_exporter_sd_url,
-            'alertmanager_sd_url': alertmanager_sd_url,
-            'haproxy_sd_url': haproxy_sd_url,
-            'ceph_exporter_sd_url': ceph_exporter_sd_url,
-            'nvmeof_sd_url': nvmeof_sd_url,
+            'service_discovery_cfg': self.get_service_discovery_cfg(security_enabled, mgmt_gw_enabled),
             'external_prometheus_targets': targets,
-            'cluster_fsid': FSID,
-            'nfs_sd_url': nfs_sd_url,
-            'smb_sd_url': smb_sd_url,
+            'cluster_fsid': self.mgr._cluster_fsid,
             'clusters_credentials': cluster_credentials,
             'federate_path': federate_path
         }
@@ -600,69 +611,41 @@ class PrometheusService(CephadmService):
             assert daemon_spec.host is not None
             ip_to_bind_to = self.mgr.get_first_matching_network_ip(daemon_spec.host, spec) or ''
             if ip_to_bind_to:
-                daemon_spec.port_ips = {str(port): ip_to_bind_to}
+                daemon_spec.port_ips = {str(self.mgr.service_discovery_port): ip_to_bind_to}
 
-        web_context = {
-            'enable_mtls': mgmt_gw_enabled,
-            'enable_basic_auth': not oauth2_enabled,
-            'prometheus_web_user': prometheus_user,
-            'prometheus_web_password': password_hash(prometheus_password),
+        files = {
+            'prometheus.yml': self.mgr.template.render('services/prometheus/prometheus.yml.j2', context)
+        }
+        r: Dict[str, Any] = {
+            'files': files,
+            'retention_time': retention_time,
+            'retention_size': retention_size,
+            'ip_to_bind_to': ip_to_bind_to,
+            'use_url_prefix': mgmt_gw_enabled
         }
-
         if security_enabled:
             # Following key/cert are needed for:
             # 1- run the prometheus server (web.yml config)
             # 2- use mTLS to scrape node-exporter (prometheus acts as client)
             # 3- use mTLS to send alerts to alertmanager (prometheus acts as client)
-            cert, key = self.get_prometheus_certificates(daemon_spec)
-            r: Dict[str, Any] = {
-                'files': {
-                    'prometheus.yml': self.mgr.template.render('services/prometheus/prometheus.yml.j2', context),
-                    'root_cert.pem': self.mgr.cert_mgr.get_root_ca(),
-                    'web.yml': self.mgr.template.render('services/prometheus/web.yml.j2', web_context),
-                    'prometheus.crt': cert,
-                    'prometheus.key': key,
-                },
-                'retention_time': retention_time,
-                'retention_size': retention_size,
-                'ip_to_bind_to': ip_to_bind_to,
-                'web_config': '/etc/prometheus/web.yml',
-                'use_url_prefix': mgmt_gw_enabled
-            }
-            r['files'].update(cluster_credentials_files['files'])
-        else:
-            r = {
-                'files': {
-                    'prometheus.yml': self.mgr.template.render('services/prometheus/prometheus.yml.j2', context)
-                },
-                'retention_time': retention_time,
-                'retention_size': retention_size,
-                'ip_to_bind_to': ip_to_bind_to,
-                'use_url_prefix': mgmt_gw_enabled
+            prometheus_user, prometheus_password = self.mgr._get_prometheus_credentials()
+            web_context = {
+                'enable_mtls': mgmt_gw_enabled,
+                'enable_basic_auth': not oauth2_enabled,
+                'prometheus_web_user': prometheus_user,
+                'prometheus_web_password': password_hash(prometheus_password),
             }
+            cert, key = self.get_prometheus_certificates(daemon_spec)
+            files.update({
+                'root_cert.pem': self.mgr.cert_mgr.get_root_ca(),
+                'web.yml': self.mgr.template.render('services/prometheus/web.yml.j2', web_context),
+                'prometheus.crt': cert,
+                'prometheus.key': key,
+                **cluster_credentials_files['files']
+            })
+            r.update({'web_config': '/etc/prometheus/web.yml'})
 
-        # include alerts, if present in the container
-        if os.path.exists(self.mgr.prometheus_alerts_path):
-            with open(self.mgr.prometheus_alerts_path, 'r', encoding='utf-8') as f:
-                alerts = f.read()
-            r['files']['/etc/prometheus/alerting/ceph_alerts.yml'] = alerts
-
-        # Include custom alerts if present in key value store. This enables the
-        # users to add custom alerts. Write the file in any case, so that if the
-        # content of the key value store changed, that file is overwritten
-        # (emptied in case they value has been removed from the key value
-        # store). This prevents the necessity to adapt `cephadm` binary to
-        # remove the file.
-        #
-        # Don't use the template engine for it as
-        #
-        #   1. the alerts are always static and
-        #   2. they are a template themselves for the Go template engine, which
-        #      use curly braces and escaping that is cumbersome and unnecessary
-        #      for the user.
-        #
-        r['files']['/etc/prometheus/alerting/custom_alerts.yml'] = \
-            self.mgr.get_store('services/prometheus/alerting/custom_alerts.yml', '')
+        self.configure_alerts(r)
 
         return r, self.get_dependencies(self.mgr)
 
index d823f67bc8a811e534fd1c1a3a4cf68f02489b68..c2381948dc22f4ca35b640b61825f7d2ee951332 100644 (file)
@@ -145,7 +145,7 @@ class Root(Server):
 <head><title>Cephadm HTTP Endpoint</title></head>
 <body>
 <h2>Cephadm Service Discovery Endpoints</h2>
-<p><a href='prometheus/sd-config?service=mgr-prometheus'>mgr/Prometheus http sd-config</a></p>
+<p><a href='prometheus/sd-config?service=ceph'>mgr/Prometheus http sd-config</a></p>
 <p><a href='prometheus/sd-config?service=alertmanager'>Alertmanager http sd-config</a></p>
 <p><a href='prometheus/sd-config?service=node-exporter'>Node exporter http sd-config</a></p>
 <p><a href='prometheus/sd-config?service=haproxy'>HAProxy http sd-config</a></p>
@@ -161,26 +161,23 @@ class Root(Server):
     @cherrypy.tools.json_out()
     def get_sd_config(self, service: str) -> List[Dict[str, Collection[str]]]:
         """Return <http_sd_config> compatible prometheus config for the specified service."""
-        if service == 'mgr-prometheus':
-            return self.prometheus_sd_config()
-        elif service == 'alertmanager':
-            return self.alertmgr_sd_config()
-        elif service == 'node-exporter':
-            return self.node_exporter_sd_config()
-        elif service == 'haproxy':
-            return self.haproxy_sd_config()
-        elif service == 'ceph-exporter':
-            return self.ceph_exporter_sd_config()
-        elif service == 'nvmeof':
-            return self.nvmeof_sd_config()
-        elif service == 'nfs':
-            return self.nfs_sd_config()
-        elif service == 'smb':
-            return self.smb_sd_config()
-        elif service.startswith("container"):
+
+        if service.startswith("container"):
             return self.container_sd_config(service)
-        else:
-            return []
+
+        service_to_config = {
+            'mgr-prometheus': self.prometheus_sd_config,
+            'ceph': self.prometheus_sd_config,
+            'alertmanager': self.alertmgr_sd_config,
+            'node-exporter': self.node_exporter_sd_config,
+            'haproxy': self.haproxy_sd_config,
+            'ingress': self.haproxy_sd_config,
+            'ceph-exporter': self.ceph_exporter_sd_config,
+            'nvmeof': self.nvmeof_sd_config,
+            'nfs': self.nfs_sd_config,
+            'smb': self.smb_sd_config,
+        }
+        return service_to_config.get(service, lambda: [])()
 
     def prometheus_sd_config(self) -> List[Dict[str, Collection[str]]]:
         """Return <http_sd_config> compatible prometheus config for prometheus service.
index 961da145dac639ec8ad3ce22b142dcdfe742b378..2afbf606af2b86ea2faadbe7a841c931f9b179e7 100644 (file)
@@ -8,10 +8,10 @@ global:
 rule_files:
   - /etc/prometheus/alerting/*
 
-{% if alertmanager_sd_url %}
+{% if 'alertmanager' in service_discovery_cfg %}
 alerting:
   alertmanagers:
-{% if security_enabled %}
+  {% if security_enabled %}
     - scheme: https
       basic_auth:
         username: {{ alertmanager_web_user }}
@@ -19,197 +19,65 @@ alerting:
       tls_config:
         ca_file: root_cert.pem
         cert_file: prometheus.crt
-        key_file:  prometheus.key
+        key_file: prometheus.key
       path_prefix: '{{ alertmanager_url_prefix }}'
       http_sd_configs:
-        - url: {{ alertmanager_sd_url }}
+      {% for url in service_discovery_cfg['alertmanager'] %}
+        - url: {{ url }}
           basic_auth:
             username: {{ service_discovery_username }}
             password: {{ service_discovery_password }}
           tls_config:
             ca_file: root_cert.pem
             cert_file: prometheus.crt
-            key_file:  prometheus.key
-{% else %}
+            key_file: prometheus.key
+      {% endfor %}
+  {% else %}
     - scheme: http
       http_sd_configs:
-        - url: {{ alertmanager_sd_url }}
-{% endif %}
+      {% for url in service_discovery_cfg['alertmanager'] %}
+        - url: {{ url }}
+      {% endfor %}
+  {% endif %}
 {% endif %}
 
 scrape_configs:
-  - job_name: 'ceph'
+{% for service, urls in service_discovery_cfg.items() %}
+ {% if service != 'alertmanager' %}
+  - job_name: '{{ service }}'
     relabel_configs:
     - source_labels: [__address__]
       target_label: cluster
       replacement: {{ cluster_fsid }}
+    {% if service == 'ceph' %}
     - source_labels: [instance]
       target_label: instance
       replacement: 'ceph_cluster'
-{% if security_enabled %}
-    scheme: https
-    tls_config:
-      ca_file: root_cert.pem
-    honor_labels: true
-    http_sd_configs:
-    - url: {{ mgr_prometheus_sd_url }}
-      basic_auth:
-        username: {{ service_discovery_username }}
-        password: {{ service_discovery_password }}
-      tls_config:
-        ca_file: root_cert.pem
-        cert_file: prometheus.crt
-        key_file:  prometheus.key
-{% else %}
-    honor_labels: true
-    http_sd_configs:
-    - url: {{ mgr_prometheus_sd_url }}
-{% endif %}
-
-{% if node_exporter_sd_url %}
-  - job_name: 'node'
-    relabel_configs:
-    - source_labels: [__address__]
-      target_label: cluster
-      replacement: {{ cluster_fsid }}
-{% if security_enabled %}
+    {% endif %}
+    {% if security_enabled %}
     scheme: https
     tls_config:
       ca_file: root_cert.pem
       cert_file: prometheus.crt
-      key_file:  prometheus.key
-    http_sd_configs:
-    - url: {{ node_exporter_sd_url }}
-      basic_auth:
-        username: {{ service_discovery_username }}
-        password: {{ service_discovery_password }}
-      tls_config:
-        ca_file: root_cert.pem
-        cert_file: prometheus.crt
-        key_file:  prometheus.key
-{% else %}
-    http_sd_configs:
-    - url: {{ node_exporter_sd_url }}
-{% endif %}
-{% endif %}
-
-{% if haproxy_sd_url %}
-  - job_name: 'haproxy'
-    relabel_configs:
-    - source_labels: [__address__]
-      target_label: cluster
-      replacement: {{ cluster_fsid }}
-{% if security_enabled %}
-    scheme: https
-    tls_config:
-      ca_file: root_cert.pem
-    http_sd_configs:
-    - url: {{ haproxy_sd_url }}
-      basic_auth:
-        username: {{ service_discovery_username }}
-        password: {{ service_discovery_password }}
-      tls_config:
-        ca_file: root_cert.pem
-        cert_file: prometheus.crt
-        key_file:  prometheus.key
-{% else %}
-    http_sd_configs:
-    - url: {{ haproxy_sd_url }}
-{% endif %}
-{% endif %}
-
-{% if ceph_exporter_sd_url %}
-  - job_name: 'ceph-exporter'
-    relabel_configs:
-    - source_labels: [__address__]
-      target_label: cluster
-      replacement: {{ cluster_fsid }}
-{% if security_enabled %}
+      key_file: prometheus.key
+    {% endif %}
     honor_labels: true
-    scheme: https
-    tls_config:
-      ca_file: root_cert.pem
     http_sd_configs:
-    - url: {{ ceph_exporter_sd_url }}
+    {% for url in urls %}
+    - url: {{ url }}
+      {% if security_enabled %}
       basic_auth:
         username: {{ service_discovery_username }}
         password: {{ service_discovery_password }}
       tls_config:
         ca_file: root_cert.pem
         cert_file: prometheus.crt
-        key_file:  prometheus.key
-{% else %}
-    honor_labels: true
-    http_sd_configs:
-    - url: {{ ceph_exporter_sd_url }}
-{% endif %}
-{% endif %}
+        key_file: prometheus.key
+      {% endif %}
+    {% endfor %}
 
-{% if nvmeof_sd_url %}
-  - job_name: 'nvmeof'
-{% if security_enabled %}
-    honor_labels: true
-    scheme: https
-    tls_config:
-      ca_file: root_cert.pem
-    http_sd_configs:
-    - url: {{ nvmeof_sd_url }}
-      basic_auth:
-        username: {{ service_discovery_username }}
-        password: {{ service_discovery_password }}
-      tls_config:
-        ca_file: root_cert.pem
-        cert_file: prometheus.crt
-        key_file:  prometheus.key
-{% else %}
-    http_sd_configs:
-    - url: {{ nvmeof_sd_url }}
-{% endif %}
-{% endif %}
-
-{% if nfs_sd_url %}
-  - job_name: 'nfs'
-{% if security_enabled %}
-    honor_labels: true
-    scheme: https
-    tls_config:
-      ca_file: root_cert.pem
-    http_sd_configs:
-    - url: {{ nfs_sd_url }}
-      basic_auth:
-        username: {{ service_discovery_username }}
-        password: {{ service_discovery_password }}
-      tls_config:
-        ca_file: root_cert.pem
-        cert_file: prometheus.crt
-        key_file:  prometheus.key
-{% else %}
-    http_sd_configs:
-    - url: {{ nfs_sd_url }}
-{% endif %}
-{% endif %}
-
-{% if smb_sd_url %}
-  - job_name: 'smb'
-{% if security_enabled %}
-    honor_labels: true
-    scheme: https
-    tls_config:
-      ca_file: root_cert.pem
-    http_sd_configs:
-    - url: {{ smb_sd_url }}
-      basic_auth:
-        username: {{ service_discovery_username }}
-        password: {{ service_discovery_password }}
-      tls_config:
-        ca_file: root_cert.pem
-        cert_file: prometheus.crt
-        key_file:  prometheus.key
-{% else %}
-    http_sd_configs:
-    - url: {{ smb_sd_url }}
-{% endif %}
-{% endif %}
+ {% endif %}
+{% endfor %}
 
 {% for url, details in clusters_credentials.items() %}
   - job_name: 'federate_{{ loop.index }}'
@@ -237,4 +105,3 @@ scrape_configs:
     static_configs:
     - targets: ['{{ url }}']
 {% endfor %}
-
index f7cd18b5398fba58c1bd8223c3639f7cdd5ffda2..8ac23d5d8d5ab21ddfb254f4e2faacc418538790 100644 (file)
@@ -1143,7 +1143,7 @@ class TestMonitoring:
                 )
 
     @patch("cephadm.serve.CephadmServe._run_cephadm")
-    @patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '::1')
+    @patch("cephadm.module.CephadmOrchestrator._get_mgr_ips", lambda _: ['192.168.100.100', '::1'])
     def test_prometheus_config_security_disabled(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
         _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
         s = RGWSpec(service_id="foo", placement=PlacementSpec(count=1), rgw_frontend_type='beast')
@@ -1185,6 +1185,7 @@ class TestMonitoring:
                   alertmanagers:
                     - scheme: http
                       http_sd_configs:
+                        - url: http://192.168.100.100:8765/sd/prometheus/sd-config?service=alertmanager
                         - url: http://[::1]:8765/sd/prometheus/sd-config?service=alertmanager
 
                 scrape_configs:
@@ -1198,44 +1199,39 @@ class TestMonitoring:
                       replacement: 'ceph_cluster'
                     honor_labels: true
                     http_sd_configs:
-                    - url: http://[::1]:8765/sd/prometheus/sd-config?service=mgr-prometheus
+                    - url: http://192.168.100.100:8765/sd/prometheus/sd-config?service=ceph
+                    - url: http://[::1]:8765/sd/prometheus/sd-config?service=ceph
 
-                  - job_name: 'node'
+                  - job_name: 'ceph-exporter'
                     relabel_configs:
                     - source_labels: [__address__]
                       target_label: cluster
                       replacement: fsid
+                    honor_labels: true
                     http_sd_configs:
-                    - url: http://[::1]:8765/sd/prometheus/sd-config?service=node-exporter
+                    - url: http://192.168.100.100:8765/sd/prometheus/sd-config?service=ceph-exporter
+                    - url: http://[::1]:8765/sd/prometheus/sd-config?service=ceph-exporter
 
-                  - job_name: 'haproxy'
+                  - job_name: 'ingress'
                     relabel_configs:
                     - source_labels: [__address__]
                       target_label: cluster
                       replacement: fsid
+                    honor_labels: true
                     http_sd_configs:
-                    - url: http://[::1]:8765/sd/prometheus/sd-config?service=haproxy
+                    - url: http://192.168.100.100:8765/sd/prometheus/sd-config?service=ingress
+                    - url: http://[::1]:8765/sd/prometheus/sd-config?service=ingress
 
-                  - job_name: 'ceph-exporter'
+                  - job_name: 'node-exporter'
                     relabel_configs:
                     - source_labels: [__address__]
                       target_label: cluster
                       replacement: fsid
                     honor_labels: true
                     http_sd_configs:
-                    - url: http://[::1]:8765/sd/prometheus/sd-config?service=ceph-exporter
-
-                  - job_name: 'nvmeof'
-                    http_sd_configs:
-                    - url: http://[::1]:8765/sd/prometheus/sd-config?service=nvmeof
-
-                  - job_name: 'nfs'
-                    http_sd_configs:
-                    - url: http://[::1]:8765/sd/prometheus/sd-config?service=nfs
+                    - url: http://192.168.100.100:8765/sd/prometheus/sd-config?service=node-exporter
+                    - url: http://[::1]:8765/sd/prometheus/sd-config?service=node-exporter
 
-                  - job_name: 'smb'
-                    http_sd_configs:
-                    - url: http://[::1]:8765/sd/prometheus/sd-config?service=smb
 
                 """).lstrip()
 
@@ -1278,17 +1274,17 @@ class TestMonitoring:
                     use_current_daemon_image=False,
                 )
 
+    @patch("cephadm.module.CephadmOrchestrator.get_unique_name")
     @patch("cephadm.serve.CephadmServe._run_cephadm")
-    @patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '::1')
+    @patch("cephadm.module.CephadmOrchestrator._get_mgr_ips", lambda _: ['::1'])
     @patch("cephadm.services.monitoring.password_hash", lambda password: 'prometheus_password_hash')
     @patch('cephadm.cert_mgr.CertMgr.get_root_ca', lambda instance: 'cephadm_root_cert')
     @patch('cephadm.cert_mgr.CertMgr.generate_cert', lambda instance, fqdn, ip: ('mycert', 'mykey'))
-    def test_prometheus_config_security_enabled(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+    def test_prometheus_config_security_enabled(self, _run_cephadm, _get_uname, cephadm_module: CephadmOrchestrator):
         _run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+        _get_uname.return_value = 'test'
         s = RGWSpec(service_id="foo", placement=PlacementSpec(count=1), rgw_frontend_type='beast')
-
-        def gen_cert(host, addr):
-            return ('mycert', 'mykey')
+        smb_spec = SMBSpec(cluster_id='foxtrot', config_uri='rados://.smb/foxtrot/config.json',)
 
         with with_host(cephadm_module, 'test'):
             cephadm_module.secure_monitoring_stack = True
@@ -1305,6 +1301,8 @@ class TestMonitoring:
                 },
             })
             with with_service(cephadm_module, MonitoringSpec('node-exporter')) as _, \
+                    with_service(cephadm_module, smb_spec) as _, \
+                    with_service(cephadm_module, CephExporterSpec('ceph-exporter')) as _, \
                     with_service(cephadm_module, s) as _, \
                     with_service(cephadm_module, AlertManagerSpec('alertmanager')) as _, \
                     with_service(cephadm_module, IngressSpec(service_id='ingress',
@@ -1345,7 +1343,7 @@ class TestMonitoring:
                       tls_config:
                         ca_file: root_cert.pem
                         cert_file: prometheus.crt
-                        key_file:  prometheus.key
+                        key_file: prometheus.key
                       path_prefix: '/'
                       http_sd_configs:
                         - url: https://[::1]:8765/sd/prometheus/sd-config?service=alertmanager
@@ -1355,7 +1353,7 @@ class TestMonitoring:
                           tls_config:
                             ca_file: root_cert.pem
                             cert_file: prometheus.crt
-                            key_file:  prometheus.key
+                            key_file: prometheus.key
 
                 scrape_configs:
                   - job_name: 'ceph'
@@ -1369,18 +1367,20 @@ class TestMonitoring:
                     scheme: https
                     tls_config:
                       ca_file: root_cert.pem
+                      cert_file: prometheus.crt
+                      key_file: prometheus.key
                     honor_labels: true
                     http_sd_configs:
-                    - url: https://[::1]:8765/sd/prometheus/sd-config?service=mgr-prometheus
+                    - url: https://[::1]:8765/sd/prometheus/sd-config?service=ceph
                       basic_auth:
                         username: sd_user
                         password: sd_password
                       tls_config:
                         ca_file: root_cert.pem
                         cert_file: prometheus.crt
-                        key_file:  prometheus.key
+                        key_file: prometheus.key
 
-                  - job_name: 'node'
+                  - job_name: 'ceph-exporter'
                     relabel_configs:
                     - source_labels: [__address__]
                       target_label: cluster
@@ -1389,18 +1389,19 @@ class TestMonitoring:
                     tls_config:
                       ca_file: root_cert.pem
                       cert_file: prometheus.crt
-                      key_file:  prometheus.key
+                      key_file: prometheus.key
+                    honor_labels: true
                     http_sd_configs:
-                    - url: https://[::1]:8765/sd/prometheus/sd-config?service=node-exporter
+                    - url: https://[::1]:8765/sd/prometheus/sd-config?service=ceph-exporter
                       basic_auth:
                         username: sd_user
                         password: sd_password
                       tls_config:
                         ca_file: root_cert.pem
                         cert_file: prometheus.crt
-                        key_file:  prometheus.key
+                        key_file: prometheus.key
 
-                  - job_name: 'haproxy'
+                  - job_name: 'ingress'
                     relabel_configs:
                     - source_labels: [__address__]
                       target_label: cluster
@@ -1408,70 +1409,51 @@ class TestMonitoring:
                     scheme: https
                     tls_config:
                       ca_file: root_cert.pem
+                      cert_file: prometheus.crt
+                      key_file: prometheus.key
+                    honor_labels: true
                     http_sd_configs:
-                    - url: https://[::1]:8765/sd/prometheus/sd-config?service=haproxy
+                    - url: https://[::1]:8765/sd/prometheus/sd-config?service=ingress
                       basic_auth:
                         username: sd_user
                         password: sd_password
                       tls_config:
                         ca_file: root_cert.pem
                         cert_file: prometheus.crt
-                        key_file:  prometheus.key
+                        key_file: prometheus.key
 
-                  - job_name: 'ceph-exporter'
+                  - job_name: 'node-exporter'
                     relabel_configs:
                     - source_labels: [__address__]
                       target_label: cluster
                       replacement: fsid
-                    honor_labels: true
-                    scheme: https
-                    tls_config:
-                      ca_file: root_cert.pem
-                    http_sd_configs:
-                    - url: https://[::1]:8765/sd/prometheus/sd-config?service=ceph-exporter
-                      basic_auth:
-                        username: sd_user
-                        password: sd_password
-                      tls_config:
-                        ca_file: root_cert.pem
-                        cert_file: prometheus.crt
-                        key_file:  prometheus.key
-
-                  - job_name: 'nvmeof'
-                    honor_labels: true
                     scheme: https
                     tls_config:
                       ca_file: root_cert.pem
-                    http_sd_configs:
-                    - url: https://[::1]:8765/sd/prometheus/sd-config?service=nvmeof
-                      basic_auth:
-                        username: sd_user
-                        password: sd_password
-                      tls_config:
-                        ca_file: root_cert.pem
-                        cert_file: prometheus.crt
-                        key_file:  prometheus.key
-
-                  - job_name: 'nfs'
+                      cert_file: prometheus.crt
+                      key_file: prometheus.key
                     honor_labels: true
-                    scheme: https
-                    tls_config:
-                      ca_file: root_cert.pem
                     http_sd_configs:
-                    - url: https://[::1]:8765/sd/prometheus/sd-config?service=nfs
+                    - url: https://[::1]:8765/sd/prometheus/sd-config?service=node-exporter
                       basic_auth:
                         username: sd_user
                         password: sd_password
                       tls_config:
                         ca_file: root_cert.pem
                         cert_file: prometheus.crt
-                        key_file:  prometheus.key
+                        key_file: prometheus.key
 
                   - job_name: 'smb'
-                    honor_labels: true
+                    relabel_configs:
+                    - source_labels: [__address__]
+                      target_label: cluster
+                      replacement: fsid
                     scheme: https
                     tls_config:
                       ca_file: root_cert.pem
+                      cert_file: prometheus.crt
+                      key_file: prometheus.key
+                    honor_labels: true
                     http_sd_configs:
                     - url: https://[::1]:8765/sd/prometheus/sd-config?service=smb
                       basic_auth:
@@ -1480,7 +1462,8 @@ class TestMonitoring:
                       tls_config:
                         ca_file: root_cert.pem
                         cert_file: prometheus.crt
-                        key_file:  prometheus.key
+                        key_file: prometheus.key
+
 
                 """).lstrip()
 
@@ -1519,8 +1502,8 @@ class TestMonitoring:
                             'retention_time': '15d',
                             'retention_size': '0',
                             'ip_to_bind_to': '',
-                            'web_config': '/etc/prometheus/web.yml',
-                            "use_url_prefix": False
+                            "use_url_prefix": False,
+                            'web_config': '/etc/prometheus/web.yml'
                         },
                     }),
                     error_ok=True,