From d1ba8e9c56a23d57e2b203a78a8f2e8cc470c27d Mon Sep 17 00:00:00 2001 From: Redouane Kachach Date: Thu, 19 Jun 2025 09:55:03 +0200 Subject: [PATCH] mgr/cephadm: adding automation for Prometheus config generation Signed-off-by: Redouane Kachach --- src/pybind/mgr/cephadm/module.py | 5 + src/pybind/mgr/cephadm/services/monitoring.py | 209 ++++++++---------- .../mgr/cephadm/services/service_discovery.py | 37 ++-- .../services/prometheus/prometheus.yml.j2 | 189 +++------------- src/pybind/mgr/cephadm/tests/test_services.py | 127 +++++------ 5 files changed, 201 insertions(+), 366 deletions(-) diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py index 0e53c2e9f0a..f6b3c49355a 100644 --- a/src/pybind/mgr/cephadm/module.py +++ b/src/pybind/mgr/cephadm/module.py @@ -743,6 +743,11 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, self.cert_mgr.init_tlsobject_store() + def _get_mgr_ips(self) -> List[str]: + return [self.inventory.get_addr(d.hostname) + for d in self.cache.get_daemons_by_service('mgr') + if d.hostname is not None] + def _get_security_config(self) -> Tuple[bool, bool, bool]: oauth2_proxy_enabled = len(self.cache.get_daemons_by_service('oauth2-proxy')) > 0 mgmt_gw_enabled = len(self.cache.get_daemons_by_service('mgmt-gateway')) > 0 diff --git a/src/pybind/mgr/cephadm/services/monitoring.py b/src/pybind/mgr/cephadm/services/monitoring.py index b2fddd01da7..ce21fd02778 100644 --- a/src/pybind/mgr/cephadm/services/monitoring.py +++ b/src/pybind/mgr/cephadm/services/monitoring.py @@ -9,6 +9,7 @@ import requests from mgr_module import HandleCommandResult from .service_registry import register_cephadm_service +from cephadm.services.service_registry import service_registry from orchestrator import DaemonDescription from ceph.deployment.service_spec import AlertManagerSpec, GrafanaSpec, ServiceSpec, \ @@ -24,6 +25,14 @@ if TYPE_CHECKING: logger = logging.getLogger(__name__) +def get_field_from_spec(spec: ServiceSpec, attr: 
str, default: Any) -> Any: + try: + value = getattr(spec, attr) + return value if value else default + except AttributeError: + return default + + @register_cephadm_service class GrafanaService(CephadmService): TYPE = 'grafana' @@ -484,6 +493,14 @@ class PrometheusService(CephadmService): USER_CFG_KEY = 'prometheus/web_user' PASS_CFG_KEY = 'prometheus/web_password' + def prepare_create( + self, + daemon_spec: CephadmDaemonDeploySpec, + ) -> CephadmDaemonDeploySpec: + assert self.TYPE == daemon_spec.daemon_type + daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec) + return daemon_spec + def config(self, spec: ServiceSpec) -> None: # make sure module is enabled mgr_map = self.mgr.get('mgr_map') @@ -501,13 +518,52 @@ class PrometheusService(CephadmService): cert, key = self.mgr.cert_mgr.generate_cert([host_fqdn, 'prometheus_servers'], node_ip) return cert, key - def prepare_create( - self, - daemon_spec: CephadmDaemonDeploySpec, - ) -> CephadmDaemonDeploySpec: - assert self.TYPE == daemon_spec.daemon_type - daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec) - return daemon_spec + def get_service_discovery_cfg(self, security_enabled: bool, mgmt_gw_enabled: bool) -> Dict[str, List[str]]: + """ + Retrieves the service discovery URLs for the services that require monitoring + + Returns: + Dict[str, List[str]]: A dictionary where the keys represent service categories (e.g., "nfs", "node-exporter") and + the values are a list of service-discovery URLs used to get the corresponding service targets. 
+ """ + if mgmt_gw_enabled: + service_discovery_url_prefixes = [f'{self.mgr.get_mgmt_gw_internal_endpoint()}'] + else: + port = self.mgr.service_discovery_port + protocol = 'https' if security_enabled else 'http' + service_discovery_url_prefixes = [f'{protocol}://{wrap_ipv6(ip)}:{port}' + for ip in self.mgr._get_mgr_ips()] + return { + service: [f'{prefix}/sd/prometheus/sd-config?service={service}' for prefix in service_discovery_url_prefixes] + for service in service_registry.get_services_requiring_monitoring() + if service == 'ceph' + or bool(self.mgr.cache.get_daemons_by_service(service)) + or bool(self.mgr.cache.get_daemons_by_type(service)) + } + + def configure_alerts(self, r: Dict) -> None: + # include alerts, if present in the container + if os.path.exists(self.mgr.prometheus_alerts_path): + with open(self.mgr.prometheus_alerts_path, 'r', encoding='utf-8') as f: + alerts = f.read() + r['files']['/etc/prometheus/alerting/ceph_alerts.yml'] = alerts + + # Include custom alerts if present in key value store. This enables the + # users to add custom alerts. Write the file in any case, so that if the + # content of the key value store changed, that file is overwritten + # (emptied in case they value has been removed from the key value + # store). This prevents the necessity to adapt `cephadm` binary to + # remove the file. + # + # Don't use the template engine for it as + # + # 1. the alerts are always static and + # 2. they are a template themselves for the Go template engine, which + # use curly braces and escaping that is cumbersome and unnecessary + # for the user. 
+ # + r['files']['/etc/prometheus/alerting/custom_alerts.yml'] = \ + self.mgr.get_store('services/prometheus/alerting/custom_alerts.yml', '') def generate_config( self, @@ -515,56 +571,18 @@ class PrometheusService(CephadmService): ) -> Tuple[Dict[str, Any], List[str]]: assert self.TYPE == daemon_spec.daemon_type - spec = cast(PrometheusSpec, self.mgr.spec_store[daemon_spec.service_name].spec) - try: - retention_time = spec.retention_time if spec.retention_time else '15d' - except AttributeError: - retention_time = '15d' - try: - targets = spec.targets - except AttributeError: - logger.warning('Prometheus targets not found in the spec. Using empty list.') - targets = [] - - try: - retention_size = spec.retention_size if spec.retention_size else '0' - except AttributeError: - # default to disabled - retention_size = '0' + spec = cast(PrometheusSpec, self.mgr.spec_store[daemon_spec.service_name].spec) + retention_time = get_field_from_spec(spec, 'retention_time', '15d') + retention_size = get_field_from_spec(spec, 'retention_size', '0') + targets = get_field_from_spec(spec, 'targets', []) # build service discovery end-point security_enabled, mgmt_gw_enabled, oauth2_enabled = self.mgr._get_security_config() - port = self.mgr.service_discovery_port - mgr_addr = wrap_ipv6(self.mgr.get_mgr_ip()) - - protocol = 'https' if security_enabled else 'http' - self.mgr.get_mgmt_gw_internal_endpoint() - if mgmt_gw_enabled: - service_discovery_url_prefix = f'{self.mgr.get_mgmt_gw_internal_endpoint()}' - else: - service_discovery_url_prefix = f'{protocol}://{mgr_addr}:{port}' - srv_end_point = f'{service_discovery_url_prefix}/sd/prometheus/sd-config?' 
- - node_exporter_cnt = len(self.mgr.cache.get_daemons_by_service('node-exporter')) - alertmgr_cnt = len(self.mgr.cache.get_daemons_by_service('alertmanager')) - haproxy_cnt = len(self.mgr.cache.get_daemons_by_type('ingress')) - node_exporter_sd_url = f'{srv_end_point}service=node-exporter' if node_exporter_cnt > 0 else None - alertmanager_sd_url = f'{srv_end_point}service=alertmanager' if alertmgr_cnt > 0 else None - haproxy_sd_url = f'{srv_end_point}service=haproxy' if haproxy_cnt > 0 else None - mgr_prometheus_sd_url = f'{srv_end_point}service=mgr-prometheus' # always included - ceph_exporter_sd_url = f'{srv_end_point}service=ceph-exporter' # always included - nvmeof_sd_url = f'{srv_end_point}service=nvmeof' # always included - mgmt_gw_enabled = len(self.mgr.cache.get_daemons_by_service('mgmt-gateway')) > 0 - nfs_sd_url = f'{srv_end_point}service=nfs' # always included - smb_sd_url = f'{srv_end_point}service=smb' # always included - alertmanager_user, alertmanager_password = self.mgr._get_alertmanager_credentials() - prometheus_user, prometheus_password = self.mgr._get_prometheus_credentials() federate_path = self.get_target_cluster_federate_path(targets) cluster_credentials: Dict[str, Any] = {} cluster_credentials_files: Dict[str, Any] = {'files': {}} - FSID = self.mgr._cluster_fsid if targets: if 'dashboard' in self.mgr.get('mgr_map')['modules']: cluster_credentials_files, cluster_credentials = self.mgr.remote( @@ -576,21 +594,14 @@ class PrometheusService(CephadmService): # generate the prometheus configuration context = { 'alertmanager_url_prefix': '/alertmanager' if mgmt_gw_enabled else '/', + 'security_enabled': security_enabled, 'alertmanager_web_user': alertmanager_user, 'alertmanager_web_password': alertmanager_password, - 'security_enabled': security_enabled, 'service_discovery_username': self.mgr.http_server.service_discovery.username, 'service_discovery_password': self.mgr.http_server.service_discovery.password, - 'mgr_prometheus_sd_url': 
mgr_prometheus_sd_url, - 'node_exporter_sd_url': node_exporter_sd_url, - 'alertmanager_sd_url': alertmanager_sd_url, - 'haproxy_sd_url': haproxy_sd_url, - 'ceph_exporter_sd_url': ceph_exporter_sd_url, - 'nvmeof_sd_url': nvmeof_sd_url, + 'service_discovery_cfg': self.get_service_discovery_cfg(security_enabled, mgmt_gw_enabled), 'external_prometheus_targets': targets, - 'cluster_fsid': FSID, - 'nfs_sd_url': nfs_sd_url, - 'smb_sd_url': smb_sd_url, + 'cluster_fsid': self.mgr._cluster_fsid, 'clusters_credentials': cluster_credentials, 'federate_path': federate_path } @@ -600,69 +611,41 @@ class PrometheusService(CephadmService): assert daemon_spec.host is not None ip_to_bind_to = self.mgr.get_first_matching_network_ip(daemon_spec.host, spec) or '' if ip_to_bind_to: - daemon_spec.port_ips = {str(port): ip_to_bind_to} + daemon_spec.port_ips = {str(self.mgr.service_discovery_port): ip_to_bind_to} - web_context = { - 'enable_mtls': mgmt_gw_enabled, - 'enable_basic_auth': not oauth2_enabled, - 'prometheus_web_user': prometheus_user, - 'prometheus_web_password': password_hash(prometheus_password), + files = { + 'prometheus.yml': self.mgr.template.render('services/prometheus/prometheus.yml.j2', context) + } + r: Dict[str, Any] = { + 'files': files, + 'retention_time': retention_time, + 'retention_size': retention_size, + 'ip_to_bind_to': ip_to_bind_to, + 'use_url_prefix': mgmt_gw_enabled } - if security_enabled: # Following key/cert are needed for: # 1- run the prometheus server (web.yml config) # 2- use mTLS to scrape node-exporter (prometheus acts as client) # 3- use mTLS to send alerts to alertmanager (prometheus acts as client) - cert, key = self.get_prometheus_certificates(daemon_spec) - r: Dict[str, Any] = { - 'files': { - 'prometheus.yml': self.mgr.template.render('services/prometheus/prometheus.yml.j2', context), - 'root_cert.pem': self.mgr.cert_mgr.get_root_ca(), - 'web.yml': self.mgr.template.render('services/prometheus/web.yml.j2', web_context), - 'prometheus.crt': 
cert, - 'prometheus.key': key, - }, - 'retention_time': retention_time, - 'retention_size': retention_size, - 'ip_to_bind_to': ip_to_bind_to, - 'web_config': '/etc/prometheus/web.yml', - 'use_url_prefix': mgmt_gw_enabled - } - r['files'].update(cluster_credentials_files['files']) - else: - r = { - 'files': { - 'prometheus.yml': self.mgr.template.render('services/prometheus/prometheus.yml.j2', context) - }, - 'retention_time': retention_time, - 'retention_size': retention_size, - 'ip_to_bind_to': ip_to_bind_to, - 'use_url_prefix': mgmt_gw_enabled + prometheus_user, prometheus_password = self.mgr._get_prometheus_credentials() + web_context = { + 'enable_mtls': mgmt_gw_enabled, + 'enable_basic_auth': not oauth2_enabled, + 'prometheus_web_user': prometheus_user, + 'prometheus_web_password': password_hash(prometheus_password), } + cert, key = self.get_prometheus_certificates(daemon_spec) + files.update({ + 'root_cert.pem': self.mgr.cert_mgr.get_root_ca(), + 'web.yml': self.mgr.template.render('services/prometheus/web.yml.j2', web_context), + 'prometheus.crt': cert, + 'prometheus.key': key, + **cluster_credentials_files['files'] + }) + r.update({'web_config': '/etc/prometheus/web.yml'}) - # include alerts, if present in the container - if os.path.exists(self.mgr.prometheus_alerts_path): - with open(self.mgr.prometheus_alerts_path, 'r', encoding='utf-8') as f: - alerts = f.read() - r['files']['/etc/prometheus/alerting/ceph_alerts.yml'] = alerts - - # Include custom alerts if present in key value store. This enables the - # users to add custom alerts. Write the file in any case, so that if the - # content of the key value store changed, that file is overwritten - # (emptied in case they value has been removed from the key value - # store). This prevents the necessity to adapt `cephadm` binary to - # remove the file. - # - # Don't use the template engine for it as - # - # 1. the alerts are always static and - # 2. 
they are a template themselves for the Go template engine, which - # use curly braces and escaping that is cumbersome and unnecessary - # for the user. - # - r['files']['/etc/prometheus/alerting/custom_alerts.yml'] = \ - self.mgr.get_store('services/prometheus/alerting/custom_alerts.yml', '') + self.configure_alerts(r) return r, self.get_dependencies(self.mgr) diff --git a/src/pybind/mgr/cephadm/services/service_discovery.py b/src/pybind/mgr/cephadm/services/service_discovery.py index d823f67bc8a..c2381948dc2 100644 --- a/src/pybind/mgr/cephadm/services/service_discovery.py +++ b/src/pybind/mgr/cephadm/services/service_discovery.py @@ -145,7 +145,7 @@ class Root(Server): Cephadm HTTP Endpoint

Cephadm Service Discovery Endpoints

-

mgr/Prometheus http sd-config

+

mgr/Prometheus http sd-config

Alertmanager http sd-config

Node exporter http sd-config

HAProxy http sd-config

@@ -161,26 +161,23 @@ class Root(Server): @cherrypy.tools.json_out() def get_sd_config(self, service: str) -> List[Dict[str, Collection[str]]]: """Return compatible prometheus config for the specified service.""" - if service == 'mgr-prometheus': - return self.prometheus_sd_config() - elif service == 'alertmanager': - return self.alertmgr_sd_config() - elif service == 'node-exporter': - return self.node_exporter_sd_config() - elif service == 'haproxy': - return self.haproxy_sd_config() - elif service == 'ceph-exporter': - return self.ceph_exporter_sd_config() - elif service == 'nvmeof': - return self.nvmeof_sd_config() - elif service == 'nfs': - return self.nfs_sd_config() - elif service == 'smb': - return self.smb_sd_config() - elif service.startswith("container"): + + if service.startswith("container"): return self.container_sd_config(service) - else: - return [] + + service_to_config = { + 'mgr-prometheus': self.prometheus_sd_config, + 'ceph': self.prometheus_sd_config, + 'alertmanager': self.alertmgr_sd_config, + 'node-exporter': self.node_exporter_sd_config, + 'haproxy': self.haproxy_sd_config, + 'ingress': self.haproxy_sd_config, + 'ceph-exporter': self.ceph_exporter_sd_config, + 'nvmeof': self.nvmeof_sd_config, + 'nfs': self.nfs_sd_config, + 'smb': self.smb_sd_config, + } + return service_to_config.get(service, lambda: [])() def prometheus_sd_config(self) -> List[Dict[str, Collection[str]]]: """Return compatible prometheus config for prometheus service. 
diff --git a/src/pybind/mgr/cephadm/templates/services/prometheus/prometheus.yml.j2 b/src/pybind/mgr/cephadm/templates/services/prometheus/prometheus.yml.j2 index 961da145dac..2afbf606af2 100644 --- a/src/pybind/mgr/cephadm/templates/services/prometheus/prometheus.yml.j2 +++ b/src/pybind/mgr/cephadm/templates/services/prometheus/prometheus.yml.j2 @@ -8,10 +8,10 @@ global: rule_files: - /etc/prometheus/alerting/* -{% if alertmanager_sd_url %} +{% if 'alertmanager' in service_discovery_cfg %} alerting: alertmanagers: -{% if security_enabled %} + {% if security_enabled %} - scheme: https basic_auth: username: {{ alertmanager_web_user }} @@ -19,197 +19,65 @@ alerting: tls_config: ca_file: root_cert.pem cert_file: prometheus.crt - key_file: prometheus.key + key_file: prometheus.key path_prefix: '{{ alertmanager_url_prefix }}' http_sd_configs: - - url: {{ alertmanager_sd_url }} + {% for url in service_discovery_cfg['alertmanager'] %} + - url: {{ url }} basic_auth: username: {{ service_discovery_username }} password: {{ service_discovery_password }} tls_config: ca_file: root_cert.pem cert_file: prometheus.crt - key_file: prometheus.key -{% else %} + key_file: prometheus.key + {% endfor %} + {% else %} - scheme: http http_sd_configs: - - url: {{ alertmanager_sd_url }} -{% endif %} + {% for url in service_discovery_cfg['alertmanager'] %} + - url: {{ url }} + {% endfor %} + {% endif %} {% endif %} scrape_configs: - - job_name: 'ceph' +{% for service, urls in service_discovery_cfg.items() %} + {% if service != 'alertmanager' %} + - job_name: '{{ service }}' relabel_configs: - source_labels: [__address__] target_label: cluster replacement: {{ cluster_fsid }} + {% if service == 'ceph' %} - source_labels: [instance] target_label: instance replacement: 'ceph_cluster' -{% if security_enabled %} - scheme: https - tls_config: - ca_file: root_cert.pem - honor_labels: true - http_sd_configs: - - url: {{ mgr_prometheus_sd_url }} - basic_auth: - username: {{ service_discovery_username 
}} - password: {{ service_discovery_password }} - tls_config: - ca_file: root_cert.pem - cert_file: prometheus.crt - key_file: prometheus.key -{% else %} - honor_labels: true - http_sd_configs: - - url: {{ mgr_prometheus_sd_url }} -{% endif %} - -{% if node_exporter_sd_url %} - - job_name: 'node' - relabel_configs: - - source_labels: [__address__] - target_label: cluster - replacement: {{ cluster_fsid }} -{% if security_enabled %} + {% endif %} + {% if security_enabled %} scheme: https tls_config: ca_file: root_cert.pem cert_file: prometheus.crt - key_file: prometheus.key - http_sd_configs: - - url: {{ node_exporter_sd_url }} - basic_auth: - username: {{ service_discovery_username }} - password: {{ service_discovery_password }} - tls_config: - ca_file: root_cert.pem - cert_file: prometheus.crt - key_file: prometheus.key -{% else %} - http_sd_configs: - - url: {{ node_exporter_sd_url }} -{% endif %} -{% endif %} - -{% if haproxy_sd_url %} - - job_name: 'haproxy' - relabel_configs: - - source_labels: [__address__] - target_label: cluster - replacement: {{ cluster_fsid }} -{% if security_enabled %} - scheme: https - tls_config: - ca_file: root_cert.pem - http_sd_configs: - - url: {{ haproxy_sd_url }} - basic_auth: - username: {{ service_discovery_username }} - password: {{ service_discovery_password }} - tls_config: - ca_file: root_cert.pem - cert_file: prometheus.crt - key_file: prometheus.key -{% else %} - http_sd_configs: - - url: {{ haproxy_sd_url }} -{% endif %} -{% endif %} - -{% if ceph_exporter_sd_url %} - - job_name: 'ceph-exporter' - relabel_configs: - - source_labels: [__address__] - target_label: cluster - replacement: {{ cluster_fsid }} -{% if security_enabled %} + key_file: prometheus.key + {% endif %} honor_labels: true - scheme: https - tls_config: - ca_file: root_cert.pem http_sd_configs: - - url: {{ ceph_exporter_sd_url }} + {% for url in urls %} + - url: {{ url }} + {% if security_enabled %} basic_auth: username: {{ service_discovery_username }} 
password: {{ service_discovery_password }} tls_config: ca_file: root_cert.pem cert_file: prometheus.crt - key_file: prometheus.key -{% else %} - honor_labels: true - http_sd_configs: - - url: {{ ceph_exporter_sd_url }} -{% endif %} -{% endif %} + key_file: prometheus.key + {% endif %} + {% endfor %} -{% if nvmeof_sd_url %} - - job_name: 'nvmeof' -{% if security_enabled %} - honor_labels: true - scheme: https - tls_config: - ca_file: root_cert.pem - http_sd_configs: - - url: {{ nvmeof_sd_url }} - basic_auth: - username: {{ service_discovery_username }} - password: {{ service_discovery_password }} - tls_config: - ca_file: root_cert.pem - cert_file: prometheus.crt - key_file: prometheus.key -{% else %} - http_sd_configs: - - url: {{ nvmeof_sd_url }} -{% endif %} -{% endif %} - -{% if nfs_sd_url %} - - job_name: 'nfs' -{% if security_enabled %} - honor_labels: true - scheme: https - tls_config: - ca_file: root_cert.pem - http_sd_configs: - - url: {{ nfs_sd_url }} - basic_auth: - username: {{ service_discovery_username }} - password: {{ service_discovery_password }} - tls_config: - ca_file: root_cert.pem - cert_file: prometheus.crt - key_file: prometheus.key -{% else %} - http_sd_configs: - - url: {{ nfs_sd_url }} -{% endif %} -{% endif %} - -{% if smb_sd_url %} - - job_name: 'smb' -{% if security_enabled %} - honor_labels: true - scheme: https - tls_config: - ca_file: root_cert.pem - http_sd_configs: - - url: {{ smb_sd_url }} - basic_auth: - username: {{ service_discovery_username }} - password: {{ service_discovery_password }} - tls_config: - ca_file: root_cert.pem - cert_file: prometheus.crt - key_file: prometheus.key -{% else %} - http_sd_configs: - - url: {{ smb_sd_url }} -{% endif %} -{% endif %} + {% endif %} +{% endfor %} {% for url, details in clusters_credentials.items() %} - job_name: 'federate_{{ loop.index }}' @@ -237,4 +105,3 @@ scrape_configs: static_configs: - targets: ['{{ url }}'] {% endfor %} - diff --git 
a/src/pybind/mgr/cephadm/tests/test_services.py b/src/pybind/mgr/cephadm/tests/test_services.py index f7cd18b5398..8ac23d5d8d5 100644 --- a/src/pybind/mgr/cephadm/tests/test_services.py +++ b/src/pybind/mgr/cephadm/tests/test_services.py @@ -1143,7 +1143,7 @@ class TestMonitoring: ) @patch("cephadm.serve.CephadmServe._run_cephadm") - @patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '::1') + @patch("cephadm.module.CephadmOrchestrator._get_mgr_ips", lambda _: ['192.168.100.100', '::1']) def test_prometheus_config_security_disabled(self, _run_cephadm, cephadm_module: CephadmOrchestrator): _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) s = RGWSpec(service_id="foo", placement=PlacementSpec(count=1), rgw_frontend_type='beast') @@ -1185,6 +1185,7 @@ class TestMonitoring: alertmanagers: - scheme: http http_sd_configs: + - url: http://192.168.100.100:8765/sd/prometheus/sd-config?service=alertmanager - url: http://[::1]:8765/sd/prometheus/sd-config?service=alertmanager scrape_configs: @@ -1198,44 +1199,39 @@ class TestMonitoring: replacement: 'ceph_cluster' honor_labels: true http_sd_configs: - - url: http://[::1]:8765/sd/prometheus/sd-config?service=mgr-prometheus + - url: http://192.168.100.100:8765/sd/prometheus/sd-config?service=ceph + - url: http://[::1]:8765/sd/prometheus/sd-config?service=ceph - - job_name: 'node' + - job_name: 'ceph-exporter' relabel_configs: - source_labels: [__address__] target_label: cluster replacement: fsid + honor_labels: true http_sd_configs: - - url: http://[::1]:8765/sd/prometheus/sd-config?service=node-exporter + - url: http://192.168.100.100:8765/sd/prometheus/sd-config?service=ceph-exporter + - url: http://[::1]:8765/sd/prometheus/sd-config?service=ceph-exporter - - job_name: 'haproxy' + - job_name: 'ingress' relabel_configs: - source_labels: [__address__] target_label: cluster replacement: fsid + honor_labels: true http_sd_configs: - - url: http://[::1]:8765/sd/prometheus/sd-config?service=haproxy + - url: 
http://192.168.100.100:8765/sd/prometheus/sd-config?service=ingress + - url: http://[::1]:8765/sd/prometheus/sd-config?service=ingress - - job_name: 'ceph-exporter' + - job_name: 'node-exporter' relabel_configs: - source_labels: [__address__] target_label: cluster replacement: fsid honor_labels: true http_sd_configs: - - url: http://[::1]:8765/sd/prometheus/sd-config?service=ceph-exporter - - - job_name: 'nvmeof' - http_sd_configs: - - url: http://[::1]:8765/sd/prometheus/sd-config?service=nvmeof - - - job_name: 'nfs' - http_sd_configs: - - url: http://[::1]:8765/sd/prometheus/sd-config?service=nfs + - url: http://192.168.100.100:8765/sd/prometheus/sd-config?service=node-exporter + - url: http://[::1]:8765/sd/prometheus/sd-config?service=node-exporter - - job_name: 'smb' - http_sd_configs: - - url: http://[::1]:8765/sd/prometheus/sd-config?service=smb """).lstrip() @@ -1278,17 +1274,17 @@ class TestMonitoring: use_current_daemon_image=False, ) + @patch("cephadm.module.CephadmOrchestrator.get_unique_name") @patch("cephadm.serve.CephadmServe._run_cephadm") - @patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '::1') + @patch("cephadm.module.CephadmOrchestrator._get_mgr_ips", lambda _: ['::1']) @patch("cephadm.services.monitoring.password_hash", lambda password: 'prometheus_password_hash') @patch('cephadm.cert_mgr.CertMgr.get_root_ca', lambda instance: 'cephadm_root_cert') @patch('cephadm.cert_mgr.CertMgr.generate_cert', lambda instance, fqdn, ip: ('mycert', 'mykey')) - def test_prometheus_config_security_enabled(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + def test_prometheus_config_security_enabled(self, _run_cephadm, _get_uname, cephadm_module: CephadmOrchestrator): _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + _get_uname.return_value = 'test' s = RGWSpec(service_id="foo", placement=PlacementSpec(count=1), rgw_frontend_type='beast') - - def gen_cert(host, addr): - return ('mycert', 'mykey') + smb_spec = 
SMBSpec(cluster_id='foxtrot', config_uri='rados://.smb/foxtrot/config.json',) with with_host(cephadm_module, 'test'): cephadm_module.secure_monitoring_stack = True @@ -1305,6 +1301,8 @@ class TestMonitoring: }, }) with with_service(cephadm_module, MonitoringSpec('node-exporter')) as _, \ + with_service(cephadm_module, smb_spec) as _, \ + with_service(cephadm_module, CephExporterSpec('ceph-exporter')) as _, \ with_service(cephadm_module, s) as _, \ with_service(cephadm_module, AlertManagerSpec('alertmanager')) as _, \ with_service(cephadm_module, IngressSpec(service_id='ingress', @@ -1345,7 +1343,7 @@ class TestMonitoring: tls_config: ca_file: root_cert.pem cert_file: prometheus.crt - key_file: prometheus.key + key_file: prometheus.key path_prefix: '/' http_sd_configs: - url: https://[::1]:8765/sd/prometheus/sd-config?service=alertmanager @@ -1355,7 +1353,7 @@ class TestMonitoring: tls_config: ca_file: root_cert.pem cert_file: prometheus.crt - key_file: prometheus.key + key_file: prometheus.key scrape_configs: - job_name: 'ceph' @@ -1369,18 +1367,20 @@ class TestMonitoring: scheme: https tls_config: ca_file: root_cert.pem + cert_file: prometheus.crt + key_file: prometheus.key honor_labels: true http_sd_configs: - - url: https://[::1]:8765/sd/prometheus/sd-config?service=mgr-prometheus + - url: https://[::1]:8765/sd/prometheus/sd-config?service=ceph basic_auth: username: sd_user password: sd_password tls_config: ca_file: root_cert.pem cert_file: prometheus.crt - key_file: prometheus.key + key_file: prometheus.key - - job_name: 'node' + - job_name: 'ceph-exporter' relabel_configs: - source_labels: [__address__] target_label: cluster @@ -1389,18 +1389,19 @@ class TestMonitoring: tls_config: ca_file: root_cert.pem cert_file: prometheus.crt - key_file: prometheus.key + key_file: prometheus.key + honor_labels: true http_sd_configs: - - url: https://[::1]:8765/sd/prometheus/sd-config?service=node-exporter + - url: 
https://[::1]:8765/sd/prometheus/sd-config?service=ceph-exporter basic_auth: username: sd_user password: sd_password tls_config: ca_file: root_cert.pem cert_file: prometheus.crt - key_file: prometheus.key + key_file: prometheus.key - - job_name: 'haproxy' + - job_name: 'ingress' relabel_configs: - source_labels: [__address__] target_label: cluster @@ -1408,70 +1409,51 @@ class TestMonitoring: scheme: https tls_config: ca_file: root_cert.pem + cert_file: prometheus.crt + key_file: prometheus.key + honor_labels: true http_sd_configs: - - url: https://[::1]:8765/sd/prometheus/sd-config?service=haproxy + - url: https://[::1]:8765/sd/prometheus/sd-config?service=ingress basic_auth: username: sd_user password: sd_password tls_config: ca_file: root_cert.pem cert_file: prometheus.crt - key_file: prometheus.key + key_file: prometheus.key - - job_name: 'ceph-exporter' + - job_name: 'node-exporter' relabel_configs: - source_labels: [__address__] target_label: cluster replacement: fsid - honor_labels: true - scheme: https - tls_config: - ca_file: root_cert.pem - http_sd_configs: - - url: https://[::1]:8765/sd/prometheus/sd-config?service=ceph-exporter - basic_auth: - username: sd_user - password: sd_password - tls_config: - ca_file: root_cert.pem - cert_file: prometheus.crt - key_file: prometheus.key - - - job_name: 'nvmeof' - honor_labels: true scheme: https tls_config: ca_file: root_cert.pem - http_sd_configs: - - url: https://[::1]:8765/sd/prometheus/sd-config?service=nvmeof - basic_auth: - username: sd_user - password: sd_password - tls_config: - ca_file: root_cert.pem - cert_file: prometheus.crt - key_file: prometheus.key - - - job_name: 'nfs' + cert_file: prometheus.crt + key_file: prometheus.key honor_labels: true - scheme: https - tls_config: - ca_file: root_cert.pem http_sd_configs: - - url: https://[::1]:8765/sd/prometheus/sd-config?service=nfs + - url: https://[::1]:8765/sd/prometheus/sd-config?service=node-exporter basic_auth: username: sd_user password: 
sd_password tls_config: ca_file: root_cert.pem cert_file: prometheus.crt - key_file: prometheus.key + key_file: prometheus.key - job_name: 'smb' - honor_labels: true + relabel_configs: + - source_labels: [__address__] + target_label: cluster + replacement: fsid scheme: https tls_config: ca_file: root_cert.pem + cert_file: prometheus.crt + key_file: prometheus.key + honor_labels: true http_sd_configs: - url: https://[::1]:8765/sd/prometheus/sd-config?service=smb basic_auth: @@ -1480,7 +1462,8 @@ class TestMonitoring: tls_config: ca_file: root_cert.pem cert_file: prometheus.crt - key_file: prometheus.key + key_file: prometheus.key + """).lstrip() @@ -1519,8 +1502,8 @@ class TestMonitoring: 'retention_time': '15d', 'retention_size': '0', 'ip_to_bind_to': '', - 'web_config': '/etc/prometheus/web.yml', - "use_url_prefix": False + "use_url_prefix": False, + 'web_config': '/etc/prometheus/web.yml' }, }), error_ok=True, -- 2.39.5