"""
Retrieves the service discovery URLs for the services that require monitoring
+ Note: the 'ceph' Prometheus target is always added, since it corresponds to the prometheus-mgr module target
+
Returns:
Dict[str, List[str]]: A dictionary whose keys are service types (e.g., "nfs", "node-exporter") and
whose values are lists of service-discovery URLs used to fetch the corresponding service targets.
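+
+ Example (illustrative mgr IP; the port is the mgr service-discovery port):
+ {'ceph': ['http://10.1.2.3:8765/sd/prometheus/sd-config?service=ceph']}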
"""
+
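+ # helper: build one sd-config URL per URL prefix for the given service type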
+ def sd_urls(svc: str, prefixes: List[str]) -> List[str]:
+ return [f'{p}/sd/prometheus/sd-config?service={svc}' for p in prefixes]
+
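+ # with the mgmt-gateway enabled, service discovery goes through its internal
+ # endpoint; otherwise we target the service-discovery port on each mgr IP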
if mgmt_gw_enabled:
service_discovery_url_prefixes = [f'{self.mgr.get_mgmt_gw_internal_endpoint()}']
else:
protocol = 'https' if security_enabled else 'http'
service_discovery_url_prefixes = [f'{protocol}://{wrap_ipv6(ip)}:{port}'
for ip in self.mgr._get_mgr_ips()]
- return {
- service: [f'{prefix}/sd/prometheus/sd-config?service={service}' for prefix in service_discovery_url_prefixes]
- for service in service_registry.get_services_requiring_monitoring()
- if service == 'ceph'
- or bool(self.mgr.cache.get_daemons_by_service(service))
- or bool(self.mgr.cache.get_daemons_by_type(service))
- }
+
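+ # 'ceph' (the prometheus-mgr module target) is always monitored; other
+ # services are included only when they actually have deployed daemons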
+ services_to_monitor = ['ceph', *(
+ s for s in service_registry.get_services_requiring_monitoring()
+ if self.mgr.cache.get_daemons_by_service(s) or self.mgr.cache.get_daemons_by_type(s)
+ )]
+
+ return {s: sd_urls(s, service_discovery_url_prefixes) for s in services_to_monitor}
def configure_alerts(self, r: Dict) -> None:
# include alerts, if present in the container
deps.append(f'alert-cred:{utils.md5_hash(alertmanager_user + alertmanager_password)}')
# Adding other services as deps (with corresponding justification):
- # ceph-exporter: scraping target
- # node-exporter: scraping target
- # ingress : scraping target
- # alert-manager: part of prometheus configuration
- # mgmt-gateway : since url_prefix depends on the existence of mgmt-gateway
+ # mgmt-gateway : url_prefix depends on the existence of mgmt-gateway
# oauth2-proxy : enabling basic-auth (or not) depends on the existence of 'oauth2-proxy'
- for svc in ['mgmt-gateway', 'oauth2-proxy', 'alertmanager', 'node-exporter', 'ceph-exporter', 'ingress']:
- deps.append(f'{svc}_configured:{bool(mgr.cache.get_daemons_by_service(svc))}')
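+ # any service that requires monitoring: scraping target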
+ prometheus_svc_deps = service_registry.get_services_requiring_monitoring() + ['mgmt-gateway', 'oauth2-proxy']
+ for svc in prometheus_svc_deps:
+ configured = bool(mgr.cache.get_daemons_by_service(svc)) or bool(mgr.cache.get_daemons_by_type(svc))
+ deps.append(f'{svc}_configured:{configured}')
if not mgmt_gw_enabled:
# Ceph mgrs are a dependency because, when mgmt-gateway is not enabled, service discovery depends on the mgr IPs
def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None:
def get_set_cmd_dicts(out: str) -> List[dict]:
- gateways = json.loads(out)['gateways']
- cmd_dicts = []
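+ # be defensive when parsing the gateway listing: on malformed output,
+ # log the error and skip the dashboard update instead of raising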
+ try:
+ gateways = json.loads(out).get('gateways', [])
+ except json.decoder.JSONDecodeError as e:
+ logger.error(f'Error while trying to parse gateways JSON: {e}')
+ return []
+
+ cmd_dicts = []
for dd in daemon_descrs:
spec = cast(NvmeofServiceSpec,
self.mgr.spec_store.all_specs.get(dd.service_name(), None))
def get_services_requiring_monitoring(self) -> List[str]:
"""Return a list with service types that requiere monitoring."""
services_to_monitor = [svc for svc in self._services if self._services[svc].needs_monitoring]
- services_to_monitor.append('ceph') # this is needed for mgr-prometheus targets
return sorted(services_to_monitor)
@patch("cephadm.module.CephadmOrchestrator._get_mgr_ips", lambda _: ['192.168.100.100', '::1'])
def test_prometheus_config_security_disabled(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
_run_cephadm.side_effect = async_side_effect(('{}', '', 0))
+ pool = 'testpool'
+ group = 'mygroup'
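+ # deploy an nvmeof service so it appears among the prometheus scrape jobs below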
s = RGWSpec(service_id="foo", placement=PlacementSpec(count=1), rgw_frontend_type='beast')
with with_host(cephadm_module, 'test'):
# host "test" needs to have networks for keepalive to be placed
keepalived_password='12345',
virtual_ip="1.2.3.4/32",
backend_service='rgw.foo')) as _, \
+ with_service(cephadm_module, NvmeofServiceSpec(service_id=f'{pool}.{group}',
+ group=group,
+ pool=pool)) as _, \
with_service(cephadm_module, PrometheusSpec('prometheus',
networks=['1.2.3.0/24'],
only_bind_port_on_networks=True)) as _:
- url: http://192.168.100.100:8765/sd/prometheus/sd-config?service=node-exporter
- url: http://[::1]:8765/sd/prometheus/sd-config?service=node-exporter
+ - job_name: 'nvmeof'
+ relabel_configs:
+ - source_labels: [__address__]
+ target_label: cluster
+ replacement: fsid
+ honor_labels: true
+ http_sd_configs:
+ - url: http://192.168.100.100:8765/sd/prometheus/sd-config?service=nvmeof
+ - url: http://[::1]:8765/sd/prometheus/sd-config?service=nvmeof
+
""").lstrip()
def test_prometheus_config_security_enabled(self, _run_cephadm, _get_uname, cephadm_module: CephadmOrchestrator):
_run_cephadm.side_effect = async_side_effect(('{}', '', 0))
_get_uname.return_value = 'test'
+ pool = 'testpool'
+ group = 'mygroup'
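+ # nvmeof is deployed here as well; the expected config below includes its job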
s = RGWSpec(service_id="foo", placement=PlacementSpec(count=1), rgw_frontend_type='beast')
smb_spec = SMBSpec(cluster_id='foxtrot', config_uri='rados://.smb/foxtrot/config.json',)
keepalived_password='12345',
virtual_ip="1.2.3.4/32",
backend_service='rgw.foo')) as _, \
+ with_service(cephadm_module, NvmeofServiceSpec(service_id=f'{pool}.{group}',
+ group=group,
+ pool=pool)) as _, \
with_service(cephadm_module, PrometheusSpec('prometheus')) as _:
web_config = dedent("""
cert_file: prometheus.crt
key_file: prometheus.key
+ - job_name: 'nvmeof'
+ relabel_configs:
+ - source_labels: [__address__]
+ target_label: cluster
+ replacement: fsid
+ scheme: https
+ tls_config:
+ ca_file: root_cert.pem
+ cert_file: prometheus.crt
+ key_file: prometheus.key
+ honor_labels: true
+ http_sd_configs:
+ - url: https://[::1]:8765/sd/prometheus/sd-config?service=nvmeof
+ basic_auth:
+ username: sd_user
+ password: sd_password
+ tls_config:
+ ca_file: root_cert.pem
+ cert_file: prometheus.crt
+ key_file: prometheus.key
+
- job_name: 'smb'
relabel_configs:
- source_labels: [__address__]