from cephadm.service_discovery import ServiceDiscovery
+from ceph.deployment.service_spec import PrometheusSpec
+
import string
from typing import List, Dict, Optional, Callable, Tuple, TypeVar, \
Any, Set, TYPE_CHECKING, cast, NamedTuple, Sequence, Type, \
self.set_store(PrometheusService.PASS_CFG_KEY, password)
return 'prometheus credentials updated correctly'
+ @handle_orch_error
+ def set_prometheus_target(self, url: str) -> str:
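+ """Add a target URL to the prometheus spec and redeploy the prometheus daemons so the change takes effect."""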
+ prometheus_spec = cast(PrometheusSpec, self.spec_store['prometheus'].spec)
+ if not prometheus_spec:
+ return "Service prometheus not found\n"
+ if url in prometheus_spec.targets:
+ return f"Target '{url}' already exists.\n"
+ prometheus_spec.targets.append(url)
+ daemons: List[orchestrator.DaemonDescription] = self.cache.get_daemons_by_type('prometheus')
+ spec = ServiceSpec.from_json(prometheus_spec.to_json())
+ self.apply([spec], no_overwrite=False)
+ for daemon in daemons:
+ self.daemon_action(action='redeploy', daemon_name=daemon.name())
+ return 'prometheus multi-cluster targets updated'
+
+ @handle_orch_error
+ def remove_prometheus_target(self, url: str) -> str:
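+ """Remove a target URL from the prometheus spec and redeploy the prometheus daemons so the change takes effect."""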
+ prometheus_spec = cast(PrometheusSpec, self.spec_store['prometheus'].spec)
+ if not prometheus_spec:
+ return "Service prometheus not found\n"
+ if url not in prometheus_spec.targets:
+ return f"Target '{url}' does not exist.\n"
+ prometheus_spec.targets.remove(url)
+ daemons: List[orchestrator.DaemonDescription] = self.cache.get_daemons_by_type('prometheus')
+ spec = ServiceSpec.from_json(prometheus_spec.to_json())
+ self.apply([spec], no_overwrite=False)
+ for daemon in daemons:
+ self.daemon_action(action='redeploy', daemon_name=daemon.name())
+ return 'prometheus multi-cluster targets updated'
+
@handle_orch_error
def set_alertmanager_access_info(self, user: str, password: str) -> str:
self.set_store(AlertmanagerService.USER_CFG_KEY, user)
assert self.TYPE == daemon_spec.daemon_type
spec = cast(PrometheusSpec, self.mgr.spec_store[daemon_spec.service_name].spec)
-
try:
retention_time = spec.retention_time if spec.retention_time else '15d'
except AttributeError:
retention_time = '15d'
+ try:
+ targets = spec.targets
+ except AttributeError:
+ logger.warning('Prometheus targets not found in the spec. Using empty list.')
+ targets = []
+
try:
retention_size = spec.retention_size if spec.retention_size else '0'
except AttributeError:
alertmanager_user, alertmanager_password = self.mgr._get_alertmanager_credentials()
prometheus_user, prometheus_password = self.mgr._get_prometheus_credentials()
+ cluster_fsid = self.mgr._cluster_fsid
# generate the prometheus configuration
context = {
'haproxy_sd_url': haproxy_sd_url,
'ceph_exporter_sd_url': ceph_exporter_sd_url,
'nvmeof_sd_url': nvmeof_sd_url,
+ 'external_prometheus_targets': targets,
+ 'cluster_fsid': cluster_fsid
}
ip_to_bind_to = ''
global:
scrape_interval: 10s
evaluation_interval: 10s
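+{# tag metrics from this cluster with its fsid so they can be told apart when federated into another cluster #}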
+{% if not secure_monitoring_stack %}
+ external_labels:
+ cluster: {{ cluster_fsid }}
+{% endif %}
+
rule_files:
- /etc/prometheus/alerting/*
ca_file: root_cert.pem
{% else %}
honor_labels: true
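+{# add a "cluster" label carrying this cluster's fsid to every scraped series (multi-cluster support) #}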
+ relabel_configs:
+ - source_labels: [__address__]
+ target_label: cluster
+ replacement: {{ cluster_fsid }}
http_sd_configs:
- url: {{ mgr_prometheus_sd_url }}
{% endif %}
{% else %}
http_sd_configs:
- url: {{ node_exporter_sd_url }}
+ relabel_configs:
+ - source_labels: [__address__]
+ target_label: cluster
+ replacement: {{ cluster_fsid }}
{% endif %}
{% endif %}
{% else %}
http_sd_configs:
- url: {{ haproxy_sd_url }}
+ relabel_configs:
+ - source_labels: [__address__]
+ target_label: cluster
+ replacement: {{ cluster_fsid }}
{% endif %}
{% endif %}
ca_file: root_cert.pem
{% else %}
honor_labels: true
+ relabel_configs:
+ - source_labels: [__address__]
+ target_label: cluster
+ replacement: {{ cluster_fsid }}
http_sd_configs:
- url: {{ ceph_exporter_sd_url }}
{% endif %}
- url: {{ nvmeof_sd_url }}
{% endif %}
{% endif %}
+
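+{# scrape selected jobs from the Prometheus instances of any configured external clusters (multi-cluster monitoring) #}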
+{% if not secure_monitoring_stack %}
+ - job_name: 'federate'
+ scrape_interval: 15s
+ honor_labels: true
+ metrics_path: '/federate'
+ params:
+ 'match[]':
+ - '{job="ceph"}'
+ - '{job="node"}'
+ - '{job="haproxy"}'
+ - '{job="ceph-exporter"}'
+ static_configs:
+ - targets: {{ external_prometheus_targets }}
+{% endif %}
+
global:
scrape_interval: 10s
evaluation_interval: 10s
+ external_labels:
+ cluster: fsid
+
rule_files:
- /etc/prometheus/alerting/*
scrape_configs:
- job_name: 'ceph'
honor_labels: true
+ relabel_configs:
+ - source_labels: [__address__]
+ target_label: cluster
+ replacement: fsid
http_sd_configs:
- url: http://[::1]:8765/sd/prometheus/sd-config?service=mgr-prometheus
- job_name: 'node'
http_sd_configs:
- url: http://[::1]:8765/sd/prometheus/sd-config?service=node-exporter
+ relabel_configs:
+ - source_labels: [__address__]
+ target_label: cluster
+ replacement: fsid
- job_name: 'haproxy'
http_sd_configs:
- url: http://[::1]:8765/sd/prometheus/sd-config?service=haproxy
+ relabel_configs:
+ - source_labels: [__address__]
+ target_label: cluster
+ replacement: fsid
- job_name: 'ceph-exporter'
honor_labels: true
+ relabel_configs:
+ - source_labels: [__address__]
+ target_label: cluster
+ replacement: fsid
http_sd_configs:
- url: http://[::1]:8765/sd/prometheus/sd-config?service=ceph-exporter
- job_name: 'nvmeof'
http_sd_configs:
- url: http://[::1]:8765/sd/prometheus/sd-config?service=nvmeof
+
+ - job_name: 'federate'
+ scrape_interval: 15s
+ honor_labels: true
+ metrics_path: '/federate'
+ params:
+ 'match[]':
+ - '{job="ceph"}'
+ - '{job="node"}'
+ - '{job="haproxy"}'
+ - '{job="ceph-exporter"}'
+ static_configs:
+ - targets: []
""").lstrip()
_run_cephadm.assert_called_with(
global:
scrape_interval: 10s
evaluation_interval: 10s
+
rule_files:
- /etc/prometheus/alerting/*
password: sd_password
tls_config:
ca_file: root_cert.pem
+
""").lstrip()
_run_cephadm.assert_called_with(
"""set prometheus access information"""
raise NotImplementedError()
+ def set_prometheus_target(self, url: str) -> OrchResult[str]:
+ """set prometheus target for multi-cluster"""
+ raise NotImplementedError()
+
+ def remove_prometheus_target(self, url: str) -> OrchResult[str]:
+ """remove prometheus target for multi-cluster"""
+ raise NotImplementedError()
+
def get_alertmanager_access_info(self) -> OrchResult[Dict[str, str]]:
"""get alertmanager access information"""
raise NotImplementedError()
except ArgumentError as e:
return HandleCommandResult(-errno.EINVAL, "", (str(e)))
+ @_cli_write_command('orch prometheus set-target')
+ def _set_prometheus_target(self, url: str) -> HandleCommandResult:
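+ """Add a prometheus target for multi-cluster federation"""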
+ completion = self.set_prometheus_target(url)
+ result = raise_if_exception(completion)
+ return HandleCommandResult(stdout=json.dumps(result))
+
+ @_cli_write_command('orch prometheus remove-target')
+ def _remove_prometheus_target(self, url: str) -> HandleCommandResult:
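+ """Remove a prometheus target used for multi-cluster federation"""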
+ completion = self.remove_prometheus_target(url)
+ result = raise_if_exception(completion)
+ return HandleCommandResult(stdout=json.dumps(result))
+
@_cli_write_command('orch alertmanager set-credentials')
def _set_alertmanager_access_info(self, username: Optional[str] = None, password: Optional[str] = None, inbuf: Optional[str] = None) -> HandleCommandResult:
try:
unmanaged: bool = False,
preview_only: bool = False,
networks: Optional[List[str]] = None,
+ targets: Optional[List[str]] = None,
extra_container_args: Optional[GeneralArgList] = None,
extra_entrypoint_args: Optional[GeneralArgList] = None,
custom_configs: Optional[List[CustomConfig]] = None,
#: :ref:`cephadm-monitoring-networks-ports`,
#: :ref:`cephadm-rgw-networks` and :ref:`cephadm-mgr-networks`.
self.networks: List[str] = networks or []
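+ #: Additional monitoring targets; currently only used by the prometheus
+ #: service for multi-cluster federation.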
+ self.targets: List[str] = targets or []
self.config: Optional[Dict[str, str]] = None
if config:
unmanaged: bool = False,
preview_only: bool = False,
port: Optional[int] = None,
+ targets: Optional[List[str]] = None,
extra_container_args: Optional[GeneralArgList] = None,
extra_entrypoint_args: Optional[GeneralArgList] = None,
custom_configs: Optional[List[CustomConfig]] = None,
preview_only=preview_only, config=config,
networks=networks, extra_container_args=extra_container_args,
extra_entrypoint_args=extra_entrypoint_args,
- custom_configs=custom_configs)
+ custom_configs=custom_configs, targets=targets)
self.service_type = service_type
self.port = port
port: Optional[int] = None,
retention_time: Optional[str] = None,
retention_size: Optional[str] = None,
+ targets: Optional[List[str]] = None,
extra_container_args: Optional[GeneralArgList] = None,
extra_entrypoint_args: Optional[GeneralArgList] = None,
custom_configs: Optional[List[CustomConfig]] = None,
super(PrometheusSpec, self).__init__(
'prometheus', service_id=service_id,
placement=placement, unmanaged=unmanaged,
- preview_only=preview_only, config=config, networks=networks, port=port,
+ preview_only=preview_only, config=config, networks=networks, port=port, targets=targets,
extra_container_args=extra_container_args, extra_entrypoint_args=extra_entrypoint_args,
custom_configs=custom_configs)