From: Sebastian Wagner Date: Mon, 4 May 2020 12:30:57 +0000 (+0200) Subject: mgr/cephadm: Add PrometheusService X-Git-Tag: v16.1.0~2396^2~3 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=27dc31ac84f781c43c87c8ea6e887cb832cf78ba;p=ceph.git mgr/cephadm: Add PrometheusService Signed-off-by: Sebastian Wagner --- diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py index 5c430476cd4c..a54f3fc1e375 100644 --- a/src/pybind/mgr/cephadm/module.py +++ b/src/pybind/mgr/cephadm/module.py @@ -40,7 +40,7 @@ from .services.cephadmservice import MonService, MgrService, MdsService, RgwServ RbdMirrorService from .services.nfs import NFSService from .services.osd import RemoveUtil, OSDRemoval, OSDService -from .services.monitoring import GrafanaService, AlertmanagerService +from .services.monitoring import GrafanaService, AlertmanagerService, PrometheusService from .inventory import Inventory, SpecStore, HostCache try: @@ -430,6 +430,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule): self.rbd_mirror_service = RbdMirrorService(self) self.grafana_service = GrafanaService(self) self.alertmanager_service = AlertmanagerService(self) + self.prometheus_service = PrometheusService(self) def shutdown(self): self.log.debug('shutdown') @@ -1829,7 +1830,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule): deps = [] # type: List[str] cephadm_config = {} # type: Dict[str, Any] if daemon_type == 'prometheus': - cephadm_config, deps = self._generate_prometheus_config() + cephadm_config, deps = self.prometheus_service.generate_config() extra_args.extend(['--config-json', '-']) elif daemon_type == 'grafana': cephadm_config, deps = self.grafana_service.generate_config() @@ -1941,7 +1942,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule): 'nfs': self.nfs_service.create, 'grafana': self.grafana_service.create, 'alertmanager': self.alertmanager_service.create, - 'prometheus': self._create_prometheus, + 'prometheus': self.prometheus_service.create, 'node-exporter': self._create_node_exporter, 'crash': self._create_crash, 'iscsi': self._create_iscsi, @@ -2326,108 +2327,12 @@ api_secure = {api_secure} def apply_nfs(self, spec): return self._apply(spec) - def _generate_prometheus_config(self): - # type: () -> Tuple[Dict[str, Any], List[str]] - deps = [] # type: List[str] - - # scrape mgrs - mgr_scrape_list = [] - mgr_map = self.get('mgr_map') - port = None - t = mgr_map.get('services', {}).get('prometheus', None) - if t: - t = t.split('/')[2] - mgr_scrape_list.append(t) - port = '9283' - if ':' in t: - port = t.split(':')[1] - # scan all mgrs to generate deps and to get standbys too. - # assume that they are all on the same port as the active mgr. - for dd in self.cache.get_daemons_by_service('mgr'): - # we consider the mgr a dep even if the prometheus module is - # disabled in order to be consistent with _calc_daemon_deps(). - deps.append(dd.name()) - if not port: - continue - if dd.daemon_id == self.get_mgr_id(): - continue - addr = self.inventory.get_addr(dd.hostname) - mgr_scrape_list.append(addr.split(':')[0] + ':' + port) - - # scrape node exporters - node_configs = '' - for dd in self.cache.get_daemons_by_service('node-exporter'): - deps.append(dd.name()) - addr = self.inventory.get_addr(dd.hostname) - if not node_configs: - node_configs = """ - - job_name: 'node' - static_configs: -""" - node_configs += """ - targets: {} - labels: - instance: '{}' -""".format([addr.split(':')[0] + ':9100'], - dd.hostname) - - # scrape alert managers - alertmgr_configs = "" - alertmgr_targets = [] - for dd in self.cache.get_daemons_by_service('alertmanager'): - deps.append(dd.name()) - addr = self.inventory.get_addr(dd.hostname) - alertmgr_targets.append("'{}:9093'".format(addr.split(':')[0])) - if alertmgr_targets: - alertmgr_configs = """alerting: - alertmanagers: - - scheme: http - path_prefix: /alertmanager - static_configs: - - targets: [{}] -""".format(", ".join(alertmgr_targets)) - - # generate the prometheus configuration - r = { - 'files': { - 'prometheus.yml': """# generated by cephadm -global: - scrape_interval: 5s - evaluation_interval: 10s -rule_files: - - /etc/prometheus/alerting/* -{alertmgr_configs} -scrape_configs: - - job_name: 'ceph' - static_configs: - - targets: {mgr_scrape_list} - labels: - instance: 'ceph_cluster' -{node_configs} -""".format( - mgr_scrape_list=str(mgr_scrape_list), - node_configs=str(node_configs), - alertmgr_configs=str(alertmgr_configs) - ), - }, - } - - # include alerts, if present in the container - if os.path.exists(self.prometheus_alerts_path): - with open(self.prometheus_alerts_path, "r") as f: - alerts = f.read() - r['files']['/etc/prometheus/alerting/ceph_alerts.yml'] = alerts - - return r, sorted(deps) - def _get_dashboard_url(self): # type: () -> str return self.get('mgr_map').get('services', {}).get('dashboard', '') def add_prometheus(self, spec): - return self._add_daemon('prometheus', spec, self._create_prometheus) - - def _create_prometheus(self, daemon_id, host): - return self._create_daemon('prometheus', daemon_id, host) + return self._add_daemon('prometheus', spec, self.prometheus_service.create) @trivial_completion def apply_prometheus(self, spec): diff --git a/src/pybind/mgr/cephadm/services/monitoring.py b/src/pybind/mgr/cephadm/services/monitoring.py index f05ff4fe005b..f96ea5a61431 100644 --- a/src/pybind/mgr/cephadm/services/monitoring.py +++ b/src/pybind/mgr/cephadm/services/monitoring.py @@ -1,4 +1,5 @@ import logging +import os from typing import List, Any, Tuple, Dict from cephadm.services.cephadmservice import CephadmService @@ -169,3 +170,101 @@ receivers: }, "peers": peers }, sorted(deps) + + +class PrometheusService(CephadmService): + def create(self, daemon_id, host) -> str: + return self.mgr._create_daemon('prometheus', daemon_id, host) + + def generate_config(self): + # type: () -> Tuple[Dict[str, Any], List[str]] + deps = [] # type: List[str] + + # scrape mgrs + mgr_scrape_list = [] + mgr_map = self.mgr.get('mgr_map') + port = None + t = mgr_map.get('services', {}).get('prometheus', None) + if t: + t = t.split('/')[2] + mgr_scrape_list.append(t) + port = '9283' + if ':' in t: + port = t.split(':')[1] + # scan all mgrs to generate deps and to get standbys too. + # assume that they are all on the same port as the active mgr. + for dd in self.mgr.cache.get_daemons_by_service('mgr'): + # we consider the mgr a dep even if the prometheus module is + # disabled in order to be consistent with _calc_daemon_deps(). + deps.append(dd.name()) + if not port: + continue + if dd.daemon_id == self.mgr.get_mgr_id(): + continue + addr = self.mgr.inventory.get_addr(dd.hostname) + mgr_scrape_list.append(addr.split(':')[0] + ':' + port) + + # scrape node exporters + node_configs = '' + for dd in self.mgr.cache.get_daemons_by_service('node-exporter'): + deps.append(dd.name()) + addr = self.mgr.inventory.get_addr(dd.hostname) + if not node_configs: + node_configs = """ + - job_name: 'node' + static_configs: +""" + node_configs += """ - targets: {} + labels: + instance: '{}' +""".format([addr.split(':')[0] + ':9100'], + dd.hostname) + + # scrape alert managers + alertmgr_configs = "" + alertmgr_targets = [] + for dd in self.mgr.cache.get_daemons_by_service('alertmanager'): + deps.append(dd.name()) + addr = self.mgr.inventory.get_addr(dd.hostname) + alertmgr_targets.append("'{}:9093'".format(addr.split(':')[0])) + if alertmgr_targets: + alertmgr_configs = """alerting: + alertmanagers: + - scheme: http + path_prefix: /alertmanager + static_configs: + - targets: [{}] +""".format(", ".join(alertmgr_targets)) + + # generate the prometheus configuration + r = { + 'files': { + 'prometheus.yml': """# generated by cephadm +global: + scrape_interval: 5s + evaluation_interval: 10s +rule_files: + - /etc/prometheus/alerting/* +{alertmgr_configs} +scrape_configs: + - job_name: 'ceph' + static_configs: + - targets: {mgr_scrape_list} + labels: + instance: 'ceph_cluster' +{node_configs} +""".format( + mgr_scrape_list=str(mgr_scrape_list), + node_configs=str(node_configs), + alertmgr_configs=str(alertmgr_configs) + ), + }, + } + + # include alerts, if present in the container + if os.path.exists(self.mgr.prometheus_alerts_path): + with open(self.mgr.prometheus_alerts_path, "r") as f: + alerts = f.read() + r['files']['/etc/prometheus/alerting/ceph_alerts.yml'] = alerts + + return r, sorted(deps)