From a7ab01b1dac36559a0f3461f9249328a909a8aa9 Mon Sep 17 00:00:00 2001
From: Sebastian Wagner
Date: Mon, 4 May 2020 14:26:31 +0200
Subject: [PATCH] mgr/cephadm: Add AlertmanagerService

Signed-off-by: Sebastian Wagner
(cherry picked from commit d74f0e9949f72fd8c7793b4435d773d85b910f91)
---

Notes (review commentary only; text between the "---" line above and the
first "diff --git" line is not part of the change):

    What this patch does
      * Moves CephadmOrchestrator._generate_alertmanager_config() and
        CephadmOrchestrator._create_alertmanager() out of module.py into a
        new class AlertmanagerService(CephadmService) in
        services/monitoring.py, as generate_config() and create().
      * The moved bodies are the same statements; the only textual change
        is that orchestrator state is reached through self.mgr
        (self.mgr.get, self.mgr.cache, self.mgr.inventory,
        self.mgr.get_mgr_id, self.mgr._create_daemon) instead of self.
      * CephadmOrchestrator gains self.alertmanager_service, created next
        to self.grafana_service, and the three former users are rewired:
        the "elif daemon_type == 'alertmanager'" branch, the 'alertmanager'
        entry of the daemon-type -> create-function dict, and
        add_alertmanager().

    What AlertmanagerService.generate_config() returns
      * A 2-tuple (config, deps).
      * config is {"files": {"alertmanager.yml": <yaml text>},
        "peers": [...]}; each peer is "<addr up to the first ':'>:9094"
        for every daemon returned by
        cache.get_daemons_by_service('alertmanager').
      * deps is the sorted list of dd.name() for every 'mgr' and every
        'alertmanager' daemon in the cache; mgr daemons are added even
        when no dashboard URL is present.
      * The yaml lists one webhook URL per dashboard:
        "<url>api/prometheus_receiver". The active dashboard URL comes
        from mgr_map['services']['dashboard']; URLs for the other mgrs are
        built from the active URL's scheme and port plus each mgr host's
        inventory address.

    NOTE(review), inherited from the moved code and not changed here
      * port = url.split('/')[2].split(':')[1] raises IndexError when the
        dashboard URL has no explicit ":port", and yields '' for a
        bracketed IPv6 host such as "https://[::1]:8443/", in which case
        the "if not port: continue" guard skips every standby URL.
        A follow-up could parse with urllib.parse instead.
      * addr.split(':')[0] keeps only the text before the first ':' -
        presumably inventory addresses are hostnames or IPv4, optionally
        with ":port"; a bare IPv6 literal would be cut short. TODO confirm
        against Inventory.get_addr().
      * With no dashboard URL the {urls} placeholder expands to an empty
        string, leaving "webhook_configs:" with no entries.

    NOTE(review), about this copy of the patch
      * Line breaks and indentation were restored from a whitespace-
        collapsed copy. Per-hunk line counts agree with the @@ headers and
        the diffstat, but the indentation of context lines should be
        confirmed against the target tree before applying.

 src/pybind/mgr/cephadm/module.py              | 75 ++-----------------
 src/pybind/mgr/cephadm/services/monitoring.py | 67 +++++++++++++++++
 2 files changed, 72 insertions(+), 70 deletions(-)

diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py
index 3e89200d3b470..5c430476cd4c5 100644
--- a/src/pybind/mgr/cephadm/module.py
+++ b/src/pybind/mgr/cephadm/module.py
@@ -40,7 +40,7 @@ from .services.cephadmservice import MonService, MgrService, MdsService, RgwServ
     RbdMirrorService
 from .services.nfs import NFSService
 from .services.osd import RemoveUtil, OSDRemoval, OSDService
-from .services.monitoring import GrafanaService
+from .services.monitoring import GrafanaService, AlertmanagerService
 from .inventory import Inventory, SpecStore, HostCache
 
 try:
@@ -429,6 +429,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule):
         self.rgw_service = RgwService(self)
         self.rbd_mirror_service = RbdMirrorService(self)
         self.grafana_service = GrafanaService(self)
+        self.alertmanager_service = AlertmanagerService(self)
 
     def shutdown(self):
         self.log.debug('shutdown')
@@ -1838,7 +1839,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule):
                     self.nfs_service._generate_nfs_config(daemon_type, daemon_id, host)
             extra_args.extend(['--config-json', '-'])
         elif daemon_type == 'alertmanager':
-            cephadm_config, deps = self._generate_alertmanager_config()
+            cephadm_config, deps = self.alertmanager_service.generate_config()
             extra_args.extend(['--config-json', '-'])
         else:
             # Ceph.daemons (mon, mgr, mds, osd, etc)
@@ -1939,7 +1940,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule):
             'rbd-mirror': self.rbd_mirror_service.create,
             'nfs': self.nfs_service.create,
             'grafana': self.grafana_service.create,
-            'alertmanager': self._create_alertmanager,
+            'alertmanager': self.alertmanager_service.create,
             'prometheus': self._create_prometheus,
             'node-exporter': self._create_node_exporter,
             'crash': self._create_crash,
@@ -2422,68 +2423,6 @@ scrape_configs:
         # type: () -> str
         return self.get('mgr_map').get('services', {}).get('dashboard', '')
 
-    def _generate_alertmanager_config(self):
-        # type: () -> Tuple[Dict[str, Any], List[str]]
-        deps = []  # type: List[str]
-
-        # dashboard(s)
-        dashboard_urls = []
-        mgr_map = self.get('mgr_map')
-        port = None
-        proto = None  # http: or https:
-        url = mgr_map.get('services', {}).get('dashboard', None)
-        if url:
-            dashboard_urls.append(url)
-            proto = url.split('/')[0]
-            port = url.split('/')[2].split(':')[1]
-        # scan all mgrs to generate deps and to get standbys too.
-        # assume that they are all on the same port as the active mgr.
-        for dd in self.cache.get_daemons_by_service('mgr'):
-            # we consider mgr a dep even if the dashboard is disabled
-            # in order to be consistent with _calc_daemon_deps().
-            deps.append(dd.name())
-            if not port:
-                continue
-            if dd.daemon_id == self.get_mgr_id():
-                continue
-            addr = self.inventory.get_addr(dd.hostname)
-            dashboard_urls.append('%s//%s:%s/' % (proto, addr.split(':')[0],
-                                                  port))
-
-        yml = """# generated by cephadm
-# See https://prometheus.io/docs/alerting/configuration/ for documentation.
-
-global:
-  resolve_timeout: 5m
-
-route:
-  group_by: ['alertname']
-  group_wait: 10s
-  group_interval: 10s
-  repeat_interval: 1h
-  receiver: 'ceph-dashboard'
-receivers:
-- name: 'ceph-dashboard'
-  webhook_configs:
-{urls}
-""".format(
-            urls='\n'.join(
-                ["  - url: '{}api/prometheus_receiver'".format(u)
-                 for u in dashboard_urls]
-            ))
-        peers = []
-        port = '9094'
-        for dd in self.cache.get_daemons_by_service('alertmanager'):
-            deps.append(dd.name())
-            addr = self.inventory.get_addr(dd.hostname)
-            peers.append(addr.split(':')[0] + ':' + port)
-        return {
-            "files": {
-                "alertmanager.yml": yml
-            },
-            "peers": peers
-        }, sorted(deps)
-
     def add_prometheus(self, spec):
         return self._add_daemon('prometheus', spec, self._create_prometheus)
 
@@ -2534,16 +2473,12 @@ receivers:
 
     def add_alertmanager(self, spec):
         # type: (ServiceSpec) -> AsyncCompletion
-        return self._add_daemon('alertmanager', spec, self._create_alertmanager)
+        return self._add_daemon('alertmanager', spec, self.alertmanager_service.create)
 
     @trivial_completion
     def apply_alertmanager(self, spec: ServiceSpec):
         return self._apply(spec)
 
-    def _create_alertmanager(self, daemon_id, host):
-        return self._create_daemon('alertmanager', daemon_id, host)
-
-
     def _get_container_image_id(self, image_name):
         # pick a random host...
         host = None
diff --git a/src/pybind/mgr/cephadm/services/monitoring.py b/src/pybind/mgr/cephadm/services/monitoring.py
index f7d2fda4d7a0a..f05ff4fe005b0 100644
--- a/src/pybind/mgr/cephadm/services/monitoring.py
+++ b/src/pybind/mgr/cephadm/services/monitoring.py
@@ -102,3 +102,70 @@ datasources:
             }
         }
         return config_file, sorted(deps)
+
+
+class AlertmanagerService(CephadmService):
+    def create(self, daemon_id, host) -> str:
+        return self.mgr._create_daemon('alertmanager', daemon_id, host)
+
+    def generate_config(self):
+        # type: () -> Tuple[Dict[str, Any], List[str]]
+        deps = []  # type: List[str]
+
+        # dashboard(s)
+        dashboard_urls = []
+        mgr_map = self.mgr.get('mgr_map')
+        port = None
+        proto = None  # http: or https:
+        url = mgr_map.get('services', {}).get('dashboard', None)
+        if url:
+            dashboard_urls.append(url)
+            proto = url.split('/')[0]
+            port = url.split('/')[2].split(':')[1]
+        # scan all mgrs to generate deps and to get standbys too.
+        # assume that they are all on the same port as the active mgr.
+        for dd in self.mgr.cache.get_daemons_by_service('mgr'):
+            # we consider mgr a dep even if the dashboard is disabled
+            # in order to be consistent with _calc_daemon_deps().
+            deps.append(dd.name())
+            if not port:
+                continue
+            if dd.daemon_id == self.mgr.get_mgr_id():
+                continue
+            addr = self.mgr.inventory.get_addr(dd.hostname)
+            dashboard_urls.append('%s//%s:%s/' % (proto, addr.split(':')[0],
+                                                  port))
+
+        yml = """# generated by cephadm
+# See https://prometheus.io/docs/alerting/configuration/ for documentation.
+
+global:
+  resolve_timeout: 5m
+
+route:
+  group_by: ['alertname']
+  group_wait: 10s
+  group_interval: 10s
+  repeat_interval: 1h
+  receiver: 'ceph-dashboard'
+receivers:
+- name: 'ceph-dashboard'
+  webhook_configs:
+{urls}
+""".format(
+            urls='\n'.join(
+                ["  - url: '{}api/prometheus_receiver'".format(u)
+                 for u in dashboard_urls]
+            ))
+        peers = []
+        port = '9094'
+        for dd in self.mgr.cache.get_daemons_by_service('alertmanager'):
+            deps.append(dd.name())
+            addr = self.mgr.inventory.get_addr(dd.hostname)
+            peers.append(addr.split(':')[0] + ':' + port)
+        return {
+            "files": {
+                "alertmanager.yml": yml
+            },
+            "peers": peers
+        }, sorted(deps)
-- 
2.39.5