git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/cephadm: config service_url in Dashboard for Prometheus and AlertManager
author: Kiefer Chang <kiefer.chang@suse.com>
Fri, 22 May 2020 06:27:56 +0000 (14:27 +0800)
committer: Sebastian Wagner <sebastian.wagner@suse.com>
Tue, 2 Jun 2020 12:58:27 +0000 (14:58 +0200)
Call the Dashboard's CLI to set the service URLs after deploying the
Prometheus and AlertManager daemons.

Fixes: https://tracker.ceph.com/issues/45625
Signed-off-by: Kiefer Chang <kiefer.chang@suse.com>
(cherry picked from commit a6135429b95a86aaae0626ddb9f53f0a6118aad7)

src/pybind/mgr/cephadm/module.py
src/pybind/mgr/cephadm/services/cephadmservice.py
src/pybind/mgr/cephadm/services/iscsi.py
src/pybind/mgr/cephadm/services/monitoring.py
src/pybind/mgr/cephadm/tests/test_services.py [new file with mode: 0644]

index 6834c68a6a0002081704db416d9435c101688f9c..7e45cf44b19c69bff55371099cd9f8f31f96442e 100644 (file)
@@ -1,6 +1,7 @@
 import json
 import errno
 import logging
+from collections import defaultdict
 from threading import Event
 from functools import wraps
 
@@ -30,7 +31,7 @@ from orchestrator import OrchestratorError, OrchestratorValidationError, HostSpe
 from . import remotes
 from . import utils
 from .services.cephadmservice import MonService, MgrService, MdsService, RgwService, \
-    RbdMirrorService, CrashService
+    RbdMirrorService, CrashService, CephadmService
 from .services.iscsi import IscsiService
 from .services.nfs import NFSService
 from .services.osd import RemoveUtil, OSDRemoval, OSDService
@@ -347,6 +348,21 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule):
         self.node_exporter_service = NodeExporterService(self)
         self.crash_service = CrashService(self)
         self.iscsi_service = IscsiService(self)
+        self.cephadm_services = {
+            'mon': self.mon_service,
+            'mgr': self.mgr_service,
+            'osd': self.osd_service,
+            'mds': self.mds_service,
+            'rgw': self.rgw_service,
+            'rbd-mirror': self.rbd_mirror_service,
+            'nfs': self.nfs_service,
+            'grafana': self.grafana_service,
+            'alertmanager': self.alertmanager_service,
+            'prometheus': self.prometheus_service,
+            'node-exporter': self.node_exporter_service,
+            'crash': self.crash_service,
+            'iscsi': self.iscsi_service,
+        }
 
     def shutdown(self):
         self.log.debug('shutdown')
@@ -355,6 +371,10 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule):
         self.run = False
         self.event.set()
 
+    def _get_cephadm_service(self, service_type: str) -> CephadmService:
+        assert service_type in ServiceSpec.KNOWN_SERVICE_TYPES
+        return self.cephadm_services[service_type]
+
     def _kick_serve_loop(self):
         self.log.debug('_kick_serve_loop')
         self.event.set()
@@ -1870,8 +1890,7 @@ you may want to run:
             last_monmap = None   # just in case clocks are skewed
 
         daemons = self.cache.get_daemons()
-        grafanas = []  # type: List[orchestrator.DaemonDescription]
-        iscsi_daemons = []
+        daemons_post = defaultdict(list)
         for dd in daemons:
             # orphan?
             spec = self.spec_store.specs.get(dd.service_name(), None)
@@ -1885,12 +1904,10 @@ you may want to run:
             if spec and spec.unmanaged:
                 continue
 
-            # dependencies?
-            if dd.daemon_type == 'grafana':
-                # put running instances at the front of the list
-                grafanas.insert(0, dd)
-            elif dd.daemon_type == 'iscsi':
-                iscsi_daemons.append(dd)
+            # These daemon types require additional configs after creation
+            if dd.daemon_type in ['grafana', 'iscsi', 'prometheus', 'alertmanager']:
+                daemons_post[dd.daemon_type].append(dd)
+
             deps = self._calc_daemon_deps(dd.daemon_type, dd.daemon_id)
             last_deps, last_config = self.cache.get_daemon_last_config_deps(
                 dd.hostname, dd.name())
@@ -1916,10 +1933,9 @@ you may want to run:
                 self._create_daemon(dd.daemon_type, dd.daemon_id,
                                     dd.hostname, reconfig=True)
 
-        if grafanas:
-            self.grafana_service.daemon_check_post(grafanas)
-        if iscsi_daemons:
-            self.iscsi_service.daemon_check_post(iscsi_daemons)
+        # do daemon post actions
+        for daemon_type, daemon_descs in daemons_post.items():
+            self._get_cephadm_service(daemon_type).daemon_check_post(daemon_descs)
 
     def _add_daemon(self, daemon_type, spec,
                     create_func: Callable[..., T], config_func=None) -> List[T]:
index 5e331d619809af7b67ad5ea9b166af6222d2be7b..3755ea1b6e1688cbd0c00cd91eb54f315b47074a 100644 (file)
@@ -1,6 +1,8 @@
 import logging
 from typing import TYPE_CHECKING, List
 
+from mgr_module import MonCommandFailed
+
 from ceph.deployment.service_spec import ServiceSpec, RGWSpec
 from orchestrator import OrchestratorError, DaemonDescription
 from cephadm import utils
@@ -22,6 +24,38 @@ class CephadmService:
         """The post actions needed to be done after daemons are checked"""
         raise NotImplementedError()
 
+    def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription:
+        raise NotImplementedError()
+
+    def _inventory_get_addr(self, hostname: str):
+        """Get a host's address with its hostname."""
+        return self.mgr.inventory.get_addr(hostname)
+
+    def _set_service_url_on_dashboard(self,
+                                      service_name: str,
+                                      get_mon_cmd: str,
+                                      set_mon_cmd: str,
+                                      service_url: str):
+        """A helper to get and set service_url via Dashboard's MON command."""
+        try:
+            _, out, _ = self.mgr.check_mon_command({
+                'prefix': get_mon_cmd
+            })
+        except MonCommandFailed as e:
+            logger.warning('Failed to get service URL for %s: %s', service_name, e)
+            return
+        if out.strip() != service_url:
+            try:
+                logger.info(
+                    'Setting service URL %s for %s in the Dashboard', service_url, service_name)
+                _, out, _ = self.mgr.check_mon_command({
+                    'prefix': set_mon_cmd,
+                    'value': service_url,
+                })
+            except MonCommandFailed as e:
+                logger.warning('Failed to set service URL %s for %s in the Dashboard: %s',
+                               service_url, service_name, e)
+
 
 class MonService(CephadmService):
     def create(self, name, host, network):
index 18bf1699f783b7767a6aaf5e286f8f0314d99127..0e5a3a3d1185e54bebdb6f5450c240a6680d9115 100644 (file)
@@ -93,7 +93,7 @@ class IscsiService(CephadmService):
                 logger.warning(
                     'Unable to add iSCSI gateway to the Dashboard for %s: %s', dd, reason)
                 continue
-            host = self.mgr.inventory.get_addr(dd.hostname)
+            host = self._inventory_get_addr(dd.hostname)
             service_url = 'http://{}:{}@{}:{}'.format(
                 spec.api_user, spec.api_password, host, spec.api_port or '5000')
             gw = gateways.get(dd.hostname)
index 73d52a2f0a1b724b0849fc9610678708cac37b36..dc20ac9e0e46df699b4d560bd61b0bfe0006e15d 100644 (file)
@@ -9,6 +9,8 @@ from mgr_util import verify_tls, ServerConfigException, create_self_signed_cert
 logger = logging.getLogger(__name__)
 
 class GrafanaService(CephadmService):
+    DEFAULT_SERVICE_PORT = 3000
+
     def create(self, daemon_id, host):
         # type: (str, str) -> str
         return self.mgr._create_daemon('grafana', daemon_id, host)
@@ -92,12 +94,12 @@ datasources:
   protocol = https
   cert_file = /etc/grafana/certs/cert_file
   cert_key = /etc/grafana/certs/cert_key
-  http_port = 3000
+  http_port = {}
 [security]
   admin_user = admin
   admin_password = admin
   allow_embedding = true
-""",
+""".format(self.DEFAULT_SERVICE_PORT),
                 'provisioning/datasources/ceph-dashboard.yml': generate_grafana_ds_config(prom_services),
                 'certs/cert_file': '# generated by cephadm\n%s' % cert,
                 'certs/cert_key': '# generated by cephadm\n%s' % pkey,
@@ -105,21 +107,25 @@ datasources:
         }
         return config_file, sorted(deps)
 
-    def daemon_check_post(self, daemon_descrs: List[DaemonDescription]):
-        # make sure the dashboard [does not] references grafana
-        try:
-            current_url = self.mgr.get_module_option_ex('dashboard', 'GRAFANA_API_URL')
-            host = daemon_descrs[0].hostname
-            url = f'https://{self.mgr.inventory.get_addr(host)}:3000'
-            if current_url != url:
-                logger.info('Setting dashboard grafana config to %s' % url)
-                self.mgr.set_module_option_ex('dashboard', 'GRAFANA_API_URL', url)
-                # FIXME: is it a signed cert??
-        except Exception as e:
-            logger.debug('got exception fetching dashboard grafana state: %s', e)
+    def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription:
+        # Use the least-created one as the active daemon
+        return daemon_descrs[-1]
 
+    def daemon_check_post(self, daemon_descrs: List[DaemonDescription]):
+        # TODO: signed cert
+        dd = self.get_active_daemon(daemon_descrs)
+        service_url = 'https://{}:{}'.format(
+            self._inventory_get_addr(dd.hostname), self.DEFAULT_SERVICE_PORT)
+        self._set_service_url_on_dashboard(
+            'Grafana',
+            'dashboard get-grafana-api-url',
+            'dashboard set-grafana-api-url',
+            service_url
+        )
 
 class AlertmanagerService(CephadmService):
+    DEFAULT_SERVICE_PORT = 9093
+
     def create(self, daemon_id, host) -> str:
         return self.mgr._create_daemon('alertmanager', daemon_id, host)
 
@@ -185,8 +191,24 @@ receivers:
             "peers": peers
         }, sorted(deps)
 
+    def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription:
+        # TODO: if there are multiple daemons, who is the active one?
+        return daemon_descrs[0]
+
+    def daemon_check_post(self, daemon_descrs: List[DaemonDescription]):
+        dd = self.get_active_daemon(daemon_descrs)
+        service_url = 'http://{}:{}'.format(self._inventory_get_addr(dd.hostname), self.DEFAULT_SERVICE_PORT)
+        self._set_service_url_on_dashboard(
+            'AlertManager',
+            'dashboard get-alertmanager-api-host',
+            'dashboard set-alertmanager-api-host',
+            service_url
+        )
+
 
 class PrometheusService(CephadmService):
+    DEFAULT_SERVICE_PORT = 9095
+
     def create(self, daemon_id, host) -> str:
         return self.mgr._create_daemon('prometheus', daemon_id, host)
 
@@ -283,6 +305,20 @@ scrape_configs:
 
         return r, sorted(deps)
 
+    def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription:
+        # TODO: if there are multiple daemons, who is the active one?
+        return daemon_descrs[0]
+
+    def daemon_check_post(self, daemon_descrs: List[DaemonDescription]):
+        dd = self.get_active_daemon(daemon_descrs)
+        service_url = 'http://{}:{}'.format(
+            self._inventory_get_addr(dd.hostname), self.DEFAULT_SERVICE_PORT)
+        self._set_service_url_on_dashboard(
+            'Prometheus',
+            'dashboard get-prometheus-api-host',
+            'dashboard set-prometheus-api-host',
+            service_url
+        )
 
 class NodeExporterService(CephadmService):
     def create(self, daemon_id, host) -> str:
diff --git a/src/pybind/mgr/cephadm/tests/test_services.py b/src/pybind/mgr/cephadm/tests/test_services.py
new file mode 100644 (file)
index 0000000..de0f9ea
--- /dev/null
@@ -0,0 +1,33 @@
+from unittest.mock import MagicMock
+
+from cephadm.services.cephadmservice import CephadmService
+
+
+class FakeMgr:
+    def __init__(self):
+        self.config = ''
+        self.check_mon_command = MagicMock(side_effect=self._check_mon_command)
+
+    def _check_mon_command(self, cmd_dict):
+        prefix = cmd_dict.get('prefix')
+        if prefix == 'get-cmd':
+            return 0, self.config, ''
+        if prefix == 'set-cmd':
+            self.config = cmd_dict.get('value')
+            return 0, 'value set', ''
+        return -1, '', 'error'
+
+
+class TestCephadmService:
+    def test_set_service_url_on_dashboard(self):
+        # pylint: disable=protected-access
+        mgr = FakeMgr()
+        service_url = 'http://svc:1000'
+        service = CephadmService(mgr)
+        service._set_service_url_on_dashboard('svc', 'get-cmd', 'set-cmd', service_url)
+        assert mgr.config == service_url
+
+        # set-cmd should not be called if value doesn't change
+        mgr.check_mon_command.reset_mock()
+        service._set_service_url_on_dashboard('svc', 'get-cmd', 'set-cmd', service_url)
+        mgr.check_mon_command.assert_called_once_with({'prefix': 'get-cmd'})