From: Kiefer Chang Date: Fri, 22 May 2020 06:27:56 +0000 (+0800) Subject: mgr/cephadm: config service_url in Dashboard for Prometheus and AlertManager X-Git-Tag: v16.1.0~2211^2~1 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=a6135429b95a86aaae0626ddb9f53f0a6118aad7;p=ceph.git mgr/cephadm: config service_url in Dashboard for Prometheus and AlertManager Calling Dashboard's CLI to set service URLs after deploying Prometheus and AlertManager Daemons. Fixes: https://tracker.ceph.com/issues/45625 Signed-off-by: Kiefer Chang --- diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py index 9adb76c17eed1..6fc2f14025782 100644 --- a/src/pybind/mgr/cephadm/module.py +++ b/src/pybind/mgr/cephadm/module.py @@ -1,6 +1,7 @@ import json import errno import logging +from collections import defaultdict from threading import Event from functools import wraps @@ -30,7 +31,7 @@ from orchestrator import OrchestratorError, OrchestratorValidationError, HostSpe from . import remotes from . import utils from .services.cephadmservice import MonService, MgrService, MdsService, RgwService, \ - RbdMirrorService, CrashService + RbdMirrorService, CrashService, CephadmService from .services.iscsi import IscsiService from .services.nfs import NFSService from .services.osd import RemoveUtil, OSDRemoval, OSDService @@ -338,6 +339,21 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule): self.node_exporter_service = NodeExporterService(self) self.crash_service = CrashService(self) self.iscsi_service = IscsiService(self) + self.cephadm_services = { + 'mon': self.mon_service, + 'mgr': self.mgr_service, + 'osd': self.osd_service, + 'mds': self.mds_service, + 'rgw': self.rgw_service, + 'rbd-mirror': self.rbd_mirror_service, + 'nfs': self.nfs_service, + 'grafana': self.grafana_service, + 'alertmanager': self.alertmanager_service, + 'prometheus': self.prometheus_service, + 'node-exporter': self.node_exporter_service, + 'crash': self.crash_service, + 'iscsi': self.iscsi_service, + } def shutdown(self): self.log.debug('shutdown') @@ -346,6 +362,10 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule): self.run = False self.event.set() + def _get_cephadm_service(self, service_type: str) -> CephadmService: + assert service_type in ServiceSpec.KNOWN_SERVICE_TYPES + return self.cephadm_services[service_type] + def _kick_serve_loop(self): self.log.debug('_kick_serve_loop') self.event.set() @@ -1858,8 +1878,7 @@ you may want to run: last_monmap = None # just in case clocks are skewed daemons = self.cache.get_daemons() - grafanas = [] # type: List[orchestrator.DaemonDescription] - iscsi_daemons = [] + daemons_post = defaultdict(list) for dd in daemons: # orphan? spec = self.spec_store.specs.get(dd.service_name(), None) @@ -1873,12 +1892,10 @@ you may want to run: if spec and spec.unmanaged: continue - # dependencies? - if dd.daemon_type == 'grafana': - # put running instances at the front of the list - grafanas.insert(0, dd) - elif dd.daemon_type == 'iscsi': - iscsi_daemons.append(dd) + # These daemon types require additional configs after creation + if dd.daemon_type in ['grafana', 'iscsi', 'prometheus', 'alertmanager']: + daemons_post[dd.daemon_type].append(dd) + deps = self._calc_daemon_deps(dd.daemon_type, dd.daemon_id) last_deps, last_config = self.cache.get_daemon_last_config_deps( dd.hostname, dd.name()) @@ -1904,10 +1921,9 @@ you may want to run: self._create_daemon(dd.daemon_type, dd.daemon_id, dd.hostname, reconfig=True) - if grafanas: - self.grafana_service.daemon_check_post(grafanas) - if iscsi_daemons: - self.iscsi_service.daemon_check_post(iscsi_daemons) + # do daemon post actions + for daemon_type, daemon_descs in daemons_post.items(): + self._get_cephadm_service(daemon_type).daemon_check_post(daemon_descs) def _add_daemon(self, daemon_type, spec, create_func: Callable[..., T], config_func=None) -> List[T]: diff --git a/src/pybind/mgr/cephadm/services/cephadmservice.py b/src/pybind/mgr/cephadm/services/cephadmservice.py index 5e331d619809a..3755ea1b6e168 100644 --- a/src/pybind/mgr/cephadm/services/cephadmservice.py +++ b/src/pybind/mgr/cephadm/services/cephadmservice.py @@ -1,6 +1,8 @@ import logging from typing import TYPE_CHECKING, List +from mgr_module import MonCommandFailed + from ceph.deployment.service_spec import ServiceSpec, RGWSpec from orchestrator import OrchestratorError, DaemonDescription from cephadm import utils @@ -22,6 +24,38 @@ class CephadmService: """The post actions needed to be done after daemons are checked""" raise NotImplementedError() + def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription: + raise NotImplementedError() + + def _inventory_get_addr(self, hostname: str): + """Get a host's address with its hostname.""" + return self.mgr.inventory.get_addr(hostname) + + def _set_service_url_on_dashboard(self, + service_name: str, + get_mon_cmd: str, + set_mon_cmd: str, + service_url: str): + """A helper to get and set service_url via Dashboard's MON command.""" + try: + _, out, _ = self.mgr.check_mon_command({ + 'prefix': get_mon_cmd + }) + except MonCommandFailed as e: + logger.warning('Failed to get service URL for %s: %s', service_name, e) + return + if out.strip() != service_url: + try: + logger.info( + 'Setting service URL %s for %s in the Dashboard', service_url, service_name) + _, out, _ = self.mgr.check_mon_command({ + 'prefix': set_mon_cmd, + 'value': service_url, + }) + except MonCommandFailed as e: + logger.warning('Failed to set service URL %s for %s in the Dashboard: %s', + service_url, service_name, e) + class MonService(CephadmService): def create(self, name, host, network): diff --git a/src/pybind/mgr/cephadm/services/iscsi.py b/src/pybind/mgr/cephadm/services/iscsi.py index 18bf1699f783b..0e5a3a3d1185e 100644 --- a/src/pybind/mgr/cephadm/services/iscsi.py +++ b/src/pybind/mgr/cephadm/services/iscsi.py @@ -93,7 +93,7 @@ class IscsiService(CephadmService): logger.warning( 'Unable to add iSCSI gateway to the Dashboard for %s: %s', dd, reason) continue - host = self.mgr.inventory.get_addr(dd.hostname) + host = self._inventory_get_addr(dd.hostname) service_url = 'http://{}:{}@{}:{}'.format( spec.api_user, spec.api_password, host, spec.api_port or '5000') gw = gateways.get(dd.hostname) diff --git a/src/pybind/mgr/cephadm/services/monitoring.py b/src/pybind/mgr/cephadm/services/monitoring.py index 73d52a2f0a1b7..dc20ac9e0e46d 100644 --- a/src/pybind/mgr/cephadm/services/monitoring.py +++ b/src/pybind/mgr/cephadm/services/monitoring.py @@ -9,6 +9,8 @@ from mgr_util import verify_tls, ServerConfigException, create_self_signed_cert logger = logging.getLogger(__name__) class GrafanaService(CephadmService): + DEFAULT_SERVICE_PORT = 3000 + def create(self, daemon_id, host): # type: (str, str) -> str return self.mgr._create_daemon('grafana', daemon_id, host) @@ -92,12 +94,12 @@ datasources: protocol = https cert_file = /etc/grafana/certs/cert_file cert_key = /etc/grafana/certs/cert_key - http_port = 3000 + http_port = {} [security] admin_user = admin admin_password = admin allow_embedding = true -""", +""".format(self.DEFAULT_SERVICE_PORT), 'provisioning/datasources/ceph-dashboard.yml': generate_grafana_ds_config(prom_services), 'certs/cert_file': '# generated by cephadm\n%s' % cert, 'certs/cert_key': '# generated by cephadm\n%s' % pkey, @@ -105,21 +107,25 @@ datasources: } return config_file, sorted(deps) - def daemon_check_post(self, daemon_descrs: List[DaemonDescription]): - # make sure the dashboard [does not] references grafana - try: - current_url = self.mgr.get_module_option_ex('dashboard', 'GRAFANA_API_URL') - host = daemon_descrs[0].hostname - url = f'https://{self.mgr.inventory.get_addr(host)}:3000' - if current_url != url: - logger.info('Setting dashboard grafana config to %s' % url) - self.mgr.set_module_option_ex('dashboard', 'GRAFANA_API_URL', url) - # FIXME: is it a signed cert?? - except Exception as e: - logger.debug('got exception fetching dashboard grafana state: %s', e) + def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription: + # Use the least-created one as the active daemon + return daemon_descrs[-1] + def daemon_check_post(self, daemon_descrs: List[DaemonDescription]): + # TODO: signed cert + dd = self.get_active_daemon(daemon_descrs) + service_url = 'https://{}:{}'.format( + self._inventory_get_addr(dd.hostname), self.DEFAULT_SERVICE_PORT) + self._set_service_url_on_dashboard( + 'Grafana', + 'dashboard get-grafana-api-url', + 'dashboard set-grafana-api-url', + service_url + ) class AlertmanagerService(CephadmService): + DEFAULT_SERVICE_PORT = 9093 + def create(self, daemon_id, host) -> str: return self.mgr._create_daemon('alertmanager', daemon_id, host) @@ -185,8 +191,24 @@ receivers: "peers": peers }, sorted(deps) + def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription: + # TODO: if there are multiple daemons, who is the active one? + return daemon_descrs[0] + + def daemon_check_post(self, daemon_descrs: List[DaemonDescription]): + dd = self.get_active_daemon(daemon_descrs) + service_url = 'http://{}:{}'.format(self._inventory_get_addr(dd.hostname), self.DEFAULT_SERVICE_PORT) + self._set_service_url_on_dashboard( + 'AlertManager', + 'dashboard get-alertmanager-api-host', + 'dashboard set-alertmanager-api-host', + service_url + ) + class PrometheusService(CephadmService): + DEFAULT_SERVICE_PORT = 9095 + def create(self, daemon_id, host) -> str: return self.mgr._create_daemon('prometheus', daemon_id, host) @@ -283,6 +305,20 @@ scrape_configs: return r, sorted(deps) + def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription: + # TODO: if there are multiple daemons, who is the active one? + return daemon_descrs[0] + + def daemon_check_post(self, daemon_descrs: List[DaemonDescription]): + dd = self.get_active_daemon(daemon_descrs) + service_url = 'http://{}:{}'.format( + self._inventory_get_addr(dd.hostname), self.DEFAULT_SERVICE_PORT) + self._set_service_url_on_dashboard( + 'Prometheus', + 'dashboard get-prometheus-api-host', + 'dashboard set-prometheus-api-host', + service_url + ) class NodeExporterService(CephadmService): def create(self, daemon_id, host) -> str: diff --git a/src/pybind/mgr/cephadm/tests/test_services.py b/src/pybind/mgr/cephadm/tests/test_services.py new file mode 100644 index 0000000000000..de0f9ea059c85 --- /dev/null +++ b/src/pybind/mgr/cephadm/tests/test_services.py @@ -0,0 +1,33 @@ +from unittest.mock import MagicMock + +from cephadm.services.cephadmservice import CephadmService + + +class FakeMgr: + def __init__(self): + self.config = '' + self.check_mon_command = MagicMock(side_effect=self._check_mon_command) + + def _check_mon_command(self, cmd_dict): + prefix = cmd_dict.get('prefix') + if prefix == 'get-cmd': + return 0, self.config, '' + if prefix == 'set-cmd': + self.config = cmd_dict.get('value') + return 0, 'value set', '' + return -1, '', 'error' + + +class TestCephadmService: + def test_set_service_url_on_dashboard(self): + # pylint: disable=protected-access + mgr = FakeMgr() + service_url = 'http://svc:1000' + service = CephadmService(mgr) + service._set_service_url_on_dashboard('svc', 'get-cmd', 'set-cmd', service_url) + assert mgr.config == service_url + + # set-cmd should not be called if value doesn't change + mgr.check_mon_command.reset_mock() + service._set_service_url_on_dashboard('svc', 'get-cmd', 'set-cmd', service_url) + mgr.check_mon_command.assert_called_once_with({'prefix': 'get-cmd'})