From 8ae471ec690ead631efe8f6828aaa3c8808f6f72 Mon Sep 17 00:00:00 2001 From: Paul Cuzner Date: Tue, 4 Jun 2019 10:16:10 +1200 Subject: [PATCH] Add mgr metadata to prometheus exporter module Adds metadata and status information for the mgr daemon(s), together with the status of the mgr modules. Signed-off-by: Paul Cuzner --- src/pybind/mgr/prometheus/module.py | 77 +++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) diff --git a/src/pybind/mgr/prometheus/module.py b/src/pybind/mgr/prometheus/module.py index fb7213262b2b3..cf29464e06dab 100644 --- a/src/pybind/mgr/prometheus/module.py +++ b/src/pybind/mgr/prometheus/module.py @@ -75,6 +75,14 @@ MDS_METADATA = ('ceph_daemon', 'fs_id', 'hostname', 'public_addr', 'rank', MON_METADATA = ('ceph_daemon', 'hostname', 'public_addr', 'rank', 'ceph_version') +MGR_METADATA = ('ceph_daemon', 'hostname', 'ceph_version') + +MGR_STATUS = ('ceph_daemon',) + +MGR_MODULE_STATUS = ('name',) + +MGR_MODULE_CAN_RUN = ('name',) + OSD_METADATA = ('back_iface', 'ceph_daemon', 'cluster_addr', 'device_class', 'front_iface', 'hostname', 'objectstore', 'public_addr', 'ceph_version') @@ -241,6 +249,30 @@ class Module(MgrModule): 'MON Metadata', MON_METADATA ) + metrics['mgr_metadata'] = Metric( + 'gauge', + 'mgr_metadata', + 'MGR metadata', + MGR_METADATA + ) + metrics['mgr_status'] = Metric( + 'gauge', + 'mgr_status', + 'MGR status (0=standby, 1=active)', + MGR_STATUS + ) + metrics['mgr_module_status'] = Metric( + 'gauge', + 'mgr_module_status', + 'MGR module status (0=disabled, 1=enabled, 2=auto-enabled)', + MGR_MODULE_STATUS + ) + metrics['mgr_module_can_run'] = Metric( + 'gauge', + 'mgr_module_can_run', + 'MGR module runnable state i.e. 
can it run (0=no, 1=yes)', + MGR_MODULE_CAN_RUN + ) metrics['osd_metadata'] = Metric( 'untyped', 'osd_metadata', @@ -400,6 +432,50 @@ class Module(MgrModule): 'mon.{}'.format(id_), )) + def get_mgr_status(self): + mgr_map = self.get('mgr_map') + servers = self.get_service_list() + + active = mgr_map['active_name'] + standbys = [s.get('name') for s in mgr_map['standbys']] + + all_mgrs = list(standbys) + all_mgrs.append(active) + + all_modules = {module.get('name'):module.get('can_run') for module in mgr_map['available_modules']} + + for mgr in all_mgrs: + host_version = servers.get((mgr, 'mgr'), ('', '')) + if mgr == active: + _state = 1 + ceph_release = host_version[1].split()[-2] # e.g. nautilus + else: + _state = 0 + + self.metrics['mgr_metadata'].set(1, ( + 'mgr.{}'.format(mgr), host_version[0], + host_version[1] + )) + self.metrics['mgr_status'].set(_state, ( + 'mgr.{}'.format(mgr), + )) + always_on_modules = mgr_map['always_on_modules'][ceph_release] + active_modules = list(always_on_modules) + active_modules.extend(mgr_map['modules']) + + for mod_name in all_modules.keys(): + + if mod_name in always_on_modules: + _state = 2 + elif mod_name in active_modules: + _state = 1 + else: + _state = 0 + + _can_run = 1 if all_modules[mod_name] else 0 + self.metrics['mgr_module_status'].set(_state, (mod_name,)) + self.metrics['mgr_module_can_run'].set(_can_run, (mod_name,)) + def get_pg_status(self): # TODO add per pool status? pg_status = self.get('pg_status') @@ -808,6 +884,7 @@ class Module(MgrModule): self.get_fs() self.get_osd_stats() self.get_quorum_status() + self.get_mgr_status() self.get_metadata_and_osd_status() self.get_pg_status() self.get_num_objects() -- 2.39.5