From b2a1a21602b228cf90137c3effd74916e6b847b5 Mon Sep 17 00:00:00 2001 From: Jan Fajerski Date: Fri, 20 Apr 2018 10:21:42 +0200 Subject: [PATCH] pybind/mgr/prometheus: unify label names, move away from "id" This commit makes label names that are supposed to be used for matching consistent. Prometheus can not match on say osd_metadata{id=1} and osd_up{ceph_daemon=osd.1}. We want to use the label ceph_daemon=osd.1 on all metrics. Signed-off-by: Jan Fajerski --- src/pybind/mgr/prometheus/module.py | 35 ++++++++++++++++------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/src/pybind/mgr/prometheus/module.py b/src/pybind/mgr/prometheus/module.py index c7daa128dd8af..4919955ca7e56 100644 --- a/src/pybind/mgr/prometheus/module.py +++ b/src/pybind/mgr/prometheus/module.py @@ -83,14 +83,15 @@ DF_POOL = ['max_avail', 'bytes_used', 'raw_bytes_used', 'objects', 'dirty', OSD_FLAGS = ('noup', 'nodown', 'noout', 'noin', 'nobackfill', 'norebalance', 'norecover', 'noscrub', 'nodeep-scrub') -FS_METADATA = ('data_pools', 'id', 'metadata_pool', 'name') +FS_METADATA = ('data_pools', 'fs_id', 'metadata_pool', 'name') -MDS_METADATA = ('id', 'fs', 'hostname', 'public_addr', 'rank', 'ceph_version') +MDS_METADATA = ('ceph_daemon', 'fs_id', 'hostname', 'public_addr', 'rank', + 'ceph_version') -MON_METADATA = ('id', 'hostname', 'public_addr', 'rank', 'ceph_version') +MON_METADATA = ('ceph_daemon', 'hostname', 'public_addr', 'rank', 'ceph_version') -OSD_METADATA = ('cluster_addr', 'device_class', 'id', 'hostname', 'public_addr', - 'ceph_version') +OSD_METADATA = ('ceph_daemon', 'cluster_addr', 'device_class', 'hostname', + 'public_addr', 'ceph_version') OSD_STATUS = ['weight', 'up', 'in'] @@ -98,9 +99,9 @@ OSD_STATS = ['apply_latency_ms', 'commit_latency_ms'] POOL_METADATA = ('pool_id', 'name') -RGW_METADATA = ('id', 'hostname', 'ceph_version') +RGW_METADATA = ('ceph_daemon', 'hostname', 'ceph_version') -DISK_OCCUPATION = ('instance', 'device', 'ceph_daemon') +DISK_OCCUPATION = ( 'ceph_daemon', 'device','instance') class Metrics(object): @@ -373,13 +374,14 @@ class Module(MgrModule): fs['id'], fs['mdsmap']['metadata_pool'], fs['mdsmap']['fs_name'])) + self.log.debug('mdsmap: {}'.format(fs['mdsmap'])) for gid, daemon in fs['mdsmap']['info'].items(): id_ = daemon['name'] host_version = servers.get((id_, 'mds'), ('','')) self.metrics.append('mds_metadata', 1, - (id_, fs['id'], host_version[0], - daemon['addr'], daemon['rank'], - host_version[1])) + ('mds.{}'.format(id_), fs['id'], + host_version[0], daemon['addr'], + daemon['rank'], host_version[1])) def get_quorum_status(self): mon_status = json.loads(self.get('mon_status')['json']) @@ -389,12 +391,12 @@ class Module(MgrModule): id_ = mon['name'] host_version = servers.get((id_, 'mon'), ('','')) self.metrics.append('mon_metadata', 1, - (id_, host_version[0], + ('mon.{}'.format(id_), host_version[0], mon['public_addr'].split(':')[0], rank, host_version[1])) in_quorum = int(rank in mon_status['quorum']) self.metrics.append('mon_quorum_status', in_quorum, - ('mon_{}'.format(id_),)) + ('mon.{}'.format(id_),)) def get_pg_status(self): # TODO add per pool status? @@ -477,9 +479,10 @@ class Module(MgrModule): host_version = servers.get((str(id_), 'osd'), ('','')) self.metrics.append('osd_metadata', 1, ( + 'osd.{}'.format(id_), c_addr, dev_class, - id_, host_version[0], + host_version[0], p_addr, host_version[1] )) @@ -505,9 +508,9 @@ class Module(MgrModule): self.log.debug("Got dev for osd {0}: {1}/{2}".format( id_, osd_hostname, osd_dev_node)) self.metrics.set('disk_occupation', 1, ( + "osd.{0}".format(id_), osd_hostname, - osd_dev_node, - "osd.{0}".format(id_) + osd_dev_node )) else: self.log.info("Missing dev node metadata for osd {0}, skipping " @@ -526,7 +529,7 @@ class Module(MgrModule): self.metrics.append( 'rgw_metadata', 1, - (service_id, hostname, version) + ('{}.{}'.format(service_type, service_id), hostname, version) ) def collect(self): -- 2.39.5