From: John Spray Date: Thu, 28 Sep 2017 14:10:14 +0000 (-0400) Subject: mgr/prometheus: add ceph_disk_occupation series X-Git-Tag: v12.2.2~61^2~47 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=8a9e1cb00110ed73f492f1f6cf23b3595756efc4;p=ceph.git mgr/prometheus: add ceph_disk_occupation series This is the magic series that enables consumers to easily get the drive stats that go with their OSD stats. Fixes: http://tracker.ceph.com/issues/21594 Signed-off-by: John Spray (cherry picked from commit 284be75524f7125dc1409b9c05fe47b37484964e) --- diff --git a/src/pybind/mgr/prometheus/module.py b/src/pybind/mgr/prometheus/module.py index 2b22b67e8c11..162aadc91d81 100644 --- a/src/pybind/mgr/prometheus/module.py +++ b/src/pybind/mgr/prometheus/module.py @@ -83,6 +83,8 @@ OSD_METADATA = ('cluster_addr', 'device_class', 'id', 'public_addr', 'weight') POOL_METADATA = ('pool_id', 'name') +DISK_OCCUPATION = ('instance', 'device', 'ceph_daemon') + class Metric(object): def __init__(self, mtype, name, desc, labels=None): @@ -184,6 +186,17 @@ class Module(MgrModule): 'OSD Metadata', OSD_METADATA ) + + # The reason for having this separate to OSD_METADATA is + # so that we can stably use the same tag names that + # the Prometheus node_exporter does + metrics['disk_occupation'] = Metric( + 'undef', + 'disk_occupation', + 'Associate Ceph daemon with disk used', + DISK_OCCUPATION + ) + metrics['pool_metadata'] = Metric( 'untyped', 'pool_metadata', @@ -258,15 +271,15 @@ class Module(MgrModule): if state not in reported_states: self.metrics[path].set(0) - def get_metadata(self): + def get_osd_metadata(self): osd_map = self.get('osd_map') - osd_dev = self.get('osd_map_crush')['devices'] + osd_devices = self.get('osd_map_crush')['devices'] for osd in osd_map['osds']: id_ = osd['osd'] p_addr = osd['public_addr'] c_addr = osd['cluster_addr'] w = osd['weight'] - dev_class = next((osd for osd in osd_dev if osd['id'] == id_)) + dev_class = next((osd for osd in osd_devices if osd['id'] == id_)) self.metrics['osd_metadata'].set(0, ( c_addr, dev_class['class'], @@ -275,6 +288,27 @@ class Module(MgrModule): w )) + osd_metadata = self.get_metadata("osd", str(id_)) + dev_keys = ("backend_filestore_dev_node", "bluestore_bdev_dev_node") + osd_dev_node = None + for dev_key in dev_keys: + val = osd_metadata.get(dev_key, None) + if val and val != "unknown": + osd_dev_node = val + break + osd_hostname = osd_metadata.get('hostname', None) + if osd_dev_node and osd_hostname: + self.log.debug("Got dev for osd {0}: {1}/{2}".format( + id_, osd_hostname, osd_dev_node)) + self.metrics['disk_occupation'].set(0, ( + osd_hostname, + osd_dev_node, + "osd.{0}".format(id_) + )) + else: + self.log.info("Missing dev node metadata for osd {0}, skipping " + "occupation record for this osd".format(id_)) + for pool in osd_map['pools']: id_ = pool['pool'] name = pool['pool_name'] @@ -284,7 +318,7 @@ class Module(MgrModule): self.get_health() self.get_df() self.get_quorum_status() - self.get_metadata() + self.get_osd_metadata() self.get_pg_status() for daemon, counters in self.get_all_perf_counters().iteritems():