From: Paul Cuzner Date: Tue, 12 May 2020 02:16:32 +0000 (+1200) Subject: mgr/prometheus:add disk metadata to occupation metric X-Git-Tag: wip-pdonnell-testing-20200918.022351~592^2 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=422c828bd337a5a3da3b4d0aa7adf81324be669b;p=ceph-ci.git mgr/prometheus:add disk metadata to occupation metric This patch exposes the devices and device_ids metadata from the OSD to the ceph_disk_occupation metric. This potentially allows OSD alerts to merge with the occupation data to present a disk failure message that points to a specific device/disk serial. AWS, VMware and bare-metal hosts all populate both the device_ids and devices metadata, but in the event that either of these is empty we default to 'N/A' so at least the label is consistent. Signed-off-by: Paul Cuzner --- diff --git a/src/pybind/mgr/prometheus/module.py b/src/pybind/mgr/prometheus/module.py index 2cb7849577f..1df11213f22 100644 --- a/src/pybind/mgr/prometheus/module.py +++ b/src/pybind/mgr/prometheus/module.py @@ -104,7 +104,7 @@ RBD_MIRROR_METADATA = ('ceph_daemon', 'id', 'instance_id', 'hostname', 'ceph_version') DISK_OCCUPATION = ('ceph_daemon', 'device', 'db_device', - 'wal_device', 'instance') + 'wal_device', 'instance', 'devices', 'device_ids') NUM_OBJECTS = ['degraded', 'misplaced', 'unfound'] @@ -632,6 +632,11 @@ class Module(MgrModule): if osd_dev_node and osd_dev_node == "unknown": osd_dev_node = None + # fetch the devices and ids (vendor, model, serial) from the + # osd_metadata + osd_devs = osd_metadata.get('devices', '') or 'N/A' + osd_dev_ids = osd_metadata.get('device_ids', '') or 'N/A' + osd_hostname = osd_metadata.get('hostname', None) if osd_dev_node and osd_hostname: self.log.debug("Got dev for osd {0}: {1}/{2}".format( @@ -641,7 +646,9 @@ class Module(MgrModule): osd_dev_node, osd_db_dev_node, osd_wal_dev_node, - osd_hostname + osd_hostname, + osd_devs, + osd_dev_ids, )) else: self.log.info("Missing dev node metadata for osd {0}, skipping "