mgr/prometheus: add ceph_disk_occupation series

author John Spray <john.spray@redhat.com>

Thu, 28 Sep 2017 14:10:14 +0000 (10:10 -0400)

committer John Spray <john.spray@redhat.com>

Wed, 1 Nov 2017 23:03:26 +0000 (23:03 +0000)
author John Spray <john.spray@redhat.com>
Thu, 28 Sep 2017 14:10:14 +0000 (10:10 -0400)
committer John Spray <john.spray@redhat.com>
Wed, 1 Nov 2017 23:03:26 +0000 (23:03 +0000)
diff --git a/src/pybind/mgr/prometheus/module.py b/src/pybind/mgr/prometheus/module.py

index 2b22b67e8c11916b7d3fbc36e1b11ea78f94a802..162aadc91d81df4ef79e16e0f5038e12d1ed4e22 100644 (file)
--- a/src/pybind/mgr/prometheus/module.py
+++ b/src/pybind/mgr/prometheus/module.py
@@ -83,6 +83,8 @@ OSD_METADATA = ('cluster_addr', 'device_class', 'id', 'public_addr', 'weight')
  
  POOL_METADATA = ('pool_id', 'name')
  
+DISK_OCCUPATION = ('instance', 'device', 'ceph_daemon')
+
  
  class Metric(object):
      def __init__(self, mtype, name, desc, labels=None):
@@ -184,6 +186,17 @@ class Module(MgrModule):
              'OSD Metadata',
              OSD_METADATA
          )
+
+        # The reason for having this separate to OSD_METADATA is
+        # so that we can stably use the same tag names that
+        # the Prometheus node_exporter does
+        metrics['disk_occupation'] = Metric(
+            'undef',
+            'disk_occupation',
+            'Associate Ceph daemon with disk used',
+            DISK_OCCUPATION
+        )
+
          metrics['pool_metadata'] = Metric(
              'untyped',
              'pool_metadata',
@@ -258,15 +271,15 @@ class Module(MgrModule):
              if state not in reported_states:
                  self.metrics[path].set(0)
  
-    def get_metadata(self):
+    def get_osd_metadata(self):
          osd_map = self.get('osd_map')
-        osd_dev = self.get('osd_map_crush')['devices']
+        osd_devices = self.get('osd_map_crush')['devices']
          for osd in osd_map['osds']:
              id_ = osd['osd']
              p_addr = osd['public_addr']
              c_addr = osd['cluster_addr']
              w = osd['weight']
-            dev_class = next((osd for osd in osd_dev if osd['id'] == id_))
+            dev_class = next((osd for osd in osd_devices if osd['id'] == id_))
              self.metrics['osd_metadata'].set(0, (
                  c_addr,
                  dev_class['class'],
@@ -275,6 +288,27 @@ class Module(MgrModule):
                  w
              ))
  
+            osd_metadata = self.get_metadata("osd", str(id_))
+            dev_keys = ("backend_filestore_dev_node", "bluestore_bdev_dev_node")
+            osd_dev_node = None
+            for dev_key in dev_keys:
+                val = osd_metadata.get(dev_key, None)
+                if val and val != "unknown":
+                    osd_dev_node = val
+                    break
+            osd_hostname = osd_metadata.get('hostname', None)
+            if osd_dev_node and osd_hostname:
+                self.log.debug("Got dev for osd {0}: {1}/{2}".format(
+                    id_, osd_hostname, osd_dev_node))
+                self.metrics['disk_occupation'].set(0, (
+                    osd_hostname,
+                    osd_dev_node,
+                    "osd.{0}".format(id_)
+                ))
+            else:
+                self.log.info("Missing dev node metadata for osd {0}, skipping "
+                               "occupation record for this osd".format(id_))
+
          for pool in osd_map['pools']:
              id_ = pool['pool']
              name = pool['pool_name']
@@ -284,7 +318,7 @@ class Module(MgrModule):
          self.get_health()
          self.get_df()
          self.get_quorum_status()
-        self.get_metadata()
+        self.get_osd_metadata()
          self.get_pg_status()
  
          for daemon, counters in self.get_all_perf_counters().iteritems():
author	John Spray <john.spray@redhat.com>
	Thu, 28 Sep 2017 14:10:14 +0000 (10:10 -0400)
committer	John Spray <john.spray@redhat.com>
	Wed, 1 Nov 2017 23:03:26 +0000 (23:03 +0000)