Add recovery metrics to prometheus exporter

author Paul Cuzner <pcuzner@redhat.com>

Mon, 11 Mar 2019 00:46:18 +0000 (13:46 +1300)

committer Paul Cuzner <pcuzner@redhat.com>

Mon, 11 Mar 2019 00:46:18 +0000 (13:46 +1300)
author Paul Cuzner <pcuzner@redhat.com>
Mon, 11 Mar 2019 00:46:18 +0000 (13:46 +1300)
committer Paul Cuzner <pcuzner@redhat.com>
Mon, 11 Mar 2019 00:46:18 +0000 (13:46 +1300)
diff --git a/src/pybind/mgr/prometheus/module.py b/src/pybind/mgr/prometheus/module.py

index fb7213262b2b3fa0c8e259e3ce389bf50019d1a6..b7fecf8d85a9a64fc9734d82468f92e8bd1ed637 100644 (file)
--- a/src/pybind/mgr/prometheus/module.py
+++ b/src/pybind/mgr/prometheus/module.py
@@ -64,6 +64,10 @@ DF_CLUSTER = ['total_bytes', 'total_used_bytes', 'total_used_raw_bytes']
  DF_POOL = ['max_avail', 'stored', 'stored_raw', 'objects', 'dirty',
             'quota_bytes', 'quota_objects', 'rd', 'rd_bytes', 'wr', 'wr_bytes']
  
+OSD_POOL_STATS = ('recovering_objects_per_sec', 'recovering_bytes_per_sec',
+                  'recovering_keys_per_sec', 'num_objects_recovered',
+                  'num_bytes_recovered', 'num_bytes_recovered')
+
  OSD_FLAGS = ('noup', 'nodown', 'noout', 'noin', 'nobackfill', 'norebalance',
               'norecover', 'noscrub', 'nodeep-scrub')
  
@@ -285,6 +289,12 @@ class Module(MgrModule):
              'PG Total Count'
          )
  
+        metrics['scrape_duration_seconds'] = Metric(
+            'gauge',
+            'scrape_duration_secs',
+            'Time taken to gather metrics from Ceph (secs)'
+        )
+
          for flag in OSD_FLAGS:
              path = 'osd_flag_{}'.format(flag)
              metrics[path] = Metric(
@@ -308,6 +318,14 @@ class Module(MgrModule):
                  'OSD stat {}'.format(stat),
                  ('ceph_daemon',)
              )
+        for stat in OSD_POOL_STATS:
+            path = 'pool_{}'.format(stat)
+            metrics[path] = Metric(
+                'gauge',
+                path,
+                "OSD POOL STATS: {}".format(stat),
+                ('pool_id',)
+            )
          for state in PG_STATES:
              path = 'pg_{}'.format(state)
              metrics[path] = Metric(
@@ -346,6 +364,17 @@ class Module(MgrModule):
              health_status_to_number(health['status'])
          )
  
+    def get_pool_stats(self):
+        # retrieve pool stats to provide per pool recovery metrics
+        # (osd_pool_stats moved to mgr in Mimic)
+        pstats = self.get('osd_pool_stats')
+        for pool in pstats['pool_stats']:
+            for stat in OSD_POOL_STATS:
+                self.metrics['pool_{}'.format(stat)].set(
+                    pool['recovery_rate'].get(stat, 0),
+                    (pool['pool_id'],)
+                )
+
      def get_df(self):
          # maybe get the to-be-exported metrics from a config?
          df = self.get('df')
@@ -803,8 +832,11 @@ class Module(MgrModule):
          for k in self.metrics.keys():
              self.metrics[k].clear()
  
+        _start_time = time.time()
+
          self.get_health()
          self.get_df()
+        self.get_pool_stats()
          self.get_fs()
          self.get_osd_stats()
          self.get_quorum_status()
@@ -857,6 +889,9 @@ class Module(MgrModule):
  
          self.get_rbd_stats()
  
+        _end_time = time.time()
+        self.metrics['scrape_duration_seconds'].set(_end_time - _start_time)
+
          # Return formatted metrics and clear no longer used data
          _metrics = [m.str_expfmt() for m in self.metrics.values()]
          for k in self.metrics.keys():
author	Paul Cuzner <pcuzner@redhat.com>
	Mon, 11 Mar 2019 00:46:18 +0000 (13:46 +1300)
committer	Paul Cuzner <pcuzner@redhat.com>
	Mon, 11 Mar 2019 00:46:18 +0000 (13:46 +1300)