git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/prometheus: introduce metric for collection time
author Patrick Seidensal <pseidensal@suse.com>
Fri, 24 Jul 2020 17:11:35 +0000 (19:11 +0200)
committer Patrick Seidensal <pseidensal@suse.com>
Thu, 25 Feb 2021 14:45:51 +0000 (15:45 +0100)
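Introduces the metric `prometheus_collect_duration_seconds`, which reports the
time it takes the Prometheus manager module to collect all the metrics, broken
down by collection method. It is exposed as a `_sum` and a `_count` series per
method, for example: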

```
ceph_prometheus_collect_duration_seconds_sum{method="get_health"} 0.0002613067626953125
ceph_prometheus_collect_duration_seconds_sum{method="get_pool_stats"} 0.0018298625946044922
ceph_prometheus_collect_duration_seconds_sum{method="get_df"} 0.0005767345428466797
ceph_prometheus_collect_duration_seconds_sum{method="get_fs"} 0.0010402202606201172
ceph_prometheus_collect_duration_seconds_sum{method="get_quorum_status"} 0.0007524490356445312
ceph_prometheus_collect_duration_seconds_sum{method="get_mgr_status"} 0.0035364627838134766
ceph_prometheus_collect_duration_seconds_sum{method="get_pg_status"} 0.00021266937255859375
ceph_prometheus_collect_duration_seconds_sum{method="get_osd_stats"} 0.0018737316131591797
ceph_prometheus_collect_duration_seconds_sum{method="get_metadata_and_osd_status"} 0.0032796859741210938
ceph_prometheus_collect_duration_seconds_sum{method="get_num_objects"} 0.00011086463928222656
ceph_prometheus_collect_duration_seconds_sum{method="get_rbd_stats"} 0.00036144256591796875
ceph_prometheus_collect_duration_seconds_count{method="get_health"} 1.0
ceph_prometheus_collect_duration_seconds_count{method="get_pool_stats"} 1.0
ceph_prometheus_collect_duration_seconds_count{method="get_df"} 1.0
ceph_prometheus_collect_duration_seconds_count{method="get_fs"} 1.0
ceph_prometheus_collect_duration_seconds_count{method="get_quorum_status"} 1.0
ceph_prometheus_collect_duration_seconds_count{method="get_mgr_status"} 1.0
ceph_prometheus_collect_duration_seconds_count{method="get_pg_status"} 1.0
ceph_prometheus_collect_duration_seconds_count{method="get_osd_stats"} 1.0
ceph_prometheus_collect_duration_seconds_count{method="get_metadata_and_osd_status"} 1.0
ceph_prometheus_collect_duration_seconds_count{method="get_num_objects"} 1.0
ceph_prometheus_collect_duration_seconds_count{method="get_rbd_stats"} 1.0
```

Fixes: https://tracker.ceph.com/issues/46703
Signed-off-by: Patrick Seidensal <pseidensal@suse.com>
src/pybind/mgr/prometheus/module.py

index 5400dbbf7e7c7879e58cc7e38c43b11d4855915c..981a1ab89ba3314ba8bf2113e97ccc3688b96431 100644 (file)
@@ -2,6 +2,7 @@ import cherrypy
 from collections import defaultdict
 from distutils.version import StrictVersion
 import json
+from collections import defaultdict
 import errno
 import math
 import os
@@ -184,6 +185,24 @@ class Metric(object):
         return expfmt
 
 
+class MetricCounter(Metric):  # accumulating metric: add() only, clear() is a no-op
+    def __init__(self, name, desc, labels=None):
+        super(MetricCounter, self).__init__('counter', name, desc, labels)
+        self.value = defaultdict(lambda: 0)  # unseen label tuples start at 0
+
+    def clear(self):
+        pass  # Deliberate no-op: the accumulated counter values must be kept.
+
+    def set(self, value, labelvalues=None):  # always raises; use add() instead
+        msg = 'This method must not be used for instances of MetricCounter class'
+        raise NotImplementedError(msg)
+
+    def add(self, value, labelvalues=None):  # increase the counter for one label set
+        # labelvalues must be a tuple; a falsy value falls back to ('',)
+        labelvalues = labelvalues or ('',)
+        self.value[labelvalues] += value
+
+
 class MetricCollectionThread(threading.Thread):
     def __init__(self, module):
         # type: (Module) -> None
@@ -1104,6 +1123,32 @@ class Module(MgrModule):
 
         self.metrics.update(new_metrics)
 
+    def get_collect_time_metrics(self):  # export per-method collection timings
+        if 'prometheus_collect_duration_seconds_sum' not in self.metrics:
+            self.metrics['prometheus_collect_duration_seconds_sum'] = MetricCounter(
+                'prometheus_collect_duration_seconds_sum',
+                'The sum of seconds took to collect all metrics of this exporter',
+                ('method',),
+            )  # only created when not already present in self.metrics
+        if 'prometheus_collect_duration_seconds_count' not in self.metrics:
+            self.metrics['prometheus_collect_duration_seconds_count'] = MetricCounter(
+                'prometheus_collect_duration_seconds_count',
+                'The amount of metrics gathered for this exporter',
+                ('method',),
+            )  # only created when not already present in self.metrics
+
+        # Add every recorded duration to the counters, labelled by method name.
+        # `_execution_duration` is set by the `profile_method` decorator;
+        # `collect` is still running here, so it has none from this run.
+        # NOTE(review): values left over from an earlier run (incl. `collect`)
+        # would be added again -- confirm how `profile_method` stores them.
+        for method_name, method in Module.__dict__.items():  # Module's own attributes
+            if hasattr(method, '_execution_duration'):
+                self.metrics['prometheus_collect_duration_seconds_sum'].add(
+                    method._execution_duration, (method_name, ))
+                self.metrics['prometheus_collect_duration_seconds_count'].add(
+                    1, (method_name, ))  # count one more timing for this method
+
     @profile_method(True)
     def collect(self):
         # Clear the metrics before scraping
@@ -1170,6 +1215,8 @@ class Module(MgrModule):
         self.add_fixed_name_metrics()
         self.get_rbd_stats()
 
+        self.get_collect_time_metrics()
+
         # Return formatted metrics and clear no longer used data
         _metrics = [m.str_expfmt() for m in self.metrics.values()]
         for k in self.metrics.keys():