From 801d3f670330499fb9cd5f8674678908f2115fe8 Mon Sep 17 00:00:00 2001
From: Patrick Seidensal
Date: Fri, 24 Jul 2020 19:11:35 +0200
Subject: [PATCH] mgr/prometheus: introduce metric for collection time

Introduces metric `prometheus_collect_duration_seconds` for the time it
takes the Prometheus manager module to collect all the metrics.

```
ceph_prometheus_collect_duration_seconds_sum{method="get_health"} 0.0002613067626953125
ceph_prometheus_collect_duration_seconds_sum{method="get_pool_stats"} 0.0018298625946044922
ceph_prometheus_collect_duration_seconds_sum{method="get_df"} 0.0005767345428466797
ceph_prometheus_collect_duration_seconds_sum{method="get_fs"} 0.0010402202606201172
ceph_prometheus_collect_duration_seconds_sum{method="get_quorum_status"} 0.0007524490356445312
ceph_prometheus_collect_duration_seconds_sum{method="get_mgr_status"} 0.0035364627838134766
ceph_prometheus_collect_duration_seconds_sum{method="get_pg_status"} 0.00021266937255859375
ceph_prometheus_collect_duration_seconds_sum{method="get_osd_stats"} 0.0018737316131591797
ceph_prometheus_collect_duration_seconds_sum{method="get_metadata_and_osd_status"} 0.0032796859741210938
ceph_prometheus_collect_duration_seconds_sum{method="get_num_objects"} 0.00011086463928222656
ceph_prometheus_collect_duration_seconds_sum{method="get_rbd_stats"} 0.00036144256591796875
ceph_prometheus_collect_duration_seconds_count{method="get_health"} 1.0
ceph_prometheus_collect_duration_seconds_count{method="get_pool_stats"} 1.0
ceph_prometheus_collect_duration_seconds_count{method="get_df"} 1.0
ceph_prometheus_collect_duration_seconds_count{method="get_fs"} 1.0
ceph_prometheus_collect_duration_seconds_count{method="get_quorum_status"} 1.0
ceph_prometheus_collect_duration_seconds_count{method="get_mgr_status"} 1.0
ceph_prometheus_collect_duration_seconds_count{method="get_pg_status"} 1.0
ceph_prometheus_collect_duration_seconds_count{method="get_osd_stats"} 1.0
ceph_prometheus_collect_duration_seconds_count{method="get_metadata_and_osd_status"} 1.0
ceph_prometheus_collect_duration_seconds_count{method="get_num_objects"} 1.0
ceph_prometheus_collect_duration_seconds_count{method="get_rbd_stats"} 1.0
```

Fixes: https://tracker.ceph.com/issues/46703
Signed-off-by: Patrick Seidensal
---
Reviewer notes (this section is not part of the commit message):
- Dropped the hunk that added a second `from collections import
  defaultdict`; the unchanged context of that hunk showed the module
  already importing it.
- Added docstrings to the new class and to the new method.
- Hunk offsets and the diffstat were adjusted by hand to match; the
  post-image blob id on the `index` line was not recomputed.
- Blank context lines and context indentation were reconstructed from the
  hunk line counts; verify against the target tree before applying.

 src/pybind/mgr/prometheus/module.py | 60 +++++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)

diff --git a/src/pybind/mgr/prometheus/module.py b/src/pybind/mgr/prometheus/module.py
index 5400dbbf7e7c7..981a1ab89ba33 100644
--- a/src/pybind/mgr/prometheus/module.py
+++ b/src/pybind/mgr/prometheus/module.py
@@ -184,6 +184,30 @@ class Metric(object):
         return expfmt
 
 
+class MetricCounter(Metric):
+    """Metric created with type 'counter' that accumulates values.
+
+    Only `add` changes a value; `clear` is a no-op and `set` raises.
+    """
+
+    def __init__(self, name, desc, labels=None):
+        super(MetricCounter, self).__init__('counter', name, desc, labels)
+        self.value = defaultdict(lambda: 0)
+
+    def clear(self):
+        pass  # Skip calls to clear as we want to keep the counters here.
+
+    def set(self, value, labelvalues=None):
+        msg = 'This method must not be used for instances of MetricCounter class'
+        raise NotImplementedError(msg)
+
+    def add(self, value, labelvalues=None):
+        """Add `value` to the counter kept for `labelvalues`."""
+        # labelvalues must be a tuple
+        labelvalues = labelvalues or ('',)
+        self.value[labelvalues] += value
+
+
 class MetricCollectionThread(threading.Thread):
     def __init__(self, module):
         # type: (Module) -> None
@@ -1104,6 +1128,40 @@ class Module(MgrModule):
 
         self.metrics.update(new_metrics)
 
+    def get_collect_time_metrics(self):
+        """Expose recorded per-method execution times as counter metrics.
+
+        Creates the `prometheus_collect_duration_seconds_sum` and
+        `prometheus_collect_duration_seconds_count` counters if missing.
+        Then, for each attribute of `Module` that has `_execution_duration`,
+        adds that duration to the sum and 1 to the count, labelled with the
+        attribute's name.
+        """
+        if 'prometheus_collect_duration_seconds_sum' not in self.metrics:
+            self.metrics['prometheus_collect_duration_seconds_sum'] = MetricCounter(
+                'prometheus_collect_duration_seconds_sum',
+                'The sum of seconds took to collect all metrics of this exporter',
+                ('method',),
+            )
+        if 'prometheus_collect_duration_seconds_count' not in self.metrics:
+            self.metrics['prometheus_collect_duration_seconds_count'] = MetricCounter(
+                'prometheus_collect_duration_seconds_count',
+                'The amount of metrics gathered for this exporter',
+                ('method',),
+            )
+
+        # Collect all timing data and make it available as metric, excluding the
+        # `collect` method because it has not finished at this point and hence
+        # there's no `_execution_duration` attribute to be found. The
+        # `_execution_duration` attribute is added by the `profile_method`
+        # decorator.
+        for method_name, method in Module.__dict__.items():
+            if hasattr(method, '_execution_duration'):
+                self.metrics['prometheus_collect_duration_seconds_sum'].add(
+                    method._execution_duration, (method_name, ))
+                self.metrics['prometheus_collect_duration_seconds_count'].add(
+                    1, (method_name, ))
+
     @profile_method(True)
     def collect(self):
         # Clear the metrics before scraping
@@ -1170,6 +1228,8 @@ class Module(MgrModule):
         self.add_fixed_name_metrics()
         self.get_rbd_stats()
 
+        self.get_collect_time_metrics()
+
         # Return formatted metrics and clear no longer used data
         _metrics = [m.str_expfmt() for m in self.metrics.values()]
         for k in self.metrics.keys():
-- 
2.39.5