]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/prometheus: introduce fetch_perf_counters_metrics module option 49248/head
author Avan Thakkar <athakkar@redhat.com>
Thu, 1 Dec 2022 06:06:56 +0000 (11:36 +0530)
committer Avan Thakkar <athakkar@redhat.com>
Tue, 4 Apr 2023 17:42:29 +0000 (23:12 +0530)
Fixes: https://tracker.ceph.com/issues/58164
Signed-off-by: Avan Thakkar <athakkar@redhat.com>
Introduce a module option in the prometheus module to enable/disable exporting Ceph daemon
perf counters as prometheus metrics; by default, exporting them is disabled. The use case for this option
is that if the ceph-exporter deployment fails for any reason, the user still has the option to fetch these metrics
from the prometheus exporter.

doc/mgr/prometheus.rst
src/pybind/mgr/prometheus/module.py

index cb5053451053b0f6f22f7a66721f88e46e2db1b6..afbbdcf268e150244ab1e62a3e92c277500652b7 100644 (file)
@@ -39,6 +39,7 @@ Configuration
 .. confval:: rbd_stats_pools_refresh_interval
 .. confval:: standby_behaviour
 .. confval:: standby_error_status_code
+.. confval:: exclude_perf_counters
 
 By default the module will accept HTTP requests on port ``9283`` on all IPv4
 and IPv6 addresses on the host.  The port and listen address are both
@@ -184,6 +185,15 @@ Example to turn up the sync interval to 10 minutes::
 
   ceph config set mgr mgr/prometheus/rbd_stats_pools_refresh_interval 600
 
+Ceph daemon performance counters metrics
+-----------------------------------------
+
+With the introduction of ``ceph-exporter`` daemon, the prometheus module will no longer export Ceph daemon
+perf counters as prometheus metrics by default. However, one may re-enable exporting these metrics by setting
+the module option ``exclude_perf_counters`` to ``false``::
+
+    ceph config set mgr mgr/prometheus/exclude_perf_counters false
+
 Statistic names and labels
 ==========================
 
index 2689e69f1a9e22674660c970426a2dfe4aeeaf1d..ade068b0c368147b98eccc698caf16b88ee1a2f3 100644 (file)
@@ -598,6 +598,14 @@ class Module(MgrModule):
             min=400,
             max=599,
             runtime=True
+        ),
+        Option(
+            name='exclude_perf_counters',
+            type='bool',
+            default=True,
+            desc='Do not include perf-counters in the metrics output',
+            long_desc='Gathering perf-counters from a single Prometheus exporter can degrade ceph-mgr performance, especially in large clusters. Instead, Ceph-exporter daemons are now used by default for perf-counter gathering. This should only be disabled when no ceph-exporters are deployed.',
+            runtime=True
         )
     ]
 
@@ -1618,26 +1626,10 @@ class Module(MgrModule):
                 self.metrics[path].set(health_metric['value'], labelvalues=(
                     health_metric['type'], daemon_name,))
 
-    @profile_method(True)
-    def collect(self) -> str:
-        # Clear the metrics before scraping
-        for k in self.metrics.keys():
-            self.metrics[k].clear()
-
-        self.get_health()
-        self.get_df()
-        self.get_osd_blocklisted_entries()
-        self.get_pool_stats()
-        self.get_fs()
-        self.get_osd_stats()
-        self.get_quorum_status()
-        self.get_mgr_status()
-        self.get_metadata_and_osd_status()
-        self.get_pg_status()
-        self.get_pool_repaired_objects()
-        self.get_num_objects()
-        self.get_all_daemon_health_metrics()
-
+    def get_perf_counters(self) -> None:
+        """
+        Get the perf counters for all daemons
+        """
         for daemon, counters in self.get_all_perf_counters().items():
             for path, counter_info in counters.items():
                 # Skip histograms, they are represented by long running avgs
@@ -1664,7 +1656,6 @@ class Module(MgrModule):
                             label_names,
                         )
                     self.metrics[_path].set(value, labels)
-
                     _path = path + '_count'
                     if _path not in self.metrics:
                         self.metrics[_path] = Metric(
@@ -1683,8 +1674,30 @@ class Module(MgrModule):
                             label_names,
                         )
                     self.metrics[path].set(value, labels)
-
         self.add_fixed_name_metrics()
+
+    @profile_method(True)
+    def collect(self) -> str:
+        # Clear the metrics before scraping
+        for k in self.metrics.keys():
+            self.metrics[k].clear()
+
+        self.get_health()
+        self.get_df()
+        self.get_osd_blocklisted_entries()
+        self.get_pool_stats()
+        self.get_fs()
+        self.get_osd_stats()
+        self.get_quorum_status()
+        self.get_mgr_status()
+        self.get_metadata_and_osd_status()
+        self.get_pg_status()
+        self.get_pool_repaired_objects()
+        self.get_num_objects()
+        self.get_all_daemon_health_metrics()
+
+        if not self.get_module_option('exclude_perf_counters'):
+            self.get_perf_counters()
         self.get_rbd_stats()
 
         self.get_collect_time_metrics()