]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/prometheus: introduce fetch_perf_counters_metrics module option 52774/head
author Avan Thakkar <athakkar@redhat.com>
Thu, 1 Dec 2022 06:06:56 +0000 (11:36 +0530)
committer avanthakkar <avanjohn@gmail.com>
Fri, 4 Aug 2023 09:55:08 +0000 (15:25 +0530)
Fixes: https://tracker.ceph.com/issues/58164
Signed-off-by: Avan Thakkar <athakkar@redhat.com>
Introduce a module option in the prometheus module to enable/disable exporting Ceph daemon
perf counters as Prometheus metrics. By default, exporting is disabled. The use case for this option
is that if the ceph-exporter deployment fails for any reason, the user still has the option to fetch these metrics
from the prometheus exporter.

doc/mgr/prometheus.rst
src/pybind/mgr/prometheus/module.py

index 698b6a2d539d7312b8812842df6bd03dd0977518..25a7b0d084ee654970b9bafa2ccb8bbe3a8ab351 100644 (file)
@@ -41,6 +41,7 @@ Configuration
 .. confval:: rbd_stats_pools_refresh_interval
 .. confval:: standby_behaviour
 .. confval:: standby_error_status_code
+.. confval:: exclude_perf_counters
 
 By default the module will accept HTTP requests on port ``9283`` on all IPv4
 and IPv6 addresses on the host.  The port and listen address are both
@@ -217,6 +218,15 @@ the module option ``exclude_perf_counters`` to ``false``:
 
    ceph config set mgr mgr/prometheus/exclude_perf_counters false
 
+Ceph daemon performance counters metrics
+-----------------------------------------
+
+With the introduction of ``ceph-exporter`` daemon, the prometheus module will no longer export Ceph daemon
+perf counters as prometheus metrics by default. However, one may re-enable exporting these metrics by setting
+the module option ``exclude_perf_counters`` to ``false``::
+
+    ceph config set mgr mgr/prometheus/exclude_perf_counters false
+
 Statistic names and labels
 ==========================
 
index 566b649a0fdc4096e855cebc1a4416ea5b7dc5ab..f2e97c9d183ad8f6782d99aa444501ea5751c6cb 100644 (file)
@@ -600,6 +600,14 @@ class Module(MgrModule, OrchestratorClientMixin):
             min=400,
             max=599,
             runtime=True
+        ),
+        Option(
+            name='exclude_perf_counters',
+            type='bool',
+            default=True,
+            desc='Do not include perf-counters in the metrics output',
+            long_desc='Gathering perf-counters from a single Prometheus exporter can degrade ceph-mgr performance, especially in large clusters. Instead, Ceph-exporter daemons are now used by default for perf-counter gathering. This should only be disabled when no ceph-exporters are deployed.',
+            runtime=True
         )
     ]
 
@@ -1640,26 +1648,10 @@ class Module(MgrModule, OrchestratorClientMixin):
                 self.metrics[path].set(health_metric['value'], labelvalues=(
                     health_metric['type'], daemon_name,))
 
-    @profile_method(True)
-    def collect(self) -> str:
-        # Clear the metrics before scraping
-        for k in self.metrics.keys():
-            self.metrics[k].clear()
-
-        self.get_health()
-        self.get_df()
-        self.get_osd_blocklisted_entries()
-        self.get_pool_stats()
-        self.get_fs()
-        self.get_osd_stats()
-        self.get_quorum_status()
-        self.get_mgr_status()
-        self.get_metadata_and_osd_status()
-        self.get_pg_status()
-        self.get_pool_repaired_objects()
-        self.get_num_objects()
-        self.get_all_daemon_health_metrics()
-
+    def get_perf_counters(self) -> None:
+        """
+        Get the perf counters for all daemons
+        """
         for daemon, counters in self.get_all_perf_counters().items():
             for path, counter_info in counters.items():
                 # Skip histograms, they are represented by long running avgs
@@ -1686,7 +1678,6 @@ class Module(MgrModule, OrchestratorClientMixin):
                             label_names,
                         )
                     self.metrics[_path].set(value, labels)
-
                     _path = path + '_count'
                     if _path not in self.metrics:
                         self.metrics[_path] = Metric(
@@ -1705,8 +1696,30 @@ class Module(MgrModule, OrchestratorClientMixin):
                             label_names,
                         )
                     self.metrics[path].set(value, labels)
-
         self.add_fixed_name_metrics()
+
+    @profile_method(True)
+    def collect(self) -> str:
+        # Clear the metrics before scraping
+        for k in self.metrics.keys():
+            self.metrics[k].clear()
+
+        self.get_health()
+        self.get_df()
+        self.get_osd_blocklisted_entries()
+        self.get_pool_stats()
+        self.get_fs()
+        self.get_osd_stats()
+        self.get_quorum_status()
+        self.get_mgr_status()
+        self.get_metadata_and_osd_status()
+        self.get_pg_status()
+        self.get_pool_repaired_objects()
+        self.get_num_objects()
+        self.get_all_daemon_health_metrics()
+
+        if not self.get_module_option('exclude_perf_counters'):
+            self.get_perf_counters()
         self.get_rbd_stats()
 
         self.get_collect_time_metrics()