From: Avan Thakkar Date: Thu, 1 Dec 2022 06:06:56 +0000 (+0530) Subject: mgr/prometheus: introduce fetch_perf_counters_metrics module option X-Git-Tag: v18.2.1~414^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=refs%2Fpull%2F52774%2Fhead;p=ceph.git mgr/prometheus: introduce fetch_perf_counters_metrics module option Fixes: https://tracker.ceph.com/issues/58164 Signed-off-by: Avan Thakkar Introduce a module option in the prometheus module to enable/disable support for exporting ceph daemons' perf counters as prometheus metrics; by default this option will be disabled. The use case for this option is that if the ceph-exporter deployment fails for any reason, the user still has the option to fetch metrics from the prometheus exporter. --- diff --git a/doc/mgr/prometheus.rst b/doc/mgr/prometheus.rst index 698b6a2d539..25a7b0d084e 100644 --- a/doc/mgr/prometheus.rst +++ b/doc/mgr/prometheus.rst @@ -41,6 +41,7 @@ Configuration .. confval:: rbd_stats_pools_refresh_interval .. confval:: standby_behaviour .. confval:: standby_error_status_code +.. confval:: exclude_perf_counters By default the module will accept HTTP requests on port ``9283`` on all IPv4 and IPv6 addresses on the host. The port and listen address are both @@ -217,6 +218,15 @@ the module option ``exclude_perf_counters`` to ``false``: ceph config set mgr mgr/prometheus/exclude_perf_counters false +Ceph daemon performance counters metrics +----------------------------------------- + +With the introduction of ``ceph-exporter`` daemon, the prometheus module will no longer export Ceph daemon +perf counters as prometheus metrics by default. 
However, one may re-enable exporting these metrics by setting +the module option ``exclude_perf_counters`` to ``false``:: + + ceph config set mgr mgr/prometheus/exclude_perf_counters false + Statistic names and labels ========================== diff --git a/src/pybind/mgr/prometheus/module.py b/src/pybind/mgr/prometheus/module.py index 566b649a0fd..f2e97c9d183 100644 --- a/src/pybind/mgr/prometheus/module.py +++ b/src/pybind/mgr/prometheus/module.py @@ -600,6 +600,14 @@ class Module(MgrModule, OrchestratorClientMixin): min=400, max=599, runtime=True + ), + Option( + name='exclude_perf_counters', + type='bool', + default=True, + desc='Do not include perf-counters in the metrics output', + long_desc='Gathering perf-counters from a single Prometheus exporter can degrade ceph-mgr performance, especially in large clusters. Instead, Ceph-exporter daemons are now used by default for perf-counter gathering. This should only be disabled when no ceph-exporters are deployed.', + runtime=True ) ] @@ -1640,26 +1648,10 @@ class Module(MgrModule, OrchestratorClientMixin): self.metrics[path].set(health_metric['value'], labelvalues=( health_metric['type'], daemon_name,)) - @profile_method(True) - def collect(self) -> str: - # Clear the metrics before scraping - for k in self.metrics.keys(): - self.metrics[k].clear() - - self.get_health() - self.get_df() - self.get_osd_blocklisted_entries() - self.get_pool_stats() - self.get_fs() - self.get_osd_stats() - self.get_quorum_status() - self.get_mgr_status() - self.get_metadata_and_osd_status() - self.get_pg_status() - self.get_pool_repaired_objects() - self.get_num_objects() - self.get_all_daemon_health_metrics() - + def get_perf_counters(self) -> None: + """ + Get the perf counters for all daemons + """ for daemon, counters in self.get_all_perf_counters().items(): for path, counter_info in counters.items(): # Skip histograms, they are represented by long running avgs @@ -1686,7 +1678,6 @@ class Module(MgrModule, 
OrchestratorClientMixin): label_names, ) self.metrics[_path].set(value, labels) - _path = path + '_count' if _path not in self.metrics: self.metrics[_path] = Metric( @@ -1705,8 +1696,30 @@ class Module(MgrModule, OrchestratorClientMixin): label_names, ) self.metrics[path].set(value, labels) - self.add_fixed_name_metrics() + + @profile_method(True) + def collect(self) -> str: + # Clear the metrics before scraping + for k in self.metrics.keys(): + self.metrics[k].clear() + + self.get_health() + self.get_df() + self.get_osd_blocklisted_entries() + self.get_pool_stats() + self.get_fs() + self.get_osd_stats() + self.get_quorum_status() + self.get_mgr_status() + self.get_metadata_and_osd_status() + self.get_pg_status() + self.get_pool_repaired_objects() + self.get_num_objects() + self.get_all_daemon_health_metrics() + + if not self.get_module_option('exclude_perf_counters'): + self.get_perf_counters() self.get_rbd_stats() self.get_collect_time_metrics()