From 2b223bd5d77d831bfbe5ed416e3b8e97009e604e Mon Sep 17 00:00:00 2001 From: avanthakkar Date: Mon, 26 Jun 2023 12:41:24 +0530 Subject: [PATCH] mgr/dashboard: empty grafana panels for performance of daemons Fixes: https://tracker.ceph.com/issues/61792 Signed-off-by: avanthakkar Removing the `ceph-` prefix from ceph_daemon label to align it with the label format used by queries in grafana dashboards. Also changing the `instance_id` label for rgw to match the values coming from exporter and prometheus module (cherry picked from commit fa2d1f9f0745ffecc671af3a3f0a93e7edaa1222) --- src/exporter/DaemonMetricCollector.cc | 24 +++++++++++++++---- src/exporter/DaemonMetricCollector.h | 4 ++-- .../ci/cephadm/run-cephadm-e2e-tests.sh | 2 ++ src/pybind/mgr/prometheus/module.py | 24 +++++++++++++++++-- src/test/exporter/CMakeLists.txt | 1 + src/test/exporter/test_exporter.cc | 23 ++++++++++++++++++ 6 files changed, 69 insertions(+), 9 deletions(-) diff --git a/src/exporter/DaemonMetricCollector.cc b/src/exporter/DaemonMetricCollector.cc index f4f7240fcfc..cb95fcaa966 100644 --- a/src/exporter/DaemonMetricCollector.cc +++ b/src/exporter/DaemonMetricCollector.cc @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -157,7 +158,6 @@ void DaemonMetricCollector::dump_asok_metrics() { labels.insert(multisite_labels_and_name.first.begin(), multisite_labels_and_name.first.end()); counter_name = multisite_labels_and_name.second; } - labels.insert({"ceph_daemon", quote(daemon_name)}); auto perf_values = counters_values.at(counter_name_init); dump_asok_metric(counter_group, perf_values, counter_name, labels); } @@ -291,6 +291,14 @@ DaemonMetricCollector::get_labels_and_metric_name(std::string daemon_name, std::string new_metric_name; labels_t labels; new_metric_name = metric_name; + const std::string ceph_daemon_prefix = "ceph-"; + const std::string ceph_client_prefix = "client."; + if (daemon_name.rfind(ceph_daemon_prefix, 0) == 0) { + daemon_name = 
daemon_name.substr(ceph_daemon_prefix.size()); + } + if (daemon_name.rfind(ceph_client_prefix, 0) == 0) { + daemon_name = daemon_name.substr(ceph_client_prefix.size()); + } // In vstart cluster socket files for rgw are stored as radosgw..asok if (daemon_name.find("radosgw") != std::string::npos) { std::size_t pos = daemon_name.find_last_of('.'); @@ -298,11 +306,17 @@ DaemonMetricCollector::get_labels_and_metric_name(std::string daemon_name, labels["instance_id"] = quote(tmp); } else if (daemon_name.find("rgw") != std::string::npos) { - std::string tmp = daemon_name.substr(16, std::string::npos); - std::string::size_type pos = tmp.find('.'); - labels["instance_id"] = quote("rgw." + tmp.substr(0, pos)); + // fetch instance_id, e.g. "okbvtv" from daemon_name=rgw.foo.ceph-node-00.okbvtv + size_t pos = daemon_name.find_last_of("."); + std::string instance_id = ""; + if (pos != std::string::npos) { + instance_id = daemon_name.substr(pos+1); + } + labels["instance_id"] = quote(instance_id); + } else { + labels.insert({"ceph_daemon", quote(daemon_name)}); } - else if (daemon_name.find("rbd-mirror") != std::string::npos) { + if (daemon_name.find("rbd-mirror") != std::string::npos) { std::regex re( "^rbd_mirror_image_([^/]+)/(?:(?:([^/]+)/" ")?)(.*)\\.(replay(?:_bytes|_latency)?)$"); diff --git a/src/exporter/DaemonMetricCollector.h b/src/exporter/DaemonMetricCollector.h index 3d35a9c79de..88e827bddae 100644 --- a/src/exporter/DaemonMetricCollector.h +++ b/src/exporter/DaemonMetricCollector.h @@ -34,6 +34,8 @@ class DaemonMetricCollector { public: void main(); std::string get_metrics(); + std::pair + get_labels_and_metric_name(std::string daemon_name, std::string metric_name); private: std::map clients; @@ -47,8 +49,6 @@ private: void dump_asok_metric(boost::json::object perf_info, boost::json::value perf_values, std::string name, labels_t labels); - std::pair - get_labels_and_metric_name(std::string daemon_name, std::string metric_name); std::pair 
add_fixed_name_metrics(std::string metric_name); void get_process_metrics(std::vector> daemon_pids); std::string asok_request(AdminSocketClient &asok, std::string command, std::string daemon_name); diff --git a/src/pybind/mgr/dashboard/ci/cephadm/run-cephadm-e2e-tests.sh b/src/pybind/mgr/dashboard/ci/cephadm/run-cephadm-e2e-tests.sh index 83b2eb69488..a48f759f5e7 100755 --- a/src/pybind/mgr/dashboard/ci/cephadm/run-cephadm-e2e-tests.sh +++ b/src/pybind/mgr/dashboard/ci/cephadm/run-cephadm-e2e-tests.sh @@ -38,6 +38,8 @@ cypress_run () { cd ${CEPH_DEV_FOLDER}/src/pybind/mgr/dashboard/frontend +kcli ssh -u root ceph-node-00 'cephadm shell "ceph config set mgr mgr/prometheus/exclude_perf_counters false"' + # check if the prometheus daemon is running # before starting the e2e tests diff --git a/src/pybind/mgr/prometheus/module.py b/src/pybind/mgr/prometheus/module.py index 3467c3080a7..566b649a0fd 100644 --- a/src/pybind/mgr/prometheus/module.py +++ b/src/pybind/mgr/prometheus/module.py @@ -13,6 +13,7 @@ from collections import namedtuple from mgr_module import CLIReadCommand, MgrModule, MgrStandbyModule, PG_STATES, Option, ServiceInfoT, HandleCommandResult, CLIWriteCommand from mgr_util import get_default_addr, profile_method, build_url +from orchestrator import OrchestratorClientMixin, raise_if_exception, NoOrchestrator from rbd import RBD from typing import DefaultDict, Optional, Dict, Any, Set, cast, Tuple, Union, List, Callable @@ -548,7 +549,7 @@ class MetricCollectionThread(threading.Thread): self.event.set() -class Module(MgrModule): +class Module(MgrModule, OrchestratorClientMixin): MODULE_OPTIONS = [ Option( 'server_addr', @@ -637,6 +638,8 @@ class Module(MgrModule): _global_instance = self self.metrics_thread = MetricCollectionThread(_global_instance) self.health_history = HealthHistory(self) + self.modify_instance_id = self.get_orch_status() and self.get_module_option( + 'exclude_perf_counters') def _setup_static_metrics(self) -> Dict[str, Metric]: metrics = 
{} @@ -853,6 +856,12 @@ class Module(MgrModule): return metrics + def get_orch_status(self) -> bool: + try: + return self.available()[0] + except NoOrchestrator: + return False + def get_server_addr(self) -> str: """ Return the current mgr server IP. @@ -1273,9 +1282,20 @@ class Module(MgrModule): ) # Populate other servers metadata + # If orchestrator is available and ceph-exporter is running modify rgw instance id + # to match the one from exporter + if self.modify_instance_id: + daemons = raise_if_exception(self.list_daemons(daemon_type='rgw')) + for daemon in daemons: + self.metrics['rgw_metadata'].set(1, + ('{}.{}'.format(str(daemon.daemon_type), + str(daemon.daemon_id)), + str(daemon.hostname), + str(daemon.version), + str(daemon.daemon_id).split(".")[2])) for key, value in servers.items(): service_id, service_type = key - if service_type == 'rgw': + if service_type == 'rgw' and not self.modify_instance_id: hostname, version, name = value self.metrics['rgw_metadata'].set( 1, diff --git a/src/test/exporter/CMakeLists.txt b/src/test/exporter/CMakeLists.txt index 3a46618261f..7ef5631f892 100644 --- a/src/test/exporter/CMakeLists.txt +++ b/src/test/exporter/CMakeLists.txt @@ -1,6 +1,7 @@ add_executable(unittest_exporter test_exporter.cc "${CMAKE_SOURCE_DIR}/src/exporter/util.cc" + "${CMAKE_SOURCE_DIR}/src/exporter/DaemonMetricCollector.cc" ) target_link_libraries(unittest_exporter diff --git a/src/test/exporter/test_exporter.cc b/src/test/exporter/test_exporter.cc index 83750e0eaf1..7bc44d9b321 100644 --- a/src/test/exporter/test_exporter.cc +++ b/src/test/exporter/test_exporter.cc @@ -1,10 +1,13 @@ #include "gtest/gtest.h" #include "exporter/util.h" +#include "exporter/DaemonMetricCollector.h" #include #include #include +typedef std::map labels_t; + // 17.2.6's memento mori: // This data was gathered from the python implementation of the promethize method // where we transform the path of a counter to a valid prometheus name. 
@@ -662,3 +665,23 @@ TEST(Exporter, promethize) { } } +TEST(Exporter, check_labels_and_metric_name) { + static std::vector> counters_data; + counters_data.emplace_back("ceph-osd.0", "ceph_osd_numpg"); + counters_data.emplace_back("ceph-client.rgw.foo.ceph-node-00.okbvtv", "ceph_rgw_get"); + + static std::vector> labels_and_name; + labels_and_name.emplace_back(labels_t{{"ceph_daemon", "\"osd.0\""}}, "ceph_osd_numpg"); + labels_and_name.emplace_back(labels_t{{"instance_id", "\"okbvtv\""}}, "ceph_rgw_get"); + auto counter_data_itr = counters_data.begin(); + auto labels_and_name_itr = labels_and_name.begin(); + for (; counter_data_itr != counters_data.end() && labels_and_name_itr != labels_and_name.end(); + ++counter_data_itr, ++labels_and_name_itr) { + std::string daemon_name = counter_data_itr->first; + std::string counter_name = counter_data_itr->second; + DaemonMetricCollector &collector = collector_instance(); + std::pair result = collector.get_labels_and_metric_name(daemon_name, counter_name); + ASSERT_EQ(result.first, labels_and_name_itr->first); + ASSERT_EQ(result.second, labels_and_name_itr->second); + } +} -- 2.39.5