]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/dashboard: empty grafana panels for performance of daemons 52191/head
authoravanthakkar <avanjohn@gmail.com>
Mon, 26 Jun 2023 07:11:24 +0000 (12:41 +0530)
committeravanthakkar <avanjohn@gmail.com>
Fri, 28 Jul 2023 10:35:32 +0000 (16:05 +0530)
Fixes: https://tracker.ceph.com/issues/61792
Signed-off-by: avanthakkar <avanjohn@gmail.com>
Remove the `ceph-` prefix from the ceph_daemon label so that it matches the label
format used by the queries in the grafana dashboards. Also change the
`instance_id` label for rgw so that the values coming from the
exporter and the prometheus module agree with each other.

src/exporter/DaemonMetricCollector.cc
src/exporter/DaemonMetricCollector.h
src/pybind/mgr/dashboard/ci/cephadm/run-cephadm-e2e-tests.sh
src/pybind/mgr/prometheus/module.py
src/test/exporter/CMakeLists.txt
src/test/exporter/test_exporter.cc

index f4f7240fcfcb2fc384f90972aa5abba855d910da..cb95fcaa9665dc68c25c2a76d9dfad796f873a4e 100644 (file)
@@ -7,6 +7,7 @@
 #include <map>
 #include <memory>
 #include <regex>
+#include <sstream>
 #include <string>
 #include <utility>
 
@@ -157,7 +158,6 @@ void DaemonMetricCollector::dump_asok_metrics() {
             labels.insert(multisite_labels_and_name.first.begin(), multisite_labels_and_name.first.end());
             counter_name = multisite_labels_and_name.second;
           }
-          labels.insert({"ceph_daemon", quote(daemon_name)});
           auto perf_values = counters_values.at(counter_name_init);
           dump_asok_metric(counter_group, perf_values, counter_name, labels);
         }
@@ -291,6 +291,14 @@ DaemonMetricCollector::get_labels_and_metric_name(std::string daemon_name,
   std::string new_metric_name;
   labels_t labels;
   new_metric_name = metric_name;
+  const std::string ceph_daemon_prefix = "ceph-";
+  const std::string ceph_client_prefix = "client.";
+  if (daemon_name.rfind(ceph_daemon_prefix, 0) == 0) {
+    daemon_name = daemon_name.substr(ceph_daemon_prefix.size());
+  }
+  if (daemon_name.rfind(ceph_client_prefix, 0) == 0) {
+    daemon_name = daemon_name.substr(ceph_client_prefix.size());
+  }
   // In vstart cluster socket files for rgw are stored as radosgw.<instance_id>.asok
   if (daemon_name.find("radosgw") != std::string::npos) {
     std::size_t pos = daemon_name.find_last_of('.');
@@ -298,11 +306,17 @@ DaemonMetricCollector::get_labels_and_metric_name(std::string daemon_name,
     labels["instance_id"] = quote(tmp);
   }
   else if (daemon_name.find("rgw") != std::string::npos) {
-    std::string tmp = daemon_name.substr(16, std::string::npos);
-    std::string::size_type pos = tmp.find('.');
-    labels["instance_id"] = quote("rgw." + tmp.substr(0, pos));
+    // fetch instance_id, e.g. "okbvtv" from daemon_name=rgw.foo.ceph-node-00.okbvtv
+    size_t pos = daemon_name.find_last_of(".");
+    std::string instance_id = "";
+    if (pos != std::string::npos) {
+       instance_id = daemon_name.substr(pos+1);
+    }
+    labels["instance_id"] = quote(instance_id);
+  } else {
+    labels.insert({"ceph_daemon", quote(daemon_name)});
   }
-  else if (daemon_name.find("rbd-mirror") != std::string::npos) {
+  if (daemon_name.find("rbd-mirror") != std::string::npos) {
     std::regex re(
         "^rbd_mirror_image_([^/]+)/(?:(?:([^/]+)/"
         ")?)(.*)\\.(replay(?:_bytes|_latency)?)$");
index 3d35a9c79de7d59f824ed4716a149c302c4b4325..88e827bddae70211afd2938304663462409da5ee 100644 (file)
@@ -34,6 +34,8 @@ class DaemonMetricCollector {
 public:
   void main();
   std::string get_metrics();
+  std::pair<labels_t, std::string>
+  get_labels_and_metric_name(std::string daemon_name, std::string metric_name);
 
 private:
   std::map<std::string, AdminSocketClient> clients;
@@ -47,8 +49,6 @@ private:
   void dump_asok_metric(boost::json::object perf_info,
                         boost::json::value perf_values, std::string name,
                         labels_t labels);
-  std::pair<labels_t, std::string>
-  get_labels_and_metric_name(std::string daemon_name, std::string metric_name);
   std::pair<labels_t, std::string> add_fixed_name_metrics(std::string metric_name);
   void get_process_metrics(std::vector<std::pair<std::string, int>> daemon_pids);
   std::string asok_request(AdminSocketClient &asok, std::string command, std::string daemon_name);
index d688a3c304028d4b99658176cc3b16f765ec20cd..a48f759f5e788ed069fcf878da2a09aaf129c320 100755 (executable)
@@ -38,6 +38,8 @@ cypress_run () {
 
 cd ${CEPH_DEV_FOLDER}/src/pybind/mgr/dashboard/frontend
 
+kcli ssh -u root ceph-node-00 'cephadm shell "ceph config set mgr mgr/prometheus/exclude_perf_counters false"'
+
 # check if the prometheus daemon is running
 # before starting the e2e tests
 
@@ -53,7 +55,5 @@ kcli ssh -u root ceph-node-00 'cephadm shell "ceph dashboard set-prometheus-api-
 kcli ssh -u root ceph-node-00 'cephadm shell "ceph dashboard set-grafana-api-url https://192.168.100.100:3000"'
 kcli ssh -u root ceph-node-00 'cephadm shell "ceph orch apply node-exporter --placement 'count:2'"'
 
-kcli ssh -u root ceph-node-00 'cephadm shell "ceph config set mgr mgr/prometheus/exclude_perf_counters false"'
-
 cypress_run ["cypress/e2e/orchestrator/workflow/*.feature","cypress/e2e/orchestrator/workflow/*-spec.ts"]
 cypress_run "cypress/e2e/orchestrator/grafana/*.feature"
index ee0607f23fd9d618fda40e380ddfe9baf20c7592..96f08f027a9ecc81ad03fed76a4ab881250d75f3 100644 (file)
@@ -13,6 +13,7 @@ from collections import namedtuple
 
 from mgr_module import CLIReadCommand, MgrModule, MgrStandbyModule, PG_STATES, Option, ServiceInfoT, HandleCommandResult, CLIWriteCommand
 from mgr_util import get_default_addr, profile_method, build_url
+from orchestrator import OrchestratorClientMixin, raise_if_exception, NoOrchestrator
 from rbd import RBD
 
 from typing import DefaultDict, Optional, Dict, Any, Set, cast, Tuple, Union, List, Callable
@@ -548,7 +549,7 @@ class MetricCollectionThread(threading.Thread):
         self.event.set()
 
 
-class Module(MgrModule):
+class Module(MgrModule, OrchestratorClientMixin):
     MODULE_OPTIONS = [
         Option(
             'server_addr',
@@ -645,6 +646,8 @@ class Module(MgrModule):
         _global_instance = self
         self.metrics_thread = MetricCollectionThread(_global_instance)
         self.health_history = HealthHistory(self)
+        self.modify_instance_id = self.get_orch_status() and self.get_module_option(
+            'exclude_perf_counters')
 
     def _setup_static_metrics(self) -> Dict[str, Metric]:
         metrics = {}
@@ -861,6 +864,12 @@ class Module(MgrModule):
 
         return metrics
 
+    def get_orch_status(self) -> bool:
+        try:
+            return self.available()[0]
+        except NoOrchestrator:
+            return False
+
     def get_server_addr(self) -> str:
         """
         Return the current mgr server IP.
@@ -1281,9 +1290,20 @@ class Module(MgrModule):
             )
 
         # Populate other servers metadata
+        # If orchestrator is available and ceph-exporter is running, modify the rgw instance id
+        # to match the one from the exporter
+        if self.modify_instance_id:
+            daemons = raise_if_exception(self.list_daemons(daemon_type='rgw'))
+            for daemon in daemons:
+                self.metrics['rgw_metadata'].set(1,
+                                                 ('{}.{}'.format(str(daemon.daemon_type),
+                                                                 str(daemon.daemon_id)),
+                                                  str(daemon.hostname),
+                                                  str(daemon.version),
+                                                  str(daemon.daemon_id).split(".")[2]))
         for key, value in servers.items():
             service_id, service_type = key
-            if service_type == 'rgw':
+            if service_type == 'rgw' and not self.modify_instance_id:
                 hostname, version, name = value
                 self.metrics['rgw_metadata'].set(
                     1,
index 3a46618261ff44eef025d661b276fc6c0e1b3673..7ef5631f892a787920a31c910b0d5c715936fc43 100644 (file)
@@ -1,6 +1,7 @@
 add_executable(unittest_exporter
   test_exporter.cc
   "${CMAKE_SOURCE_DIR}/src/exporter/util.cc"
+  "${CMAKE_SOURCE_DIR}/src/exporter/DaemonMetricCollector.cc"
   )
 
 target_link_libraries(unittest_exporter
index 83750e0eaf1d353b1fda4aa6d46048b2abe7d701..7bc44d9b3213e14b940e4c0e9a7e70645ae61d17 100644 (file)
@@ -1,10 +1,13 @@
 #include "gtest/gtest.h"
 #include "exporter/util.h"
+#include "exporter/DaemonMetricCollector.h"
 
 #include <string>
 #include <vector>
 #include <utility>
 
+typedef std::map<std::string, std::string> labels_t;
+
 // 17.2.6's memento mori:
 // This data was gathered from the python implementation of the promethize method
 // where we transform the path of a counter to a valid prometheus name.
@@ -662,3 +665,23 @@ TEST(Exporter, promethize) {
   }
 }
 
+TEST(Exporter, check_labels_and_metric_name) {
+  static std::vector<std::pair<std::string, std::string>> counters_data;
+  counters_data.emplace_back("ceph-osd.0", "ceph_osd_numpg");
+  counters_data.emplace_back("ceph-client.rgw.foo.ceph-node-00.okbvtv", "ceph_rgw_get");
+
+  static std::vector<std::pair<labels_t, std::string>> labels_and_name;
+  labels_and_name.emplace_back(labels_t{{"ceph_daemon", "\"osd.0\""}}, "ceph_osd_numpg");
+  labels_and_name.emplace_back(labels_t{{"instance_id", "\"okbvtv\""}}, "ceph_rgw_get");
+  auto counter_data_itr = counters_data.begin();
+  auto labels_and_name_itr = labels_and_name.begin();
+  for (; counter_data_itr != counters_data.end() && labels_and_name_itr != labels_and_name.end();
+         ++counter_data_itr, ++labels_and_name_itr) {
+        std::string daemon_name = counter_data_itr->first;
+        std::string counter_name = counter_data_itr->second;
+        DaemonMetricCollector &collector = collector_instance();
+        std::pair<labels_t, std::string> result = collector.get_labels_and_metric_name(daemon_name, counter_name);
+        ASSERT_EQ(result.first, labels_and_name_itr->first);
+        ASSERT_EQ(result.second, labels_and_name_itr->second);
+  }
+}