]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
exporter: user only counter dump/schema commands for extacting counters 50718/head
authorAvan Thakkar <athakkar@redhat.com>
Tue, 28 Mar 2023 13:32:47 +0000 (19:02 +0530)
committerAvan Thakkar <athakkar@redhat.com>
Tue, 28 Mar 2023 18:12:09 +0000 (23:42 +0530)
Fixes: https://tracker.ceph.com/issues/59191
Signed-off-by: Avan Thakkar <athakkar@redhat.com>
Ceph exporter no more required the output of perf dump/schema, as the ``counter dump`` command
returns both labeled and unlabeled perf counters which exporter can fetch and export.
Removed the ``exporter_get_labeled_counters`` confiug option as exporter will now export
all the counters, labeled or unlabeled.
Also the fix includes the support for renaming the metrics name of rgw multi-site and
adding labels to it, similar to what is there in prometheus module.

src/common/options/ceph-exporter.yaml.in
src/exporter/DaemonMetricCollector.cc
src/exporter/DaemonMetricCollector.h

index 269dccd68cb95979fe42f9c56dcabe072313e1cc..798a185e96bcc1ec6e205d8a66e196071bf570e1 100644 (file)
@@ -52,15 +52,3 @@ options:
   - ceph-exporter
   flags:
   - runtime
-- name: exporter_get_labeled_counters
-  type: bool
-  level: advanced
-  desc: If true will fetch and export labeled performance counters
-  long_desc: Ceph perf counters now support labels to provide fine-grained
-    stats using ``counter dump`` command and exporter can fetch these counters
-    and add the labels in Prometheus format.
-  default: true
-  services:
-  - ceph-exporter
-  flags:
-  - runtime
index 779519b53e9b9dce0a83e361c070675b2b51ae74..4816896ad1e4d88e4b89fa70a2cd3b33e2c0a3b8 100644 (file)
@@ -96,6 +96,7 @@ void DaemonMetricCollector::dump_asok_metrics() {
     builder =
         std::unique_ptr<UnorderedMetricsBuilder>(new UnorderedMetricsBuilder());
   }
+  auto prio_limit = g_conf().get_val<int64_t>("exporter_prio_limit");
   for (auto &[daemon_name, sock_client] : clients) {
     bool ok;
     sock_client.ping(&ok);
@@ -103,18 +104,59 @@ void DaemonMetricCollector::dump_asok_metrics() {
       failures++;
       continue;
     }
-    std::string perf_dump_response =
-        asok_request(sock_client, "perf dump", daemon_name);
-    if (perf_dump_response.size() == 0) {
-      failures++;
-      continue;
+    std::string counter_dump_response =
+      asok_request(sock_client, "counter dump", daemon_name);
+    if (counter_dump_response.size() == 0) {
+        failures++;
+        continue;
     }
-    std::string perf_schema_response =
-        asok_request(sock_client, "perf schema", daemon_name);
-    if (perf_schema_response.size() == 0) {
+    std::string counter_schema_response =
+        asok_request(sock_client, "counter schema", daemon_name);
+    if (counter_schema_response.size() == 0) {
       failures++;
       continue;
     }
+
+    json_object counter_dump = boost::json::parse(counter_dump_response).as_object();
+    json_object counter_schema = boost::json::parse(counter_schema_response).as_object();
+
+    for (auto &labeled_perf : counter_schema) {
+      std::string labeled_perf_group = {labeled_perf.key().begin(), labeled_perf.key().end()};
+      json_object labeled_perf_group_object = labeled_perf.value().as_object();
+      auto counters = labeled_perf_group_object["counters"].as_object();
+      auto counters_labels = labeled_perf_group_object["labels"].as_object();
+      auto labeled_perf_group_counters = counter_dump[labeled_perf_group].as_object()["counters"].as_object();
+      labels_t labels;
+
+      for(auto &label: counters_labels) {
+        std::string label_key = {label.key().begin(), label.key().end()};
+        labels[label_key] = quote(label.value().as_string().c_str());
+      }
+      for (auto &counter : counters) {
+        json_object counter_group = counter.value().as_object();
+        if (counter_group["priority"].as_int64() < prio_limit) {
+          continue;        
+        }
+        std::string counter_name_init =  {counter.key().begin(), counter.key().end()};
+        std::string counter_name = "ceph_" + labeled_perf_group + "_" + counter_name_init;
+        std::replace_if(counter_name.begin(), counter_name.end(), is_hyphen, '_');
+
+        if (counters_labels.empty()) {
+          auto labels_and_name = get_labels_and_metric_name(daemon_name, counter_name);
+          labels = labels_and_name.first;
+          counter_name = labels_and_name.second;
+        }
+        // For now this is only required for rgw multi-site metrics
+        auto multisite_labels_and_name = add_fixed_name_metrics(counter_name);
+        if (!multisite_labels_and_name.first.empty()) {
+          labels.insert(multisite_labels_and_name.first.begin(), multisite_labels_and_name.first.end());
+          counter_name = multisite_labels_and_name.second;
+        }
+        labels.insert({"ceph_daemon", quote(daemon_name)});
+        auto perf_values = labeled_perf_group_counters.at(counter_name_init);
+        dump_asok_metric(counter_group, perf_values, counter_name, labels);
+      }
+    }
     std::string config_show =
         asok_request(sock_client, "config show", daemon_name);
     if (config_show.size() == 0) {
@@ -132,76 +174,6 @@ void DaemonMetricCollector::dump_asok_metrics() {
     if (!pid_str.empty()) {
       daemon_pids.push_back({daemon_name, std::stoi(pid_str)});
     }
-    json_object dump = boost::json::parse(perf_dump_response).as_object();
-    json_object schema = boost::json::parse(perf_schema_response).as_object();
-    auto prio_limit = g_conf().get_val<int64_t>("exporter_prio_limit");
-    for (auto &perf : schema) {
-      std::string perf_group = {perf.key().begin(), perf.key().end()};
-      json_object perf_group_object = perf.value().as_object();
-      for (auto &perf_counter : perf_group_object) {
-        std::string perf_name = {perf_counter.key().begin(),
-                                 perf_counter.key().end()};
-        json_object perf_info = perf_counter.value().as_object();
-        if (perf_info["priority"].as_int64() < prio_limit) {
-          continue;
-        }
-        std::string name = "ceph_" + perf_group + "_" + perf_name;
-        std::replace_if(name.begin(), name.end(), is_hyphen, '_');
-
-        auto labels_and_name = get_labels_and_metric_name(daemon_name, name);
-        labels_t labels = labels_and_name.first;
-        name = labels_and_name.second;
-
-        json_value perf_values = dump[perf_group].as_object()[perf_name];
-        dump_asok_metric(perf_info, perf_values, name, labels);
-      }
-    }
-    // fetch labeled perf counters if config is set to true
-    bool labeledperf = g_conf().get_val<bool>("exporter_get_labeled_counters");
-    if (labeledperf) {
-       std::string counter_dump_response =
-        asok_request(sock_client, "counter dump", daemon_name);
-      if (counter_dump_response.size() == 0) {
-          failures++;
-          continue;
-      }
-      std::string counter_schema_response =
-          asok_request(sock_client, "counter schema", daemon_name);
-      if (counter_schema_response.size() == 0) {
-        failures++;
-        continue;
-      }
-
-      json_object counter_dump = boost::json::parse(counter_dump_response).as_object();
-      json_object counter_schema = boost::json::parse(counter_schema_response).as_object();
-
-      for (auto &labeled_perf : counter_schema) {
-        std::string labeled_perf_group = {labeled_perf.key().begin(), labeled_perf.key().end()};
-        json_object labeled_perf_group_object = labeled_perf.value().as_object();
-        auto counters = labeled_perf_group_object["counters"].as_object();
-        auto counters_labels = labeled_perf_group_object["labels"].as_object();
-        auto labeled_perf_group_counters = counter_dump[labeled_perf_group].as_object()["counters"].as_object();
-        labels_t labels;
-
-        for(auto &label: counters_labels) {
-          std::string label_key = {label.key().begin(), label.key().end()};
-          labels[label_key] = quote(label.value().as_string().c_str());
-        }
-        labels["ceph_daemon"] = quote(daemon_name);
-        for (auto &counter : counters) {
-          json_object counter_group = counter.value().as_object();
-          if (counter_group["priority"].as_int64() < prio_limit) {
-            continue;        
-          }
-          std::string counter_name_init =  {counter.key().begin(), counter.key().end()};
-          std::string counter_name = "ceph_" + labeled_perf_group + "_" + counter_name_init;
-          std::replace_if(counter_name.begin(), counter_name.end(), is_hyphen, '_');
-
-          auto perf_values = labeled_perf_group_counters.at(counter_name_init);
-          dump_asok_metric(counter_group, perf_values, counter_name, labels);
-      }
-    }
-  }
   }
   dout(10) << "Perf counters retrieved for " << clients.size() - failures << "/"
            << clients.size() << " daemons." << dendl;
@@ -313,28 +285,53 @@ DaemonMetricCollector::get_labels_and_metric_name(std::string daemon_name,
   std::string new_metric_name;
   labels_t labels;
   new_metric_name = metric_name;
-  if (daemon_name.find("rgw") != std::string::npos) {
+  // In vstart cluster socket files for rgw are stored as radosgw.<instance_id>.asok
+  if (daemon_name.find("radosgw") != std::string::npos) {
+    std::size_t pos = daemon_name.find_last_of('.');
+    std::string tmp = daemon_name.substr(pos+1);
+    labels["instance_id"] = quote(tmp);
+  }
+  else if (daemon_name.find("rgw") != std::string::npos) {
     std::string tmp = daemon_name.substr(16, std::string::npos);
     std::string::size_type pos = tmp.find('.');
     labels["instance_id"] = quote("rgw." + tmp.substr(0, pos));
-  } else {
-    labels["ceph_daemon"] = quote(daemon_name);
-    if (daemon_name.find("rbd-mirror") != std::string::npos) {
-      std::regex re(
-          "^rbd_mirror_image_([^/]+)/(?:(?:([^/]+)/"
-          ")?)(.*)\\.(replay(?:_bytes|_latency)?)$");
-      std::smatch match;
-      if (std::regex_search(daemon_name, match, re) == true) {
-        new_metric_name = "ceph_rbd_mirror_image_" + match.str(4);
-        labels["pool"] = quote(match.str(1));
-        labels["namespace"] = quote(match.str(2));
-        labels["image"] = quote(match.str(3));
-      }
+  }
+  else if (daemon_name.find("rbd-mirror") != std::string::npos) {
+    std::regex re(
+        "^rbd_mirror_image_([^/]+)/(?:(?:([^/]+)/"
+        ")?)(.*)\\.(replay(?:_bytes|_latency)?)$");
+    std::smatch match;
+    if (std::regex_search(daemon_name, match, re) == true) {
+      new_metric_name = "ceph_rbd_mirror_image_" + match.str(4);
+      labels["pool"] = quote(match.str(1));
+      labels["namespace"] = quote(match.str(2));
+      labels["image"] = quote(match.str(3));
     }
   }
   return {labels, new_metric_name};
 }
 
+// Add fixed name metrics from existing ones that have details in their names
+// that should be in labels (not in name). For backward compatibility,
+// a new fixed name metric is created (instead of replacing)and details are put
+// in new labels. Intended for RGW sync perf. counters but extendable as required.
+// See: https://tracker.ceph.com/issues/45311
+std::pair<labels_t, std::string>
+DaemonMetricCollector::add_fixed_name_metrics(std::string metric_name) {
+  std::string new_metric_name;
+  labels_t labels;
+  new_metric_name = metric_name;
+
+  std::regex re("^data_sync_from_(.*)\\.");
+    std::smatch match;
+    if (std::regex_search(metric_name, match, re) == true) {
+      new_metric_name = std::regex_replace(metric_name, re, "from_([^.]*)', 'from_zone");
+      labels["source_zone"] = quote(match.str(1));
+      return {labels, new_metric_name};
+    }
+  return {};
+}
+
 /*
 perf_values can be either a int/double or a json_object. Since
    json_value is a wrapper of both we use that class.
index c8238669c419c32455c34ebd85175f569a42bae9..3d35a9c79de7d59f824ed4716a149c302c4b4325 100644 (file)
@@ -49,6 +49,7 @@ private:
                         labels_t labels);
   std::pair<labels_t, std::string>
   get_labels_and_metric_name(std::string daemon_name, std::string metric_name);
+  std::pair<labels_t, std::string> add_fixed_name_metrics(std::string metric_name);
   void get_process_metrics(std::vector<std::pair<std::string, int>> daemon_pids);
   std::string asok_request(AdminSocketClient &asok, std::string command, std::string daemon_name);
 };