git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
exporter: don't skip loop if pid path is empty
author: Avan Thakkar <athakkar@redhat.com>
Mon, 19 Sep 2022 10:30:20 +0000 (16:00 +0530)
committer: Avan Thakkar <athakkar@redhat.com>
Mon, 19 Sep 2022 10:30:20 +0000 (16:00 +0530)
Signed-off-by: Avan Thakkar <athakkar@redhat.com>
Fixes an issue where an empty pid_file value in the config dump prevented metrics from being added for that daemon. Also fetch process metrics only if
pid_path isn't empty.

src/exporter/DaemonMetricCollector.cc

index 7f88113b9905690e6abc426041b64fef70654190..8d5bad0d97b468e75937135b815a14ec8b7cdbe3 100644 (file)
@@ -1,13 +1,4 @@
 #include "DaemonMetricCollector.h"
-#include "common/admin_socket_client.h"
-#include "common/debug.h"
-#include "common/hostname.h"
-#include "common/perf_counters.h"
-#include "global/global_init.h"
-#include "global/global_context.h"
-#include "common/split.h"
-#include "include/common_fwd.h"
-#include "util.h"
 
 #include <boost/json/src.hpp>
 #include <chrono>
 #include <string>
 #include <utility>
 
+#include "common/admin_socket_client.h"
+#include "common/debug.h"
+#include "common/hostname.h"
+#include "common/perf_counters.h"
+#include "common/split.h"
+#include "global/global_context.h"
+#include "global/global_init.h"
+#include "include/common_fwd.h"
+#include "util.h"
+
 #define dout_context g_ceph_context
 #define dout_subsys ceph_subsys_ceph_exporter
 
@@ -86,48 +87,60 @@ void DaemonMetricCollector::dump_asok_metrics() {
 
   std::vector<std::pair<std::string, int>> daemon_pids;
 
+  int failures = 0;
   bool sort = g_conf().get_val<bool>("exporter_sort_metrics");
   if (sort) {
-    builder = std::unique_ptr<OrderedMetricsBuilder>(new OrderedMetricsBuilder());
+    builder =
+        std::unique_ptr<OrderedMetricsBuilder>(new OrderedMetricsBuilder());
   } else {
-    builder = std::unique_ptr<UnorderedMetricsBuilder>(new UnorderedMetricsBuilder());
+    builder =
+        std::unique_ptr<UnorderedMetricsBuilder>(new UnorderedMetricsBuilder());
   }
   for (auto &[daemon_name, sock_client] : clients) {
     bool ok;
     sock_client.ping(&ok);
     if (!ok) {
+      failures++;
       continue;
     }
-    std::string perf_dump_response = asok_request(sock_client, "perf dump", daemon_name);
+    std::string perf_dump_response =
+        asok_request(sock_client, "perf dump", daemon_name);
     if (perf_dump_response.size() == 0) {
+      failures++;
       continue;
     }
-    std::string perf_schema_response = asok_request(sock_client, "perf schema", daemon_name);
+    std::string perf_schema_response =
+        asok_request(sock_client, "perf schema", daemon_name);
     if (perf_schema_response.size() == 0) {
+      failures++;
+      continue;
+    }
+    std::string config_show =
+        asok_request(sock_client, "config show", daemon_name);
+    if (config_show.size() == 0) {
+      failures++;
       continue;
     }
-    std::string config_show = asok_request(sock_client, "config show", daemon_name);
     json_object pid_file_json = boost::json::parse(config_show).as_object();
     std::string pid_path =
-      boost_string_to_std(pid_file_json["pid_file"].as_string());
+        boost_string_to_std(pid_file_json["pid_file"].as_string());
     std::string pid_str = read_file_to_string(pid_path);
     if (!pid_path.size()) {
-      continue;
+      dout(1) << "pid path is empty; process metrics won't be fetched for: "
+              << daemon_name << dendl;
     }
     daemon_pids.push_back({daemon_name, std::stoi(pid_str)});
     json_object dump = boost::json::parse(perf_dump_response).as_object();
     json_object schema = boost::json::parse(perf_schema_response).as_object();
     for (auto &perf : schema) {
-      auto sv = perf.key();
-      std::string perf_group = {sv.begin(), sv.end()};
+      std::string perf_group = {perf.key().begin(), perf.key().end()};
       json_object perf_group_object = perf.value().as_object();
       for (auto &perf_counter : perf_group_object) {
-        auto sv1 = perf_counter.key();
-        std::string perf_name = {sv1.begin(), sv1.end()};
+        std::string perf_name = {perf_counter.key().begin(),
+                                 perf_counter.key().end()};
         json_object perf_info = perf_counter.value().as_object();
         auto prio_limit = g_conf().get_val<int64_t>("exporter_prio_limit");
-        if (perf_info["priority"].as_int64() <
-            prio_limit) {
+        if (perf_info["priority"].as_int64() < prio_limit) {
           continue;
         }
         std::string name = "ceph_" + perf_group + "_" + perf_name;
@@ -143,10 +156,12 @@ void DaemonMetricCollector::dump_asok_metrics() {
       }
     }
   }
-  dout(10) << "Perf counters retrieved for " << clients.size() << " daemons." << dendl;
+  dout(10) << "Perf counters retrieved for " << clients.size() - failures << "/"
+           << clients.size() << " daemons." << dendl;
   // get time spent on this function
   timer.stop();
-  std::string scrap_desc("Time spent scraping and transforming perfcounters to metrics");
+  std::string scrap_desc(
+      "Time spent scraping and transforming perf counters to metrics");
   labels_t scrap_labels;
   scrap_labels["host"] = quote(ceph_get_hostname());
   scrap_labels["function"] = quote(__FUNCTION__);
@@ -154,7 +169,10 @@ void DaemonMetricCollector::dump_asok_metrics() {
              "gauge", scrap_labels);
 
   const std::lock_guard<std::mutex> lock(metrics_mutex);
-  get_process_metrics(daemon_pids);
+  // only get metrics if there's pid path for some or all daemons isn't empty
+  if (daemon_pids.size() != 0) {
+    get_process_metrics(daemon_pids);
+  }
   metrics = builder->dump();
 }
 
@@ -181,7 +199,8 @@ struct pstat read_pid_stat(int pid) {
   return stat;
 }
 
-void DaemonMetricCollector::get_process_metrics(std::vector<std::pair<std::string, int>> daemon_pids) {
+void DaemonMetricCollector::get_process_metrics(
+    std::vector<std::pair<std::string, int>> daemon_pids) {
   std::string path("/proc");
   std::stringstream ss;
   for (auto &[daemon_name, pid] : daemon_pids) {
@@ -194,7 +213,7 @@ void DaemonMetricCollector::get_process_metrics(std::vector<std::pair<std::strin
     double total_time_seconds = user_time + kernel_time;
     double uptime = std::stod(uptimes[0]);
     double elapsed_time = uptime - start_time_seconds;
-    double idle_time = elapsed_time  - total_time_seconds;
+    double idle_time = elapsed_time - total_time_seconds;
     double usage = total_time_seconds * 100 / elapsed_time;
 
     labels_t labels;
@@ -205,8 +224,8 @@ void DaemonMetricCollector::get_process_metrics(std::vector<std::pair<std::strin
                "Number of major page faults of daemon", "counter", labels);
     add_metric(builder, stat.num_threads, "ceph_exporter_num_threads",
                "Number of threads used by daemon", "gauge", labels);
-    add_metric(builder, usage, "ceph_exporter_cpu_usage", "CPU usage of a daemon",
-               "gauge", labels);
+    add_metric(builder, usage, "ceph_exporter_cpu_usage",
+               "CPU usage of a daemon", "gauge", labels);
 
     std::string cpu_time_desc = "Process time in kernel/user/idle mode";
     labels_t cpu_total_labels;
@@ -220,21 +239,22 @@ void DaemonMetricCollector::get_process_metrics(std::vector<std::pair<std::strin
     cpu_total_labels["mode"] = quote("idle");
     add_metric(builder, idle_time, "ceph_exporter_cpu_total", cpu_time_desc,
                "counter", cpu_total_labels);
-    add_metric(builder, stat.vm_size, "ceph_exporter_vm_size", "Virtual memory used in a daemon",
-               "gauge", labels);
+    add_metric(builder, stat.vm_size, "ceph_exporter_vm_size",
+               "Virtual memory used in a daemon", "gauge", labels);
     add_metric(builder, stat.resident_size, "ceph_exporter_resident_size",
                "Resident memory in a daemon", "gauge", labels);
   }
 }
 
 std::string DaemonMetricCollector::asok_request(AdminSocketClient &asok,
-                                                std::string command, std::string daemon_name) {
+                                                std::string command,
+                                                std::string daemon_name) {
   std::string request("{\"prefix\": \"" + command + "\"}");
   std::string response;
   std::string err = asok.do_request(request, &response);
   if (err.length() > 0 || response.substr(0, 5) == "ERROR") {
-    dout(1) << "command " << command << "failed for daemon " << daemon_name 
-      << "with error: " << err << dendl;
+    dout(1) << "command " << command << "failed for daemon " << daemon_name
+            << "with error: " << err << dendl;
     return "";
   }
   return response;
@@ -253,8 +273,9 @@ DaemonMetricCollector::get_labels_and_metric_name(std::string daemon_name,
   } else {
     labels["ceph_daemon"] = quote(daemon_name);
     if (daemon_name.find("rbd-mirror") != std::string::npos) {
-      std::regex re("^rbd_mirror_image_([^/]+)/(?:(?:([^/]+)/"
-                    ")?)(.*)\\.(replay(?:_bytes|_latency)?)$");
+      std::regex re(
+          "^rbd_mirror_image_([^/]+)/(?:(?:([^/]+)/"
+          ")?)(.*)\\.(replay(?:_bytes|_latency)?)$");
       std::smatch match;
       if (std::regex_search(daemon_name, match, re) == true) {
         new_metric_name = "ceph_rbd_mirror_image_" + match.str(4);
@@ -277,9 +298,9 @@ void DaemonMetricCollector::dump_asok_metric(json_object perf_info,
                                              labels_t labels) {
   int64_t type = perf_info["type"].as_int64();
   std::string metric_type =
-    boost_string_to_std(perf_info["metric_type"].as_string());
+      boost_string_to_std(perf_info["metric_type"].as_string());
   std::string description =
-    boost_string_to_std(perf_info["description"].as_string());
+      boost_string_to_std(perf_info["description"].as_string());
 
   if (type & PERFCOUNTER_LONGRUNAVG) {
     int64_t count = perf_values.as_object()["avgcount"].as_int64();
@@ -306,16 +327,15 @@ void DaemonMetricCollector::update_sockets() {
   std::string sock_dir = g_conf().get_val<std::string>("exporter_sock_dir");
   clients.clear();
   std::filesystem::path sock_path = sock_dir;
-  if(!std::filesystem::is_directory(sock_path.parent_path())) {
+  if (!std::filesystem::is_directory(sock_path.parent_path())) {
     dout(1) << "ERROR: No such directory exist" << sock_dir << dendl;
     return;
   }
-  for (const auto &entry :
-         std::filesystem::directory_iterator(sock_dir)) {
+  for (const auto &entry : std::filesystem::directory_iterator(sock_dir)) {
     if (entry.path().extension() == ".asok") {
       std::string daemon_socket_name = entry.path().filename().string();
       std::string daemon_name =
-        daemon_socket_name.substr(0, daemon_socket_name.size() - 5);
+          daemon_socket_name.substr(0, daemon_socket_name.size() - 5);
       if (clients.find(daemon_name) == clients.end() &&
           !(daemon_name.find("mgr") != std::string::npos) &&
           !(daemon_name.find("ceph-exporter") != std::string::npos)) {
@@ -329,7 +349,6 @@ void DaemonMetricCollector::update_sockets() {
 void OrderedMetricsBuilder::add(std::string value, std::string name,
                                 std::string description, std::string mtype,
                                 labels_t labels) {
-
   if (metrics.find(name) == metrics.end()) {
     Metric metric(name, mtype, description);
     metrics[name] = std::move(metric);
@@ -348,7 +367,6 @@ std::string OrderedMetricsBuilder::dump() {
 void UnorderedMetricsBuilder::add(std::string value, std::string name,
                                   std::string description, std::string mtype,
                                   labels_t labels) {
-
   Metric metric(name, mtype, description);
   metric.add(labels, value);
   out += metric.dump() + "\n\n";