git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
Handle failures in metric parsing (64335/head)
author Anmol Babu <anmolbabu@Anmols-MacBook-Pro.local>
Thu, 3 Jul 2025 13:25:39 +0000 (18:55 +0530)
committer Anmol Babu <anmolbabu@Anmols-MacBook-Pro.local>
Fri, 4 Jul 2025 01:34:56 +0000 (07:04 +0530)
fixes: https://bugzilla.redhat.com/show_bug.cgi?id=2345460
Signed-off-by: Anmol Babu <anmolbabu@Anmols-MacBook-Pro.local>
src/exporter/DaemonMetricCollector.cc

index 80cdf24458618e6cc52e2c1b7e96014a6f37aaa0..160c75d4c0356779b678c13e8fa5c1794797223e 100644 (file)
@@ -102,62 +102,137 @@ std::string quote(std::string value) { return "\"" + value + "\""; }
 void DaemonMetricCollector::parse_asok_metrics(
     std::string &counter_dump_response, std::string &counter_schema_response,
     int64_t prio_limit, const std::string &daemon_name) {
-  json_object counter_dump =
-      boost::json::parse(counter_dump_response).as_object();
-  json_object counter_schema =
-      boost::json::parse(counter_schema_response).as_object();
-
-  for (auto &perf_group_item : counter_schema) {
-    std::string perf_group = {perf_group_item.key().begin(),
-                              perf_group_item.key().end()};
-    json_array perf_group_schema_array = perf_group_item.value().as_array();
-    json_array perf_group_dump_array = counter_dump[perf_group].as_array();
-    for (auto schema_itr = perf_group_schema_array.begin(),
-              dump_itr = perf_group_dump_array.begin();
-         schema_itr != perf_group_schema_array.end() &&
-         dump_itr != perf_group_dump_array.end();
-         ++schema_itr, ++dump_itr) {
-      auto counters = schema_itr->at("counters").as_object();
-      auto counters_labels = schema_itr->at("labels").as_object();
-      auto counters_values = dump_itr->at("counters").as_object();
-      labels_t labels;
-
-      for (auto &label : counters_labels) {
-        std::string label_key = {label.key().begin(), label.key().end()};
-        labels[label_key] = quote(label.value().as_string().c_str());
-      }
-      for (auto &counter : counters) {
-        json_object counter_group = counter.value().as_object();
-        if (counter_group["priority"].as_int64() < prio_limit) {
-          continue;
-        }
-        std::string counter_name_init = {counter.key().begin(),
-                                         counter.key().end()};
-        std::string counter_name = perf_group + "_" + counter_name_init;
-        promethize(counter_name);
-
-        auto extra_labels = get_extra_labels(daemon_name);
-        if (extra_labels.empty()) {
-          dout(1) << "Unable to parse instance_id from daemon_name: "
-                  << daemon_name << dendl;
+  try {
+    json_object counter_dump =
+        boost::json::parse(counter_dump_response).as_object();
+    json_object counter_schema =
+        boost::json::parse(counter_schema_response).as_object();
+
+    for (auto &perf_group_item : counter_schema) {
+      std::string perf_group = {perf_group_item.key().begin(),
+                                perf_group_item.key().end()};
+      json_array perf_group_schema_array = perf_group_item.value().as_array();
+      json_array perf_group_dump_array = counter_dump[perf_group].as_array();
+      for (auto schema_itr = perf_group_schema_array.begin(),
+                dump_itr = perf_group_dump_array.begin();
+           schema_itr != perf_group_schema_array.end() &&
+           dump_itr != perf_group_dump_array.end();
+           ++schema_itr, ++dump_itr) {
+        try {
+          auto counters = schema_itr->at("counters").as_object();
+          auto counters_labels = schema_itr->at("labels").as_object();
+          auto counters_values = dump_itr->at("counters").as_object();
+          labels_t labels;
+
+          for (auto &label : counters_labels) {
+            std::string label_key = {label.key().begin(), label.key().end()};
+            labels[label_key] = quote(label.value().as_string().c_str());
+          }
+          for (auto &counter : counters) {
+            try {
+              json_object counter_group = counter.value().as_object();
+              if (counter_group["priority"].as_int64() < prio_limit) {
+                continue;
+              }
+              std::string counter_name_init = {counter.key().begin(),
+                                               counter.key().end()};
+              std::string counter_name = perf_group + "_" + counter_name_init;
+              promethize(counter_name);
+
+              auto extra_labels = get_extra_labels(daemon_name);
+              if (extra_labels.empty()) {
+                dout(1) << "Unable to parse instance_id from daemon_name: "
+                        << daemon_name << dendl;
+                continue;
+              }
+              labels.insert(extra_labels.begin(), extra_labels.end());
+
+              // For now this is only required for rgw multi-site metrics
+              auto multisite_labels_and_name = add_fixed_name_metrics(counter_name);
+              if (!multisite_labels_and_name.first.empty()) {
+                labels.insert(multisite_labels_and_name.first.begin(),
+                              multisite_labels_and_name.first.end());
+                counter_name = multisite_labels_and_name.second;
+              }
+              auto perf_values = counters_values.at(counter_name_init);
+              dump_asok_metric(counter_group, perf_values, counter_name, labels);
+            } catch (const std::exception &e) {
+              dout(1) << "Exception in counter processing for " << daemon_name << ": " << e.what() << dendl;
+              continue;
+            }
+          }
+        } catch (const std::exception &e) {
+          dout(1) << "Exception in schema/dump iteration for " << daemon_name << ": " << e.what() << dendl;
           continue;
         }
-        labels.insert(extra_labels.begin(), extra_labels.end());
-
-        // For now this is only required for rgw multi-site metrics
-        auto multisite_labels_and_name = add_fixed_name_metrics(counter_name);
-        if (!multisite_labels_and_name.first.empty()) {
-          labels.insert(multisite_labels_and_name.first.begin(),
-                        multisite_labels_and_name.first.end());
-          counter_name = multisite_labels_and_name.second;
-        }
-        auto perf_values = counters_values.at(counter_name_init);
-        dump_asok_metric(counter_group, perf_values, counter_name, labels);
       }
     }
+  } catch (const std::exception &e) {
+    dout(1) << "Exception in parse_asok_metrics for " << daemon_name << ": " << e.what() << dendl;
+    return;
   }
 }
 
+/*
+perf_values can be either a int/double or a json_object. Since
+   json_value is a wrapper of both we use that class.
+ */
+void DaemonMetricCollector::dump_asok_metric(json_object perf_info,
+                                             json_value perf_values,
+                                             std::string name,
+                                             labels_t labels) {
+  try {
+    if (!perf_info.if_contains("type") ||
+        !perf_info.if_contains("metric_type") ||
+        !perf_info.if_contains("description")) {
+      dout(1) << "Missing required key in perf_info for metric: " << name << dendl;
+      return;
+    }
+    int64_t type = perf_info["type"].as_int64();
+
+    if (!perf_info["metric_type"].is_string()) {
+      dout(1) << "Missing or invalid 'metric_type' in perf_info for metric: " << name << dendl;
+      return;
+    }
+    std::string metric_type =
+        boost_string_to_std(perf_info["metric_type"].as_string());
+
+    if (!perf_info["description"].is_string()) {
+      dout(1) << "Missing or invalid 'description' in perf_info for metric: " << name << dendl;
+      return;
+    }
+    std::string description =
+        boost_string_to_std(perf_info["description"].as_string());
+
+    if (type & PERFCOUNTER_LONGRUNAVG) {
+      if (!perf_values.is_object()) {
+        dout(1) << "perf_values is not an object for metric: " << name << dendl;
+        return;
+      }
+      auto perf_obj = perf_values.as_object();
+      if (!perf_obj.if_contains("avgcount")) {
+        dout(1) << "Missing 'avgcount' in perf_values for metric: " << name << dendl;
+        return;
+      }
+      if (!perf_obj.if_contains("sum")) {
+        dout(1) << "Missing 'sum' in perf_values for metric: " << name << dendl;
+        return;
+      }
+      int64_t count = perf_obj["avgcount"].as_int64();
+      add_metric(builder, count, name + "_count", description + " Count", "counter",
+                 labels);
+      json_value sum_value = perf_obj["sum"];
+      add_double_or_int_metric(builder, sum_value, name + "_sum", description + " Total",
+                               metric_type, labels);
+    } else {
+      add_double_or_int_metric(builder, perf_values, name, description,
+                               metric_type, labels);
+    }
+  } catch (const std::exception& e) {
+    dout(1) << "Exception in dump_asok_metric for metric: " << name << ": " << e.what() << dendl;
+    return;
+  }
+}
 
 void DaemonMetricCollector::dump_asok_metrics(bool sort_metrics, int64_t counter_prio,
                                               bool sockClientsPing, std::string &dump_response,
@@ -400,33 +475,6 @@ DaemonMetricCollector::add_fixed_name_metrics(std::string metric_name) {
   return {};
 }
 
-/*
-perf_values can be either a int/double or a json_object. Since
-   json_value is a wrapper of both we use that class.
- */
-void DaemonMetricCollector::dump_asok_metric(json_object perf_info,
-                                             json_value perf_values,
-                                             std::string name,
-                                             labels_t labels) {
-  int64_t type = perf_info["type"].as_int64();
-  std::string metric_type =
-      boost_string_to_std(perf_info["metric_type"].as_string());
-  std::string description =
-      boost_string_to_std(perf_info["description"].as_string());
-
-  if (type & PERFCOUNTER_LONGRUNAVG) {
-    int64_t count = perf_values.as_object()["avgcount"].as_int64();
-    add_metric(builder, count, name + "_count", description + " Count", "counter",
-               labels);
-    json_value sum_value = perf_values.as_object()["sum"];
-    add_double_or_int_metric(builder, sum_value, name + "_sum", description + " Total",
-                             metric_type, labels);
-  } else {
-    add_double_or_int_metric(builder, perf_values, name, description,
-                             metric_type, labels);
-  }
-}
-
 void DaemonMetricCollector::update_sockets() {
   std::string sock_dir = g_conf().get_val<std::string>("exporter_sock_dir");
   clients.clear();