From: Divyansh Kamboj Date: Tue, 30 Apr 2024 07:44:10 +0000 (+0530) Subject: exporter: handle exceptions gracefully X-Git-Tag: v20.0.0~1989^2 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=e442ee479987f339afdc333582b6b64c66ba6846;p=ceph.git exporter: handle exceptions gracefully ceph exporter crashes, and fails to handle exceptions in dump_asok_metrics(). add try and catch blocks to handle the exceptions gracefully. Signed-off-by: Divyansh Kamboj --- diff --git a/src/exporter/DaemonMetricCollector.cc b/src/exporter/DaemonMetricCollector.cc index dda0899e18637..dccb247fb74e6 100644 --- a/src/exporter/DaemonMetricCollector.cc +++ b/src/exporter/DaemonMetricCollector.cc @@ -84,6 +84,66 @@ std::string boost_string_to_std(boost::json::string js) { std::string quote(std::string value) { return "\"" + value + "\""; } +void DaemonMetricCollector::parse_asok_metrics( + std::string &counter_dump_response, std::string &counter_schema_response, + int64_t prio_limit, const std::string &daemon_name) { + json_object counter_dump = + boost::json::parse(counter_dump_response).as_object(); + json_object counter_schema = + boost::json::parse(counter_schema_response).as_object(); + + for (auto &perf_group_item : counter_schema) { + std::string perf_group = {perf_group_item.key().begin(), + perf_group_item.key().end()}; + json_array perf_group_schema_array = perf_group_item.value().as_array(); + json_array perf_group_dump_array = counter_dump[perf_group].as_array(); + for (auto schema_itr = perf_group_schema_array.begin(), + dump_itr = perf_group_dump_array.begin(); + schema_itr != perf_group_schema_array.end() && + dump_itr != perf_group_dump_array.end(); + ++schema_itr, ++dump_itr) { + auto counters = schema_itr->at("counters").as_object(); + auto counters_labels = schema_itr->at("labels").as_object(); + auto counters_values = dump_itr->at("counters").as_object(); + labels_t labels; + + for (auto &label : counters_labels) { + std::string label_key = 
{label.key().begin(), label.key().end()}; + labels[label_key] = quote(label.value().as_string().c_str()); + } + for (auto &counter : counters) { + json_object counter_group = counter.value().as_object(); + if (counter_group["priority"].as_int64() < prio_limit) { + continue; + } + std::string counter_name_init = {counter.key().begin(), + counter.key().end()}; + std::string counter_name = perf_group + "_" + counter_name_init; + promethize(counter_name); + + auto extra_labels = get_extra_labels(daemon_name); + if (extra_labels.empty()) { + dout(1) << "Unable to parse instance_id from daemon_name: " + << daemon_name << dendl; + continue; + } + labels.insert(extra_labels.begin(), extra_labels.end()); + + // For now this is only required for rgw multi-site metrics + auto multisite_labels_and_name = add_fixed_name_metrics(counter_name); + if (!multisite_labels_and_name.first.empty()) { + labels.insert(multisite_labels_and_name.first.begin(), + multisite_labels_and_name.first.end()); + counter_name = multisite_labels_and_name.second; + } + auto perf_values = counters_values.at(counter_name_init); + dump_asok_metric(counter_group, perf_values, counter_name, labels); + } + } + } +} + + void DaemonMetricCollector::dump_asok_metrics(bool sort_metrics, int64_t counter_prio, bool sockClientsPing, std::string &dump_response, std::string &schema_response, @@ -125,71 +185,36 @@ void DaemonMetricCollector::dump_asok_metrics(bool sort_metrics, int64_t counter continue; } - json_object counter_dump = boost::json::parse(counter_dump_response).as_object(); - json_object counter_schema = boost::json::parse(counter_schema_response).as_object(); - - for (auto &perf_group_item : counter_schema) { - std::string perf_group = {perf_group_item.key().begin(), - perf_group_item.key().end()}; - json_array perf_group_schema_array = perf_group_item.value().as_array(); - json_array perf_group_dump_array = counter_dump[perf_group].as_array(); - for (auto schema_itr = perf_group_schema_array.begin(), - 
dump_itr = perf_group_dump_array.begin(); - schema_itr != perf_group_schema_array.end() && - dump_itr != perf_group_dump_array.end(); - ++schema_itr, ++dump_itr) { - auto counters = schema_itr->at("counters").as_object(); - auto counters_labels = schema_itr->at("labels").as_object(); - auto counters_values = dump_itr->at("counters").as_object(); - labels_t labels; - - for (auto &label: counters_labels) { - std::string label_key = {label.key().begin(), label.key().end()}; - labels[label_key] = quote(label.value().as_string().c_str()); - } - for (auto &counter : counters) { - json_object counter_group = counter.value().as_object(); - if (counter_group["priority"].as_int64() < prio_limit) { - continue; - } - std::string counter_name_init = {counter.key().begin(), counter.key().end()}; - std::string counter_name = perf_group + "_" + counter_name_init; - promethize(counter_name); - - auto extra_labels = get_extra_labels(daemon_name); - if (extra_labels.empty()) { - dout(1) << "Unable to parse instance_id from daemon_name: " << daemon_name << dendl; - continue; - } - labels.insert(extra_labels.begin(), extra_labels.end()); - - // For now this is only required for rgw multi-site metrics - auto multisite_labels_and_name = add_fixed_name_metrics(counter_name); - if (!multisite_labels_and_name.first.empty()) { - labels.insert(multisite_labels_and_name.first.begin(), multisite_labels_and_name.first.end()); - counter_name = multisite_labels_and_name.second; - } - auto perf_values = counters_values.at(counter_name_init); - dump_asok_metric(counter_group, perf_values, counter_name, labels); - } - } - } - std::string config_show = !config_show_response ? "" : + try { + std::string config_show = !config_show_response ? 
"" : asok_request(sock_client, "config show", daemon_name); - if (config_show.size() == 0) { + if (config_show.size() == 0) { + failures++; + continue; + } + json_object pid_file_json = boost::json::parse(config_show).as_object(); + std::string pid_path = + boost_string_to_std(pid_file_json["pid_file"].as_string()); + std::string pid_str = read_file_to_string(pid_path); + if (!pid_path.size()) { + dout(1) << "pid path is empty; process metrics won't be fetched for: " + << daemon_name << dendl; + } + if (!pid_str.empty()) { + daemon_pids.push_back({daemon_name, std::stoi(pid_str)}); + } + parse_asok_metrics(counter_dump_response, counter_schema_response, + prio_limit, daemon_name); + } catch (const std::invalid_argument &e) { failures++; + dout(1) << "failed to handle " << daemon_name << ": " << e.what() + << dendl; + continue; + } catch (const std::runtime_error &e) { + failures++; + dout(1) << "failed to parse json for " << daemon_name << ": " << e.what() + << dendl; continue; - } - json_object pid_file_json = boost::json::parse(config_show).as_object(); - std::string pid_path = - boost_string_to_std(pid_file_json["pid_file"].as_string()); - std::string pid_str = read_file_to_string(pid_path); - if (!pid_path.size()) { - dout(1) << "pid path is empty; process metrics won't be fetched for: " - << daemon_name << dendl; - } - if (!pid_str.empty()) { - daemon_pids.push_back({daemon_name, std::stoi(pid_str)}); } } dout(10) << "Perf counters retrieved for " << clients.size() - failures << "/" diff --git a/src/exporter/DaemonMetricCollector.h b/src/exporter/DaemonMetricCollector.h index 2dcdc9ce231e1..d2e929b4d670f 100644 --- a/src/exporter/DaemonMetricCollector.h +++ b/src/exporter/DaemonMetricCollector.h @@ -52,6 +52,9 @@ private: void dump_asok_metric(boost::json::object perf_info, boost::json::value perf_values, std::string name, labels_t labels); + void parse_asok_metrics(std::string &counter_dump_response, + std::string &counter_schema_response, + int64_t 
prio_limit, const std::string &daemon_name); void get_process_metrics(std::vector<std::pair<std::string, int>> daemon_pids); std::string asok_request(AdminSocketClient &asok, std::string command, std::string daemon_name); };