From: Avan Thakkar Date: Tue, 28 Mar 2023 13:32:47 +0000 (+0530) Subject: exporter: user only counter dump/schema commands for extacting counters X-Git-Tag: v19.0.0~1500^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=refs%2Fpull%2F50718%2Fhead;p=ceph.git exporter: user only counter dump/schema commands for extacting counters Fixes: https://tracker.ceph.com/issues/59191 Signed-off-by: Avan Thakkar Ceph exporter no more required the output of perf dump/schema, as the ``counter dump`` command returns both labeled and unlabeled perf counters which exporter can fetch and export. Removed the ``exporter_get_labeled_counters`` confiug option as exporter will now export all the counters, labeled or unlabeled. Also the fix includes the support for renaming the metrics name of rgw multi-site and adding labels to it, similar to what is there in prometheus module. --- diff --git a/src/common/options/ceph-exporter.yaml.in b/src/common/options/ceph-exporter.yaml.in index 269dccd68cb95..798a185e96bcc 100644 --- a/src/common/options/ceph-exporter.yaml.in +++ b/src/common/options/ceph-exporter.yaml.in @@ -52,15 +52,3 @@ options: - ceph-exporter flags: - runtime -- name: exporter_get_labeled_counters - type: bool - level: advanced - desc: If true will fetch and export labeled performance counters - long_desc: Ceph perf counters now support labels to provide fine-grained - stats using ``counter dump`` command and exporter can fetch these counters - and add the labels in Prometheus format. - default: true - services: - - ceph-exporter - flags: - - runtime diff --git a/src/exporter/DaemonMetricCollector.cc b/src/exporter/DaemonMetricCollector.cc index 779519b53e9b9..4816896ad1e4d 100644 --- a/src/exporter/DaemonMetricCollector.cc +++ b/src/exporter/DaemonMetricCollector.cc @@ -96,6 +96,7 @@ void DaemonMetricCollector::dump_asok_metrics() { builder = std::unique_ptr(new UnorderedMetricsBuilder()); } + auto prio_limit = g_conf().get_val("exporter_prio_limit"); for (auto &[daemon_name, sock_client] : clients) { bool ok; sock_client.ping(&ok); @@ -103,18 +104,59 @@ void DaemonMetricCollector::dump_asok_metrics() { failures++; continue; } - std::string perf_dump_response = - asok_request(sock_client, "perf dump", daemon_name); - if (perf_dump_response.size() == 0) { - failures++; - continue; + std::string counter_dump_response = + asok_request(sock_client, "counter dump", daemon_name); + if (counter_dump_response.size() == 0) { + failures++; + continue; } - std::string perf_schema_response = - asok_request(sock_client, "perf schema", daemon_name); - if (perf_schema_response.size() == 0) { + std::string counter_schema_response = + asok_request(sock_client, "counter schema", daemon_name); + if (counter_schema_response.size() == 0) { failures++; continue; } + + json_object counter_dump = boost::json::parse(counter_dump_response).as_object(); + json_object counter_schema = boost::json::parse(counter_schema_response).as_object(); + + for (auto &labeled_perf : counter_schema) { + std::string labeled_perf_group = {labeled_perf.key().begin(), labeled_perf.key().end()}; + json_object labeled_perf_group_object = labeled_perf.value().as_object(); + auto counters = labeled_perf_group_object["counters"].as_object(); + auto counters_labels = labeled_perf_group_object["labels"].as_object(); + auto labeled_perf_group_counters = counter_dump[labeled_perf_group].as_object()["counters"].as_object(); + labels_t labels; + + for(auto &label: counters_labels) { + std::string label_key = {label.key().begin(), label.key().end()}; + labels[label_key] = quote(label.value().as_string().c_str()); + } + for (auto &counter : counters) { + json_object counter_group = counter.value().as_object(); + if (counter_group["priority"].as_int64() < prio_limit) { + continue; + } + std::string counter_name_init = {counter.key().begin(), counter.key().end()}; + std::string counter_name = "ceph_" + labeled_perf_group + "_" + counter_name_init; + std::replace_if(counter_name.begin(), counter_name.end(), is_hyphen, '_'); + + if (counters_labels.empty()) { + auto labels_and_name = get_labels_and_metric_name(daemon_name, counter_name); + labels = labels_and_name.first; + counter_name = labels_and_name.second; + } + // For now this is only required for rgw multi-site metrics + auto multisite_labels_and_name = add_fixed_name_metrics(counter_name); + if (!multisite_labels_and_name.first.empty()) { + labels.insert(multisite_labels_and_name.first.begin(), multisite_labels_and_name.first.end()); + counter_name = multisite_labels_and_name.second; + } + labels.insert({"ceph_daemon", quote(daemon_name)}); + auto perf_values = labeled_perf_group_counters.at(counter_name_init); + dump_asok_metric(counter_group, perf_values, counter_name, labels); + } + } std::string config_show = asok_request(sock_client, "config show", daemon_name); if (config_show.size() == 0) { @@ -132,76 +174,6 @@ void DaemonMetricCollector::dump_asok_metrics() { if (!pid_str.empty()) { daemon_pids.push_back({daemon_name, std::stoi(pid_str)}); } - json_object dump = boost::json::parse(perf_dump_response).as_object(); - json_object schema = boost::json::parse(perf_schema_response).as_object(); - auto prio_limit = g_conf().get_val("exporter_prio_limit"); - for (auto &perf : schema) { - std::string perf_group = {perf.key().begin(), perf.key().end()}; - json_object perf_group_object = perf.value().as_object(); - for (auto &perf_counter : perf_group_object) { - std::string perf_name = {perf_counter.key().begin(), - perf_counter.key().end()}; - json_object perf_info = perf_counter.value().as_object(); - if (perf_info["priority"].as_int64() < prio_limit) { - continue; - } - std::string name = "ceph_" + perf_group + "_" + perf_name; - std::replace_if(name.begin(), name.end(), is_hyphen, '_'); - - auto labels_and_name = get_labels_and_metric_name(daemon_name, name); - labels_t labels = labels_and_name.first; - name = labels_and_name.second; - - json_value perf_values = dump[perf_group].as_object()[perf_name]; - dump_asok_metric(perf_info, perf_values, name, labels); - } - } - // fetch labeled perf counters if config is set to true - bool labeledperf = g_conf().get_val("exporter_get_labeled_counters"); - if (labeledperf) { - std::string counter_dump_response = - asok_request(sock_client, "counter dump", daemon_name); - if (counter_dump_response.size() == 0) { - failures++; - continue; - } - std::string counter_schema_response = - asok_request(sock_client, "counter schema", daemon_name); - if (counter_schema_response.size() == 0) { - failures++; - continue; - } - - json_object counter_dump = boost::json::parse(counter_dump_response).as_object(); - json_object counter_schema = boost::json::parse(counter_schema_response).as_object(); - - for (auto &labeled_perf : counter_schema) { - std::string labeled_perf_group = {labeled_perf.key().begin(), labeled_perf.key().end()}; - json_object labeled_perf_group_object = labeled_perf.value().as_object(); - auto counters = labeled_perf_group_object["counters"].as_object(); - auto counters_labels = labeled_perf_group_object["labels"].as_object(); - auto labeled_perf_group_counters = counter_dump[labeled_perf_group].as_object()["counters"].as_object(); - labels_t labels; - - for(auto &label: counters_labels) { - std::string label_key = {label.key().begin(), label.key().end()}; - labels[label_key] = quote(label.value().as_string().c_str()); - } - labels["ceph_daemon"] = quote(daemon_name); - for (auto &counter : counters) { - json_object counter_group = counter.value().as_object(); - if (counter_group["priority"].as_int64() < prio_limit) { - continue; - } - std::string counter_name_init = {counter.key().begin(), counter.key().end()}; - std::string counter_name = "ceph_" + labeled_perf_group + "_" + counter_name_init; - std::replace_if(counter_name.begin(), counter_name.end(), is_hyphen, '_'); - - auto perf_values = labeled_perf_group_counters.at(counter_name_init); - dump_asok_metric(counter_group, perf_values, counter_name, labels); - } - } - } } dout(10) << "Perf counters retrieved for " << clients.size() - failures << "/" << clients.size() << " daemons." << dendl; @@ -313,28 +285,53 @@ DaemonMetricCollector::get_labels_and_metric_name(std::string daemon_name, std::string new_metric_name; labels_t labels; new_metric_name = metric_name; - if (daemon_name.find("rgw") != std::string::npos) { + // In vstart cluster socket files for rgw are stored as radosgw..asok + if (daemon_name.find("radosgw") != std::string::npos) { + std::size_t pos = daemon_name.find_last_of('.'); + std::string tmp = daemon_name.substr(pos+1); + labels["instance_id"] = quote(tmp); + } + else if (daemon_name.find("rgw") != std::string::npos) { std::string tmp = daemon_name.substr(16, std::string::npos); std::string::size_type pos = tmp.find('.'); labels["instance_id"] = quote("rgw." + tmp.substr(0, pos)); - } else { - labels["ceph_daemon"] = quote(daemon_name); - if (daemon_name.find("rbd-mirror") != std::string::npos) { - std::regex re( - "^rbd_mirror_image_([^/]+)/(?:(?:([^/]+)/" - ")?)(.*)\\.(replay(?:_bytes|_latency)?)$"); - std::smatch match; - if (std::regex_search(daemon_name, match, re) == true) { - new_metric_name = "ceph_rbd_mirror_image_" + match.str(4); - labels["pool"] = quote(match.str(1)); - labels["namespace"] = quote(match.str(2)); - labels["image"] = quote(match.str(3)); - } + } + else if (daemon_name.find("rbd-mirror") != std::string::npos) { + std::regex re( + "^rbd_mirror_image_([^/]+)/(?:(?:([^/]+)/" + ")?)(.*)\\.(replay(?:_bytes|_latency)?)$"); + std::smatch match; + if (std::regex_search(daemon_name, match, re) == true) { + new_metric_name = "ceph_rbd_mirror_image_" + match.str(4); + labels["pool"] = quote(match.str(1)); + labels["namespace"] = quote(match.str(2)); + labels["image"] = quote(match.str(3)); } } return {labels, new_metric_name}; } +// Add fixed name metrics from existing ones that have details in their names +// that should be in labels (not in name). For backward compatibility, +// a new fixed name metric is created (instead of replacing)and details are put +// in new labels. Intended for RGW sync perf. counters but extendable as required. +// See: https://tracker.ceph.com/issues/45311 +std::pair +DaemonMetricCollector::add_fixed_name_metrics(std::string metric_name) { + std::string new_metric_name; + labels_t labels; + new_metric_name = metric_name; + + std::regex re("^data_sync_from_(.*)\\."); + std::smatch match; + if (std::regex_search(metric_name, match, re) == true) { + new_metric_name = std::regex_replace(metric_name, re, "from_([^.]*)', 'from_zone"); + labels["source_zone"] = quote(match.str(1)); + return {labels, new_metric_name}; + } + return {}; +} + /* perf_values can be either a int/double or a json_object. Since json_value is a wrapper of both we use that class. diff --git a/src/exporter/DaemonMetricCollector.h b/src/exporter/DaemonMetricCollector.h index c8238669c419c..3d35a9c79de7d 100644 --- a/src/exporter/DaemonMetricCollector.h +++ b/src/exporter/DaemonMetricCollector.h @@ -49,6 +49,7 @@ private: labels_t labels); std::pair get_labels_and_metric_name(std::string daemon_name, std::string metric_name); + std::pair add_fixed_name_metrics(std::string metric_name); void get_process_metrics(std::vector> daemon_pids); std::string asok_request(AdminSocketClient &asok, std::string command, std::string daemon_name); };