#include "DaemonMetricCollector.h"
-#include "common/admin_socket_client.h"
-#include "common/debug.h"
-#include "common/hostname.h"
-#include "common/perf_counters.h"
-#include "global/global_init.h"
-#include "global/global_context.h"
-#include "common/split.h"
-#include "include/common_fwd.h"
-#include "util.h"
#include <boost/json/src.hpp>
#include <chrono>
#include <string>
#include <utility>
+#include "common/admin_socket_client.h"
+#include "common/debug.h"
+#include "common/hostname.h"
+#include "common/perf_counters.h"
+#include "common/split.h"
+#include "global/global_context.h"
+#include "global/global_init.h"
+#include "include/common_fwd.h"
+#include "util.h"
+
#define dout_context g_ceph_context
#define dout_subsys ceph_subsys_ceph_exporter
std::vector<std::pair<std::string, int>> daemon_pids;
+ int failures = 0;
bool sort = g_conf().get_val<bool>("exporter_sort_metrics");
if (sort) {
- builder = std::unique_ptr<OrderedMetricsBuilder>(new OrderedMetricsBuilder());
+ builder =
+ std::unique_ptr<OrderedMetricsBuilder>(new OrderedMetricsBuilder());
} else {
- builder = std::unique_ptr<UnorderedMetricsBuilder>(new UnorderedMetricsBuilder());
+ builder =
+ std::unique_ptr<UnorderedMetricsBuilder>(new UnorderedMetricsBuilder());
}
for (auto &[daemon_name, sock_client] : clients) {
bool ok;
sock_client.ping(&ok);
if (!ok) {
+ failures++;
continue;
}
- std::string perf_dump_response = asok_request(sock_client, "perf dump", daemon_name);
+ std::string perf_dump_response =
+ asok_request(sock_client, "perf dump", daemon_name);
if (perf_dump_response.size() == 0) {
+ failures++;
continue;
}
- std::string perf_schema_response = asok_request(sock_client, "perf schema", daemon_name);
+ std::string perf_schema_response =
+ asok_request(sock_client, "perf schema", daemon_name);
if (perf_schema_response.size() == 0) {
+ failures++;
+ continue;
+ }
+ std::string config_show =
+ asok_request(sock_client, "config show", daemon_name);
+ if (config_show.size() == 0) {
+ failures++;
continue;
}
- std::string config_show = asok_request(sock_client, "config show", daemon_name);
json_object pid_file_json = boost::json::parse(config_show).as_object();
std::string pid_path =
- boost_string_to_std(pid_file_json["pid_file"].as_string());
+ boost_string_to_std(pid_file_json["pid_file"].as_string());
std::string pid_str = read_file_to_string(pid_path);
if (!pid_path.size()) {
- continue;
+ dout(1) << "pid path is empty; process metrics won't be fetched for: "
+ << daemon_name << dendl;
}
daemon_pids.push_back({daemon_name, std::stoi(pid_str)});
json_object dump = boost::json::parse(perf_dump_response).as_object();
json_object schema = boost::json::parse(perf_schema_response).as_object();
for (auto &perf : schema) {
- auto sv = perf.key();
- std::string perf_group = {sv.begin(), sv.end()};
+ std::string perf_group = {perf.key().begin(), perf.key().end()};
json_object perf_group_object = perf.value().as_object();
for (auto &perf_counter : perf_group_object) {
- auto sv1 = perf_counter.key();
- std::string perf_name = {sv1.begin(), sv1.end()};
+ std::string perf_name = {perf_counter.key().begin(),
+ perf_counter.key().end()};
json_object perf_info = perf_counter.value().as_object();
auto prio_limit = g_conf().get_val<int64_t>("exporter_prio_limit");
- if (perf_info["priority"].as_int64() <
- prio_limit) {
+ if (perf_info["priority"].as_int64() < prio_limit) {
continue;
}
std::string name = "ceph_" + perf_group + "_" + perf_name;
}
}
}
- dout(10) << "Perf counters retrieved for " << clients.size() << " daemons." << dendl;
+ dout(10) << "Perf counters retrieved for " << clients.size() - failures << "/"
+ << clients.size() << " daemons." << dendl;
// get time spent on this function
timer.stop();
- std::string scrap_desc("Time spent scraping and transforming perfcounters to metrics");
+ std::string scrap_desc(
+ "Time spent scraping and transforming perf counters to metrics");
labels_t scrap_labels;
scrap_labels["host"] = quote(ceph_get_hostname());
scrap_labels["function"] = quote(__FUNCTION__);
"gauge", scrap_labels);
const std::lock_guard<std::mutex> lock(metrics_mutex);
- get_process_metrics(daemon_pids);
+ // only fetch process metrics when at least one daemon pid was collected
+ if (daemon_pids.size() != 0) {
+ get_process_metrics(daemon_pids);
+ }
metrics = builder->dump();
}
return stat;
}
-void DaemonMetricCollector::get_process_metrics(std::vector<std::pair<std::string, int>> daemon_pids) {
+void DaemonMetricCollector::get_process_metrics(
+ std::vector<std::pair<std::string, int>> daemon_pids) {
std::string path("/proc");
std::stringstream ss;
for (auto &[daemon_name, pid] : daemon_pids) {
double total_time_seconds = user_time + kernel_time;
double uptime = std::stod(uptimes[0]);
double elapsed_time = uptime - start_time_seconds;
- double idle_time = elapsed_time - total_time_seconds;
+ double idle_time = elapsed_time - total_time_seconds;
double usage = total_time_seconds * 100 / elapsed_time;
labels_t labels;
"Number of major page faults of daemon", "counter", labels);
add_metric(builder, stat.num_threads, "ceph_exporter_num_threads",
"Number of threads used by daemon", "gauge", labels);
- add_metric(builder, usage, "ceph_exporter_cpu_usage", "CPU usage of a daemon",
- "gauge", labels);
+ add_metric(builder, usage, "ceph_exporter_cpu_usage",
+ "CPU usage of a daemon", "gauge", labels);
std::string cpu_time_desc = "Process time in kernel/user/idle mode";
labels_t cpu_total_labels;
cpu_total_labels["mode"] = quote("idle");
add_metric(builder, idle_time, "ceph_exporter_cpu_total", cpu_time_desc,
"counter", cpu_total_labels);
- add_metric(builder, stat.vm_size, "ceph_exporter_vm_size", "Virtual memory used in a daemon",
- "gauge", labels);
+ add_metric(builder, stat.vm_size, "ceph_exporter_vm_size",
+ "Virtual memory used in a daemon", "gauge", labels);
add_metric(builder, stat.resident_size, "ceph_exporter_resident_size",
"Resident memory in a daemon", "gauge", labels);
}
}
std::string DaemonMetricCollector::asok_request(AdminSocketClient &asok,
- std::string command, std::string daemon_name) {
+ std::string command,
+ std::string daemon_name) {
std::string request("{\"prefix\": \"" + command + "\"}");
std::string response;
std::string err = asok.do_request(request, &response);
if (err.length() > 0 || response.substr(0, 5) == "ERROR") {
- dout(1) << "command " << command << "failed for daemon " << daemon_name
- << "with error: " << err << dendl;
+ dout(1) << "command " << command << "failed for daemon " << daemon_name
+ << "with error: " << err << dendl;
return "";
}
return response;
} else {
labels["ceph_daemon"] = quote(daemon_name);
if (daemon_name.find("rbd-mirror") != std::string::npos) {
- std::regex re("^rbd_mirror_image_([^/]+)/(?:(?:([^/]+)/"
- ")?)(.*)\\.(replay(?:_bytes|_latency)?)$");
+ std::regex re(
+ "^rbd_mirror_image_([^/]+)/(?:(?:([^/]+)/"
+ ")?)(.*)\\.(replay(?:_bytes|_latency)?)$");
std::smatch match;
if (std::regex_search(daemon_name, match, re) == true) {
new_metric_name = "ceph_rbd_mirror_image_" + match.str(4);
labels_t labels) {
int64_t type = perf_info["type"].as_int64();
std::string metric_type =
- boost_string_to_std(perf_info["metric_type"].as_string());
+ boost_string_to_std(perf_info["metric_type"].as_string());
std::string description =
- boost_string_to_std(perf_info["description"].as_string());
+ boost_string_to_std(perf_info["description"].as_string());
if (type & PERFCOUNTER_LONGRUNAVG) {
int64_t count = perf_values.as_object()["avgcount"].as_int64();
std::string sock_dir = g_conf().get_val<std::string>("exporter_sock_dir");
clients.clear();
std::filesystem::path sock_path = sock_dir;
- if(!std::filesystem::is_directory(sock_path.parent_path())) {
+ if (!std::filesystem::is_directory(sock_path.parent_path())) {
dout(1) << "ERROR: No such directory exist" << sock_dir << dendl;
return;
}
- for (const auto &entry :
- std::filesystem::directory_iterator(sock_dir)) {
+ for (const auto &entry : std::filesystem::directory_iterator(sock_dir)) {
if (entry.path().extension() == ".asok") {
std::string daemon_socket_name = entry.path().filename().string();
std::string daemon_name =
- daemon_socket_name.substr(0, daemon_socket_name.size() - 5);
+ daemon_socket_name.substr(0, daemon_socket_name.size() - 5);
if (clients.find(daemon_name) == clients.end() &&
!(daemon_name.find("mgr") != std::string::npos) &&
!(daemon_name.find("ceph-exporter") != std::string::npos)) {
void OrderedMetricsBuilder::add(std::string value, std::string name,
std::string description, std::string mtype,
labels_t labels) {
-
if (metrics.find(name) == metrics.end()) {
Metric metric(name, mtype, description);
metrics[name] = std::move(metric);
void UnorderedMetricsBuilder::add(std::string value, std::string name,
std::string description, std::string mtype,
labels_t labels) {
-
Metric metric(name, mtype, description);
metric.add(labels, value);
out += metric.dump() + "\n\n";