+/*
+ * CpuTrace: lightweight hardware performance counter profiling
+ *
+ * Implementation details.
+ *
+ * See detailed documentation and usage examples in:
+ * doc/dev/cputrace.rst
+ *
+ * This file contains the low-level implementation of CpuTrace,
+ * including perf_event setup, context management, and RAII
+ * profiling helpers.
+ */
+
#include "cputrace.h"
-#include "common/Formatter.h"
#include <linux/perf_event.h>
#include <asm/unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
-#include <cstring>
#include <thread>
+// Fatal check: when the condition is false, print the file and line to
+// stderr and terminate the process with exit(1).
#define PROFILE_ASSERT(x) if (!(x)) { fprintf(stderr, "Assert failed %s:%d\n", __FILE__, __LINE__); exit(1); }
+// Per-thread state; presumably a cached id consumed by get_thread_id() -- TODO confirm.
static thread_local uint64_t thread_id_hash;
static thread_local bool thread_id_initialized;
+// Profiler state shared by the whole process: anchor table, profiling flag, global lock.
static cputrace_profiler g_profiler;
+// Measurements keyed by name. get_named_measurement() holds
+// g_named_measurements_lock while it looks up or inserts an entry.
+static std::unordered_map<std::string, measurement_t> g_named_measurements;
+static std::mutex g_named_measurements_lock;
struct read_format {
uint64_t nr;
}
}
-void HW_init(HW_ctx* ctx, uint64_t flags) {
+// Value describing a context with nothing opened: every descriptor is -1 and
+// every event id is 0. Initializers are listed in HW_ctx declaration order.
+HW_ctx HW_ctx_empty = {
+  /* parent_fd */ -1,
+  /* fd_swi    */ -1,
+  /* fd_cyc    */ -1,
+  /* fd_cmiss  */ -1,
+  /* fd_bmiss  */ -1,
+  /* fd_ins    */ -1,
+  /* id_swi    */ 0,
+  /* id_cyc    */ 0,
+  /* id_cmiss  */ 0,
+  /* id_bmiss  */ 0,
+  /* id_ins    */ 0
+};
+
+void HW_init(HW_ctx* ctx, cputrace_flags flags) {
struct perf_event_attr pe;
int parent_fd = -1;
setup_perf_event(&pe, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CONTEXT_SWITCHES);
open_perf_fd(ctx->fd_swi, ctx->id_swi, &pe, "SWI", -1);
parent_fd = ctx->fd_swi;
- }
- else if (flags & HW_PROFILE_CYC) {
+ } else if (flags & HW_PROFILE_CYC) {
setup_perf_event(&pe, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES);
open_perf_fd(ctx->fd_cyc, ctx->id_cyc, &pe, "CYC", -1);
parent_fd = ctx->fd_cyc;
+ } else if (flags & HW_PROFILE_INS) {
+ setup_perf_event(&pe, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS);
+ open_perf_fd(ctx->fd_ins, ctx->id_ins, &pe, "INS", -1);
+ parent_fd = ctx->fd_ins;
} else if (flags & HW_PROFILE_CMISS) {
setup_perf_event(&pe, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES);
open_perf_fd(ctx->fd_cmiss, ctx->id_cmiss, &pe, "CMISS", -1);
setup_perf_event(&pe, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_MISSES);
open_perf_fd(ctx->fd_bmiss, ctx->id_bmiss, &pe, "BMISS", -1);
parent_fd = ctx->fd_bmiss;
- } else if (flags & HW_PROFILE_INS) {
- setup_perf_event(&pe, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS);
- open_perf_fd(ctx->fd_ins, ctx->id_ins, &pe, "INS", -1);
- parent_fd = ctx->fd_ins;
}
ctx->parent_fd = parent_fd;
}
+// Adds the counter delta between two snapshots to the anchor's running
+// totals. All three pointers are dereferenced unconditionally.
static void collect_samples(sample_t* start, sample_t* end, cputrace_anchor* anchor) {
- if (end->swi) {
- anchor->global_results.swi += end->swi - start->swi;
- }
- if (end->cyc) {
- anchor->global_results.cyc += end->cyc - start->cyc;
- }
- if (end->cmiss) {
- anchor->global_results.cmiss += end->cmiss - start->cmiss;
- }
- if (end->bmiss) {
- anchor->global_results.bmiss += end->bmiss - start->bmiss;
- }
- if (end->ins) {
- anchor->global_results.ins += end->ins - start->ins;
- }
+  const sample_t& first = *start;
+  const sample_t& last = *end;
+  // The difference is folded in as a single sample.
+  anchor->global_results.sample(last - first);
}
-HW_profile::HW_profile(const char* function, uint64_t index, uint64_t flags)
+HW_profile::HW_profile(const char* function, uint64_t index, cputrace_flags flags)
: function(function), index(index), flags(flags) {
- if (index >= CPUTRACE_MAX_ANCHORS || !g_profiler.profiling)
+ pthread_mutex_lock(&g_profiler.global_lock);
+ if (index >= CPUTRACE_MAX_ANCHORS || !g_profiler.profiling) {
+ pthread_mutex_unlock(&g_profiler.global_lock);
return;
-
+ }
+ pthread_mutex_unlock(&g_profiler.global_lock);
uint64_t tid = get_thread_id();
-
cputrace_anchor& anchor = g_profiler.anchors[index];
pthread_mutex_lock(&anchor.lock);
anchor.name = function;
}
+// Closes the profiling scope for the calling thread.
+//
+// When profiling is off, only the thread's capturing flag on the anchor is
+// cleared. Otherwise the thread's nesting level is lowered and, once it
+// reaches zero, the counters are read, the delta since `start` is folded
+// into the anchor totals, and `start` is advanced to the new reading.
HW_profile::~HW_profile() {
- if (!g_profiler.profiling || index >= CPUTRACE_MAX_ANCHORS)
- return;
-
+  // Validate before forming the anchor reference: an out-of-range index (or
+  // an anchor table that is still nullptr, its default) must never be
+  // dereferenced, locked or written to.
+  if (index >= CPUTRACE_MAX_ANCHORS || g_profiler.anchors == nullptr)
+    return;
cputrace_anchor& anchor = g_profiler.anchors[index];
uint64_t tid = get_thread_id();
-
+  pthread_mutex_lock(&g_profiler.global_lock);
+  if (!g_profiler.profiling) {
+    // Lock order is global lock first, then anchor lock.
+    pthread_mutex_lock(&anchor.lock);
+    anchor.is_capturing[tid] = false;
+    pthread_mutex_unlock(&anchor.lock);
+    pthread_mutex_unlock(&g_profiler.global_lock);
+    return;
+  }
+  pthread_mutex_unlock(&g_profiler.global_lock);
pthread_mutex_lock(&anchor.lock);
+  if (anchor.nest_level[tid] == 0) {
+    // Nothing to close for this thread. Presumably the constructor returned
+    // early (profiling was off) and never raised the level -- TODO confirm.
+    // Decrementing here would wrap the unsigned counter.
+    pthread_mutex_unlock(&anchor.lock);
+    return;
+  }
anchor.nest_level[tid]--;
if (anchor.nest_level[tid] == 0) {
HW_read(ctx, &anchor.end[tid]);
collect_samples(&anchor.start[tid], &anchor.end[tid], &anchor);
- std::memcpy(&anchor.start[tid], &anchor.end[tid], sizeof(anchor.start[tid]));
+  anchor.start[tid] = anchor.end[tid];
anchor.is_capturing[tid] = false;
}
pthread_mutex_unlock(&anchor.lock);
}
+// Returns the address of the measurement stored under `name`, inserting a
+// default-constructed entry if none exists yet. The registry lock is held
+// only for the lookup/insert; it is released before the pointer is returned.
+measurement_t* get_named_measurement(const std::string& name) {
+  std::lock_guard<std::mutex> registry_guard(g_named_measurements_lock);
+  measurement_t& entry = g_named_measurements[name];
+  return &entry;
+}
+
+// Resolves the measurement registered under `name` (creating it on first
+// use) and starts a heap-allocated HW_guard that records into it. The guard
+// is released by the destructor.
+HW_named_guard::HW_named_guard(const char* name, HW_ctx* ctx)
+  : name(name),
+    guard(new HW_guard(ctx, get_named_measurement(name)))
+{
+}
+
+// Destroys the owned HW_guard. That guard's destructor records the elapsed
+// sample when both its context and measurement pointers are non-null.
+// NOTE(review): the measurement is updated without the registry lock, so two
+// guards sharing a name on different threads may race -- confirm intended use.
+HW_named_guard::~HW_named_guard() {
+ delete guard;
+}
+
void cputrace_start() {
pthread_mutex_lock(&g_profiler.global_lock);
if (g_profiler.profiling) {
for (int i = 0; i < CPUTRACE_MAX_ANCHORS; ++i) {
if (!g_profiler.anchors[i].name) continue;
pthread_mutex_lock(&g_profiler.anchors[i].lock);
- g_profiler.anchors[i].global_results = results{};
+ g_profiler.anchors[i].global_results.reset();
pthread_mutex_unlock(&g_profiler.anchors[i].lock);
}
pthread_mutex_unlock(&g_profiler.global_lock);
for (int i = 0; i < CPUTRACE_MAX_ANCHORS; ++i) {
if (!g_profiler.anchors[i].name) continue;
pthread_mutex_lock(&g_profiler.anchors[i].lock);
- g_profiler.anchors[i].global_results = results{};
+ g_profiler.anchors[i].global_results.reset();
pthread_mutex_unlock(&g_profiler.anchors[i].lock);
}
f->open_object_section("cputrace_reset");
pthread_mutex_lock(&anchor.lock);
for (int j = 0; j < CPUTRACE_MAX_THREADS; ++j) {
- if (anchor.is_capturing[j]) {
+ if (anchor.is_capturing[j] && g_profiler.profiling) {
HW_read(anchor.active_contexts[j], &anchor.end[j]);
collect_samples(&anchor.start[j], &anchor.end[j], &anchor);
- std::memcpy(&anchor.start[j], &anchor.end[j], sizeof(anchor.start[j]));
+ anchor.start[j] = anchor.end[j];
}
}
pthread_mutex_unlock(&anchor.lock);
f->open_object_section(anchor.name);
- f->dump_unsigned("call_count", anchor.global_results.call_count);
-
- if (anchor.flags & HW_PROFILE_SWI && (counter.empty() || counter == "context_switches")) {
- f->dump_unsigned("context_switches", anchor.global_results.swi);
- if (anchor.global_results.call_count)
- f->dump_float("avg_context_switches", (double)anchor.global_results.swi / anchor.global_results.call_count);
- }
- if (anchor.flags & HW_PROFILE_CYC && (counter.empty() || counter == "cpu_cycles")) {
- f->dump_unsigned("cpu_cycles", anchor.global_results.cyc);
- if (anchor.global_results.call_count)
- f->dump_float("avg_cpu_cycles", (double)anchor.global_results.cyc / anchor.global_results.call_count);
- }
- if (anchor.flags & HW_PROFILE_CMISS && (counter.empty() || counter == "cache_misses")) {
- f->dump_unsigned("cache_misses", anchor.global_results.cmiss);
- if (anchor.global_results.call_count)
- f->dump_float("avg_cache_misses", (double)anchor.global_results.cmiss / anchor.global_results.call_count);
- }
- if (anchor.flags & HW_PROFILE_BMISS && (counter.empty() || counter == "branch_misses")) {
- f->dump_unsigned("branch_misses", anchor.global_results.bmiss);
- if (anchor.global_results.call_count)
- f->dump_float("avg_branch_misses", (double)anchor.global_results.bmiss / anchor.global_results.call_count);
- }
- if (anchor.flags & HW_PROFILE_INS && (counter.empty() || counter == "instructions")) {
- f->dump_unsigned("instructions", anchor.global_results.ins);
- if (anchor.global_results.call_count)
- f->dump_float("avg_instructions", (double)anchor.global_results.ins / anchor.global_results.call_count);
- }
+ anchor.global_results.dump(f, anchor.flags, counter);
f->close_section();
dumped = true;
}
if (anchor.is_capturing[j]) {
HW_read(anchor.active_contexts[j], &anchor.end[j]);
collect_samples(&anchor.start[j], &anchor.end[j], &anchor);
- std::memcpy(&anchor.start[j], &anchor.end[j], sizeof(anchor.start[j]));
+ anchor.start[j] = anchor.end[j];
}
}
pthread_mutex_unlock(&anchor.lock);
ss << " " << anchor.name << ":\n";
- ss << " call_count: " << anchor.global_results.call_count << "\n";
-
- if (anchor.flags & HW_PROFILE_SWI) {
- ss << " context_switches: " << anchor.global_results.swi;
- if (anchor.global_results.call_count) {
- ss << "\n avg_context_switches: " << (double)anchor.global_results.swi / anchor.global_results.call_count;
- }
- ss << "\n";
- }
- if (anchor.flags & HW_PROFILE_CYC) {
- ss << " cpu_cycles: " << anchor.global_results.cyc;
- if (anchor.global_results.call_count) {
- ss << "\n avg_cpu_cycles: " << (double)anchor.global_results.cyc / anchor.global_results.call_count;
- }
- ss << "\n";
- }
- if (anchor.flags & HW_PROFILE_CMISS) {
- ss << " cache_misses: " << anchor.global_results.cmiss;
- if (anchor.global_results.call_count) {
- ss << "\n avg_cache_misses: " << (double)anchor.global_results.cmiss / anchor.global_results.call_count;
- }
- ss << "\n";
- }
- if (anchor.flags & HW_PROFILE_BMISS) {
- ss << " branch_misses: " << anchor.global_results.bmiss;
- if (anchor.global_results.call_count) {
- ss << "\n avg_branch_misses: " << (double)anchor.global_results.bmiss / anchor.global_results.call_count;
- }
- ss << "\n";
- }
- if (anchor.flags & HW_PROFILE_INS) {
- ss << " instructions: " << anchor.global_results.ins;
- if (anchor.global_results.call_count) {
- ss << "\n avg_instructions: " << (double)anchor.global_results.ins / anchor.global_results.call_count;
- }
- ss << "\n";
- }
+ anchor.global_results.dump_to_stringstream(ss, anchor.flags);
dumped = true;
}
+#ifndef CPUTRACE_H
#define CPUTRACE_H
-#ifdef CPUTRACE_H
+
+/*
+ * CpuTrace: lightweight hardware performance counter profiling
+ *
+ * See detailed documentation and usage examples in:
+ * doc/dev/cputrace.rst
+ *
+ * This header provides the public interface for CpuTrace,
+ * including profiling helpers (HW_profile, HW_guard),
+ * measurement structures, and low-level initialization routines.
+ */
#include <pthread.h>
#include <stdint.h>
#include <string>
+#include <unordered_map>
+#include <mutex>
#include "common/Formatter.h"
#define CPUTRACE_MAX_ANCHORS 10
HW_PROFILE_INS = (1ULL << 4),
};
-#define HWProfileFunctionF(var, name, flags) HW_profile var(name, __COUNTER__ + 1, flags)
+// Bitwise OR of two flag values, computed on their 64-bit representation.
+inline cputrace_flags operator|(cputrace_flags a, cputrace_flags b) {
+  const uint64_t merged = static_cast<uint64_t>(a) | static_cast<uint64_t>(b);
+  return static_cast<cputrace_flags>(merged);
+}
-struct results {
- uint64_t call_count;
- uint64_t swi;
- uint64_t cyc;
- uint64_t cmiss;
- uint64_t bmiss;
- uint64_t ins;
-};
+// Bitwise AND of two flag values, computed on their 64-bit representation.
+// The result is non-zero exactly when the operands share a set bit.
+inline cputrace_flags operator&(cputrace_flags a, cputrace_flags b) {
+  const uint64_t common = static_cast<uint64_t>(a) & static_cast<uint64_t>(b);
+  return static_cast<cputrace_flags>(common);
+}
+
+#define HWProfileFunctionF(var, name, flags) HW_profile var(name, __COUNTER__ + 1, flags)
+// One reading of the profiled counters, or the difference between two
+// readings. Every field defaults to zero.
struct sample_t {
uint64_t swi = 0;
uint64_t cmiss = 0;
uint64_t bmiss = 0;
uint64_t ins = 0;
+
+  // Member-wise copy assignment. Defaulted rather than hand-written so that
+  // it returns sample_t& (the conventional signature, which allows chained
+  // assignment) and always covers every field.
+  sample_t& operator=(const sample_t& other) = default;
+
+  // Returns the field-by-field difference `*this - other`. The arithmetic is
+  // unsigned, so a field that is smaller in *this wraps around.
+  sample_t operator-(const sample_t& other) const {
+    sample_t result;
+    result.swi = swi - other.swi;
+    result.cyc = cyc - other.cyc;
+    result.cmiss = cmiss - other.cmiss;
+    result.bmiss = bmiss - other.bmiss;
+    result.ins = ins - other.ins;
+    return result;
+  }
};
-struct HW_ctx {
- int parent_fd;
- int fd_swi;
- int fd_cyc;
- int fd_cmiss;
- int fd_bmiss;
- int fd_ins;
- uint64_t id_swi;
- uint64_t id_cyc;
- uint64_t id_cmiss;
- uint64_t id_bmiss;
- uint64_t id_ins;
+// Running totals for one profiled scope: how many samples were folded in,
+// the per-counter sums, and, for context switches, how many samples saw at
+// least one switch versus none.
+//
+// `call_count` is cleared by reset() but is neither updated by sample() nor
+// emitted by the dump methods of this struct.
+struct measurement_t {
+  uint64_t call_count = 0;
+  uint64_t sample_count = 0;
+  uint64_t sum_swi = 0, sum_cyc = 0, sum_cmiss = 0, sum_bmiss = 0, sum_ins = 0;
+  uint64_t non_zero_swi_count = 0;
+  uint64_t zero_swi_count = 0;
+
+  // Folds one sample (a counter delta) into the totals.
+  void sample(const sample_t& s) {
+    sample_count += 1;
+    if (s.swi > 0) {
+      sum_swi += s.swi;
+      non_zero_swi_count += 1;
+    } else {
+      zero_swi_count += 1;
+    }
+    sum_cyc += s.cyc;
+    sum_cmiss += s.cmiss;
+    sum_bmiss += s.bmiss;
+    sum_ins += s.ins;
+  }
+
+  // Clears every counter back to zero.
+  void reset() {
+    call_count = 0;
+    sample_count = 0;
+    non_zero_swi_count = 0;
+    zero_swi_count = 0;
+    sum_swi = sum_cyc = sum_cmiss = sum_bmiss = sum_ins = 0;
+  }
+
+  // Writes the totals to `f` inside a "metrics" object section.
+  //
+  // flags:   selects which counters were profiled and may be reported.
+  // counter: when non-empty, restricts the output to the counter with that
+  //          name ("context_switches", "cpu_cycles", "cache_misses",
+  //          "branch_misses" or "instructions"); "sample_count" is always
+  //          written.
+  void dump(ceph::Formatter* f, cputrace_flags flags, const std::string& counter = "") const {
+    // True when every counter was requested, or `key` specifically.
+    auto selected = [&](const char* key) {
+      return counter.empty() || counter == key;
+    };
+    // Average per sample; only emitted when at least one sample exists.
+    auto dump_avg = [&](uint64_t sum) {
+      if (sample_count) {
+        f->dump_float("avg", static_cast<double>(sum) / sample_count);
+      }
+    };
+    auto dump_counter = [&](const char* name, uint64_t sum) {
+      f->open_object_section(name);
+      f->dump_unsigned("total", sum);
+      dump_avg(sum);
+      f->close_section();
+    };
+
+    f->open_object_section("metrics");
+    f->dump_unsigned("sample_count", sample_count);
+
+    // Context switches honour the `counter` filter like every other counter.
+    if ((flags & HW_PROFILE_SWI) && selected("context_switches")) {
+      f->open_object_section("context_switches");
+      f->dump_unsigned("non_zero_count", non_zero_swi_count);
+      f->dump_unsigned("zero_count", zero_swi_count);
+      f->dump_unsigned("total", sum_swi);
+      dump_avg(sum_swi);
+      f->close_section();
+    }
+
+    if ((flags & HW_PROFILE_CYC) && selected("cpu_cycles"))
+      dump_counter("cpu_cycles", sum_cyc);
+
+    if ((flags & HW_PROFILE_CMISS) && selected("cache_misses"))
+      dump_counter("cache_misses", sum_cmiss);
+
+    if ((flags & HW_PROFILE_BMISS) && selected("branch_misses"))
+      dump_counter("branch_misses", sum_bmiss);
+
+    if ((flags & HW_PROFILE_INS) && selected("instructions"))
+      dump_counter("instructions", sum_ins);
+
+    f->close_section();
+  }
+
+  // Appends a plain-text rendering of the totals to `ss`, limited to the
+  // counters selected by `flags`. Averages are omitted when no sample has
+  // been recorded.
+  void dump_to_stringstream(std::stringstream& ss, cputrace_flags flags) const {
+    ss << "sample_count: " << sample_count << "\n";
+    if (flags & HW_PROFILE_SWI) {
+      ss << "\ncontext_switches:\n";
+      ss << " non_zero_count: " << non_zero_swi_count << "\n";
+      ss << " zero_count: " << zero_swi_count << "\n";
+      ss << " total: " << sum_swi << "\n";
+      if (sample_count) {
+        ss << " avg : " << static_cast<double>(sum_swi) / sample_count << "\n";
+      }
+    }
+
+    auto dump_counter = [&](const char* name, uint64_t sum) {
+      ss << name << ":\n";
+      ss << " total: " << sum << "\n";
+      if (sample_count) {
+        ss << " avg : " << static_cast<double>(sum) / sample_count << "\n";
+      }
+    };
+
+    if (flags & HW_PROFILE_CYC)
+      dump_counter("cpu_cycles", sum_cyc);
+
+    if (flags & HW_PROFILE_CMISS)
+      dump_counter("cache_misses", sum_cmiss);
+
+    if (flags & HW_PROFILE_BMISS)
+      dump_counter("branch_misses", sum_bmiss);
+
+    if (flags & HW_PROFILE_INS)
+      dump_counter("instructions", sum_ins);
+  }
};
-constexpr HW_ctx HW_ctx_empty = {
- -1, -1, -1, -1, -1, -1,
- 0, 0, 0, 0, 0
+// Descriptors and event ids for one set of opened counters. The defaults
+// describe a context with nothing opened: descriptors -1, ids 0.
+struct HW_ctx {
+  int parent_fd{-1};
+  int fd_swi{-1};
+  int fd_cyc{-1};
+  int fd_cmiss{-1};
+  int fd_bmiss{-1};
+  int fd_ins{-1};
+  uint64_t id_swi{0};
+  uint64_t id_cyc{0};
+  uint64_t id_cmiss{0};
+  uint64_t id_bmiss{0};
+  uint64_t id_ins{0};
};
+extern HW_ctx HW_ctx_empty;
+
+// State for one profiling anchor: its name, its lock, the accumulated
+// results, and per-thread slots sized by CPUTRACE_MAX_THREADS.
struct cputrace_anchor {
- const char* name;
- pthread_mutex_t lock;
- results global_results;
- uint64_t flags;
- HW_ctx per_thread_ctx[CPUTRACE_MAX_THREADS];
+  const char* name{nullptr};
+  pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
+  measurement_t global_results = {};
+  cputrace_flags flags{};  // value-initialized: no counter selected
+  HW_ctx per_thread_ctx[CPUTRACE_MAX_THREADS] = {};
HW_ctx* active_contexts[CPUTRACE_MAX_THREADS] = {nullptr};
- sample_t start[CPUTRACE_MAX_THREADS];
- sample_t end[CPUTRACE_MAX_THREADS];
+  sample_t start[CPUTRACE_MAX_THREADS] = {};
+  sample_t end[CPUTRACE_MAX_THREADS] = {};
bool is_capturing[CPUTRACE_MAX_THREADS] = {false};
uint32_t nest_level[CPUTRACE_MAX_THREADS] = {0};
};
+// Global profiler state: the anchor table, whether profiling is enabled,
+// and the lock taken around reads and writes of that flag.
struct cputrace_profiler {
- cputrace_anchor* anchors;
- bool profiling;
- pthread_mutex_t global_lock;
+  cputrace_anchor* anchors{nullptr};
+  bool profiling{false};
+  pthread_mutex_t global_lock = PTHREAD_MUTEX_INITIALIZER;
};
+// Scope-based profiler for one anchor: the constructor opens the scope for
+// the calling thread and the destructor closes it.
class HW_profile {
public:
- HW_profile(const char* function, uint64_t index, uint64_t flags);
+  // function: name stored on the anchor.
+  // index:    anchor slot; values >= CPUTRACE_MAX_ANCHORS are ignored.
+  // flags:    counters to profile.
+  HW_profile(const char* function, uint64_t index, cputrace_flags flags);
~HW_profile();
+  // One object corresponds to exactly one open scope, so copying is disabled.
+  HW_profile(const HW_profile&) = delete;
+  HW_profile& operator=(const HW_profile&) = delete;
private:
const char* function;
uint64_t index;
- uint64_t flags;
+  cputrace_flags flags;
-struct HW_ctx* ctx;
+  // Initialized here because the constructor can return before assigning it,
+  // while the destructor may still pass it to HW_read().
+  struct HW_ctx* ctx = nullptr;
};
-void HW_init(HW_ctx* ctx, uint64_t flags);
-void HW_read(HW_ctx* ctx, sample_t* mesaure);
+void HW_init(HW_ctx* ctx, cputrace_flags flags);
+void HW_read(HW_ctx* ctx, sample_t* measure);
void HW_clean(HW_ctx* ctx);
+// Scope guard that measures the counters of `ctx` between construction and
+// destruction and folds the difference into `out_measurement`. If either
+// pointer is null the guard does nothing. Neither pointer is owned.
+class HW_guard {
+public:
+  HW_guard(HW_ctx* ctx, measurement_t* out_measurement)
+    : ctx(ctx), meas(out_measurement) {
+    if (ctx && meas) {
+      HW_read(ctx, &start);
+    }
+  }
+  ~HW_guard() {
+    if (ctx && meas) {
+      HW_read(ctx, &end);
+      meas->sample(end - start);
+    }
+  }
+  // A copy would record the same interval a second time on destruction.
+  HW_guard(const HW_guard&) = delete;
+  HW_guard& operator=(const HW_guard&) = delete;
+private:
+  HW_ctx* ctx{nullptr};
+  measurement_t* meas{nullptr};
+  sample_t start{};
+  sample_t end{};
+};
+
+// Scope guard that records into the measurement registered under `name`
+// (see get_named_measurement). It owns a heap-allocated HW_guard that is
+// deleted by the destructor.
+class HW_named_guard {
+public:
+  HW_named_guard(const char* name, HW_ctx* ctx = nullptr);
+  ~HW_named_guard();
+
+  // The object owns `guard` through a raw pointer; a copy would delete it
+  // twice, so copying is disabled.
+  HW_named_guard(const HW_named_guard&) = delete;
+  HW_named_guard& operator=(const HW_named_guard&) = delete;
+
+private:
+  const char* name = nullptr;
+  HW_guard* guard{nullptr};
+};
+
+measurement_t* get_named_measurement(const std::string& name);
+
void cputrace_start();
void cputrace_stop();
void cputrace_reset();
void cputrace_dump(ceph::Formatter* f, const std::string& logger = "", const std::string& counter = "");
void cputrace_print_to_stringstream(std::stringstream& ss);
-#endif
\ No newline at end of file
+#endif