From: Jaya Prakash Date: Wed, 20 Aug 2025 17:28:41 +0000 (+0000) Subject: common: cputrace refactor results to measurement_t X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=2491a6c607742e2b82c29bb6a3f0745b30eaae51;p=ceph.git common: cputrace refactor results to measurement_t Signed-off-by: Jaya Prakash --- diff --git a/src/common/cputrace.cc b/src/common/cputrace.cc index 615f499316f1..528c6c190352 100644 --- a/src/common/cputrace.cc +++ b/src/common/cputrace.cc @@ -1,5 +1,17 @@ +/* + * CpuTrace: lightweight hardware performance counter profiling + * + * Implementation details. + * + * See detailed documentation and usage examples in: + * doc/dev/cputrace.rst + * + * This file contains the low-level implementation of CpuTrace, + * including perf_event setup, context management, and RAII + * profiling helpers. + */ + #include "cputrace.h" -#include "common/Formatter.h" #include #include @@ -10,7 +22,6 @@ #include #include #include -#include #include #define PROFILE_ASSERT(x) if (!(x)) { fprintf(stderr, "Assert failed %s:%d\n", __FILE__, __LINE__); exit(1); } @@ -18,6 +29,8 @@ static thread_local uint64_t thread_id_hash; static thread_local bool thread_id_initialized; static cputrace_profiler g_profiler; +static std::unordered_map g_named_measurements; +static std::mutex g_named_measurements_lock; struct read_format { uint64_t nr; @@ -76,7 +89,12 @@ static void close_perf_fd(int& fd) { } } -void HW_init(HW_ctx* ctx, uint64_t flags) { +HW_ctx HW_ctx_empty = { + -1, -1, -1, -1, -1, -1, + 0, 0, 0, 0, 0 +}; + +void HW_init(HW_ctx* ctx, cputrace_flags flags) { struct perf_event_attr pe; int parent_fd = -1; @@ -84,11 +102,14 @@ void HW_init(HW_ctx* ctx, uint64_t flags) { setup_perf_event(&pe, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CONTEXT_SWITCHES); open_perf_fd(ctx->fd_swi, ctx->id_swi, &pe, "SWI", -1); parent_fd = ctx->fd_swi; - } - else if (flags & HW_PROFILE_CYC) { + } else if (flags & HW_PROFILE_CYC) { setup_perf_event(&pe, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES); open_perf_fd(ctx->fd_cyc, ctx->id_cyc, &pe, "CYC", -1); parent_fd = ctx->fd_cyc; + } else if (flags & HW_PROFILE_INS) { + setup_perf_event(&pe, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS); + open_perf_fd(ctx->fd_ins, ctx->id_ins, &pe, "INS", -1); + parent_fd = ctx->fd_ins; } else if (flags & HW_PROFILE_CMISS) { setup_perf_event(&pe, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES); open_perf_fd(ctx->fd_cmiss, ctx->id_cmiss, &pe, "CMISS", -1); @@ -97,10 +118,6 @@ void HW_init(HW_ctx* ctx, uint64_t flags) { setup_perf_event(&pe, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_MISSES); open_perf_fd(ctx->fd_bmiss, ctx->id_bmiss, &pe, "BMISS", -1); parent_fd = ctx->fd_bmiss; - } else if (flags & HW_PROFILE_INS) { - setup_perf_event(&pe, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS); - open_perf_fd(ctx->fd_ins, ctx->id_ins, &pe, "INS", -1); - parent_fd = ctx->fd_ins; } ctx->parent_fd = parent_fd; @@ -165,30 +182,19 @@ void HW_read(HW_ctx* ctx, sample_t* measure) { } static void collect_samples(sample_t* start, sample_t* end, cputrace_anchor* anchor) { - if (end->swi) { - anchor->global_results.swi += end->swi - start->swi; - } - if (end->cyc) { - anchor->global_results.cyc += end->cyc - start->cyc; - } - if (end->cmiss) { - anchor->global_results.cmiss += end->cmiss - start->cmiss; - } - if (end->bmiss) { - anchor->global_results.bmiss += end->bmiss - start->bmiss; - } - if (end->ins) { - anchor->global_results.ins += end->ins - start->ins; - } + sample_t elapsed = *end - *start; + anchor->global_results.sample(elapsed); } -HW_profile::HW_profile(const char* function, uint64_t index, uint64_t flags) +HW_profile::HW_profile(const char* function, uint64_t index, cputrace_flags flags) : function(function), index(index), flags(flags) { - if (index >= CPUTRACE_MAX_ANCHORS || !g_profiler.profiling) + pthread_mutex_lock(&g_profiler.global_lock); + if (index >= CPUTRACE_MAX_ANCHORS || !g_profiler.profiling) { + pthread_mutex_unlock(&g_profiler.global_lock); return; - + } + pthread_mutex_unlock(&g_profiler.global_lock); uint64_t tid = get_thread_id(); - cputrace_anchor& anchor = g_profiler.anchors[index]; pthread_mutex_lock(&anchor.lock); anchor.name = function; @@ -213,23 +219,44 @@ HW_profile::HW_profile(const char* function, uint64_t index, uint64_t flags) } HW_profile::~HW_profile() { - if (!g_profiler.profiling || index >= CPUTRACE_MAX_ANCHORS) - return; - cputrace_anchor& anchor = g_profiler.anchors[index]; uint64_t tid = get_thread_id(); - + pthread_mutex_lock(&g_profiler.global_lock); + if (!g_profiler.profiling || index >= CPUTRACE_MAX_ANCHORS){ + pthread_mutex_lock(&anchor.lock); + anchor.is_capturing[tid] = false; + pthread_mutex_unlock(&anchor.lock); + pthread_mutex_unlock(&g_profiler.global_lock); + return; + } + pthread_mutex_unlock(&g_profiler.global_lock); pthread_mutex_lock(&anchor.lock); anchor.nest_level[tid]--; if (anchor.nest_level[tid] == 0) { HW_read(ctx, &anchor.end[tid]); collect_samples(&anchor.start[tid], &anchor.end[tid], &anchor); - std::memcpy(&anchor.start[tid], &anchor.end[tid], sizeof(anchor.start[tid])); + anchor.start[tid] = anchor.end[tid]; anchor.is_capturing[tid] = false; } pthread_mutex_unlock(&anchor.lock); } +measurement_t* get_named_measurement(const std::string& name) { + std::lock_guard g(g_named_measurements_lock); + return &g_named_measurements[name]; +} + +HW_named_guard::HW_named_guard(const char* name, HW_ctx* ctx) + : name(name) +{ + measurement_t* meas = get_named_measurement(name); + guard = new HW_guard(ctx, meas); +} + +HW_named_guard::~HW_named_guard() { + delete guard; +} + void cputrace_start() { pthread_mutex_lock(&g_profiler.global_lock); if (g_profiler.profiling) { @@ -287,7 +314,7 @@ void cputrace_reset() { for (int i = 0; i < CPUTRACE_MAX_ANCHORS; ++i) { if (!g_profiler.anchors[i].name) continue; pthread_mutex_lock(&g_profiler.anchors[i].lock); - g_profiler.anchors[i].global_results = results{}; + g_profiler.anchors[i].global_results.reset(); pthread_mutex_unlock(&g_profiler.anchors[i].lock); } pthread_mutex_unlock(&g_profiler.global_lock); @@ -298,7 +325,7 @@ void cputrace_reset(ceph::Formatter* f) { for (int i = 0; i < CPUTRACE_MAX_ANCHORS; ++i) { if (!g_profiler.anchors[i].name) continue; pthread_mutex_lock(&g_profiler.anchors[i].lock); - g_profiler.anchors[i].global_results = results{}; + g_profiler.anchors[i].global_results.reset(); pthread_mutex_unlock(&g_profiler.anchors[i].lock); } f->open_object_section("cputrace_reset"); @@ -320,42 +347,16 @@ void cputrace_dump(ceph::Formatter* f, const std::string& logger, const std::str pthread_mutex_lock(&anchor.lock); for (int j = 0; j < CPUTRACE_MAX_THREADS; ++j) { - if (anchor.is_capturing[j]) { + if (anchor.is_capturing[j] && g_profiler.profiling) { HW_read(anchor.active_contexts[j], &anchor.end[j]); collect_samples(&anchor.start[j], &anchor.end[j], &anchor); - std::memcpy(&anchor.start[j], &anchor.end[j], sizeof(anchor.start[j])); + anchor.start[j] = anchor.end[j]; } } pthread_mutex_unlock(&anchor.lock); f->open_object_section(anchor.name); - f->dump_unsigned("call_count", anchor.global_results.call_count); - - if (anchor.flags & HW_PROFILE_SWI && (counter.empty() || counter == "context_switches")) { - f->dump_unsigned("context_switches", anchor.global_results.swi); - if (anchor.global_results.call_count) - f->dump_float("avg_context_switches", (double)anchor.global_results.swi / anchor.global_results.call_count); - } - if (anchor.flags & HW_PROFILE_CYC && (counter.empty() || counter == "cpu_cycles")) { - f->dump_unsigned("cpu_cycles", anchor.global_results.cyc); - if (anchor.global_results.call_count) - f->dump_float("avg_cpu_cycles", (double)anchor.global_results.cyc / anchor.global_results.call_count); - } - if (anchor.flags & HW_PROFILE_CMISS && (counter.empty() || counter == "cache_misses")) { - f->dump_unsigned("cache_misses", anchor.global_results.cmiss); - if (anchor.global_results.call_count) - f->dump_float("avg_cache_misses", (double)anchor.global_results.cmiss / anchor.global_results.call_count); - } - if (anchor.flags & HW_PROFILE_BMISS && (counter.empty() || counter == "branch_misses")) { - f->dump_unsigned("branch_misses", anchor.global_results.bmiss); - if (anchor.global_results.call_count) - f->dump_float("avg_branch_misses", (double)anchor.global_results.bmiss / anchor.global_results.call_count); - } - if (anchor.flags & HW_PROFILE_INS && (counter.empty() || counter == "instructions")) { - f->dump_unsigned("instructions", anchor.global_results.ins); - if (anchor.global_results.call_count) - f->dump_float("avg_instructions", (double)anchor.global_results.ins / anchor.global_results.call_count); - } + anchor.global_results.dump(f, anchor.flags, counter); f->close_section(); dumped = true; } @@ -381,49 +382,13 @@ void cputrace_print_to_stringstream(std::stringstream& ss) { if (anchor.is_capturing[j]) { HW_read(anchor.active_contexts[j], &anchor.end[j]); collect_samples(&anchor.start[j], &anchor.end[j], &anchor); - std::memcpy(&anchor.start[j], &anchor.end[j], sizeof(anchor.start[j])); + anchor.start[j] = anchor.end[j]; } } pthread_mutex_unlock(&anchor.lock); ss << " " << anchor.name << ":\n"; - ss << " call_count: " << anchor.global_results.call_count << "\n"; - - if (anchor.flags & HW_PROFILE_SWI) { - ss << " context_switches: " << anchor.global_results.swi; - if (anchor.global_results.call_count) { - ss << "\n avg_context_switches: " << (double)anchor.global_results.swi / anchor.global_results.call_count; - } - ss << "\n"; - } - if (anchor.flags & HW_PROFILE_CYC) { - ss << " cpu_cycles: " << anchor.global_results.cyc; - if (anchor.global_results.call_count) { - ss << "\n avg_cpu_cycles: " << (double)anchor.global_results.cyc / anchor.global_results.call_count; - } - ss << "\n"; - } - if (anchor.flags & HW_PROFILE_CMISS) { - ss << " cache_misses: " << anchor.global_results.cmiss; - if (anchor.global_results.call_count) { - ss << "\n avg_cache_misses: " << (double)anchor.global_results.cmiss / anchor.global_results.call_count; - } - ss << "\n"; - } - if (anchor.flags & HW_PROFILE_BMISS) { - ss << " branch_misses: " << anchor.global_results.bmiss; - if (anchor.global_results.call_count) { - ss << "\n avg_branch_misses: " << (double)anchor.global_results.bmiss / anchor.global_results.call_count; - } - ss << "\n"; - } - if (anchor.flags & HW_PROFILE_INS) { - ss << " instructions: " << anchor.global_results.ins; - if (anchor.global_results.call_count) { - ss << "\n avg_instructions: " << (double)anchor.global_results.ins / anchor.global_results.call_count; - } - ss << "\n"; - } + anchor.global_results.dump_to_stringstream(ss, anchor.flags); dumped = true; } diff --git a/src/common/cputrace.h b/src/common/cputrace.h index 8190681415b4..2525fd9f1287 100644 --- a/src/common/cputrace.h +++ b/src/common/cputrace.h @@ -1,9 +1,22 @@ +#ifndef CPUTRACE_H #define CPUTRACE_H -#ifdef CPUTRACE_H + +/* + * CpuTrace: lightweight hardware performance counter profiling + * + * See detailed documentation and usage examples in: + * doc/dev/cputrace.rst + * + * This header provides the public interface for CpuTrace, + * including profiling helpers (HW_profile, HW_guard), + * measurement structures, and low-level initialization routines. + */ #include #include #include +#include +#include #include "common/Formatter.h" #define CPUTRACE_MAX_ANCHORS 10 @@ -17,16 +30,17 @@ enum cputrace_flags { HW_PROFILE_INS = (1ULL << 4), }; -#define HWProfileFunctionF(var, name, flags) HW_profile var(name, __COUNTER__ + 1, flags) +inline cputrace_flags operator|(cputrace_flags a, cputrace_flags b) { + return static_cast( + static_cast(a) | static_cast(b)); +} -struct results { - uint64_t call_count; - uint64_t swi; - uint64_t cyc; - uint64_t cmiss; - uint64_t bmiss; - uint64_t ins; -}; +inline cputrace_flags operator&(cputrace_flags a, cputrace_flags b) { + return static_cast( + static_cast(a) & static_cast(b)); +} + +#define HWProfileFunctionF(var, name, flags) HW_profile var(name, __COUNTER__ + 1, flags) struct sample_t { uint64_t swi = 0; @@ -34,62 +48,212 @@ struct sample_t { uint64_t cmiss = 0; uint64_t bmiss = 0; uint64_t ins = 0; + + void operator=(const sample_t& other) { + swi = other.swi; + cyc = other.cyc; + cmiss = other.cmiss; + bmiss = other.bmiss; + ins = other.ins; + } + + sample_t operator-(const sample_t& other) const { + sample_t result; + result.swi = swi - other.swi; + result.cyc = cyc - other.cyc; + result.cmiss = cmiss - other.cmiss; + result.bmiss = bmiss - other.bmiss; + result.ins = ins - other.ins; + return result; + } }; -struct HW_ctx { - int parent_fd; - int fd_swi; - int fd_cyc; - int fd_cmiss; - int fd_bmiss; - int fd_ins; - uint64_t id_swi; - uint64_t id_cyc; - uint64_t id_cmiss; - uint64_t id_bmiss; - uint64_t id_ins; +struct measurement_t { + uint64_t call_count = 0; + uint64_t sample_count = 0; + uint64_t sum_swi = 0, sum_cyc = 0, sum_cmiss = 0, sum_bmiss = 0, sum_ins = 0; + uint64_t non_zero_swi_count = 0; + uint64_t zero_swi_count = 0; + + void sample(const sample_t& s) { + sample_count += 1; + if (s.swi > 0) { + sum_swi += s.swi; + non_zero_swi_count += 1; + } + if (s.swi == 0) { + zero_swi_count += 1; + } + sum_cyc += s.cyc; + sum_cmiss += s.cmiss; + sum_bmiss += s.bmiss; + sum_ins += s.ins; + } + + void reset() { + call_count = 0; + sample_count = 0; + non_zero_swi_count = 0; + zero_swi_count = 0; + sum_swi = sum_cyc = sum_cmiss = sum_bmiss = sum_ins = 0; + } + + void dump(ceph::Formatter* f, cputrace_flags flags, const std::string& counter = "") const { + f->open_object_section("metrics"); + f->dump_unsigned("sample_count", sample_count); + if (flags & HW_PROFILE_SWI) { + f->open_object_section("context_switches"); + f->dump_unsigned("non_zero_count", non_zero_swi_count); + f->dump_unsigned("zero_count", zero_swi_count); + f->dump_unsigned("total", sum_swi); + if (sample_count) { + f->dump_float("avg", (double)sum_swi / sample_count); + } + f->close_section(); + } + + auto dump_counter = [&](const std::string& name, uint64_t sum) { + f->open_object_section(name.c_str()); + f->dump_unsigned("total", sum); + if (sample_count) { + f->dump_float("avg", static_cast(sum) / sample_count); + } + f->close_section(); + }; + + if (flags & HW_PROFILE_CYC && (counter.empty() || counter == "cpu_cycles")) + dump_counter("cpu_cycles", sum_cyc); + + if (flags & HW_PROFILE_CMISS && (counter.empty() || counter == "cache_misses")) + dump_counter("cache_misses", sum_cmiss); + + if (flags & HW_PROFILE_BMISS && (counter.empty() || counter == "branch_misses")) + dump_counter("branch_misses", sum_bmiss); + + if (flags & HW_PROFILE_INS && (counter.empty() || counter == "instructions")) + dump_counter("instructions", sum_ins); + + f->close_section(); + } + + void dump_to_stringstream(std::stringstream& ss, cputrace_flags flags) const { + ss << "sample_count: " << sample_count << "\n"; + if (flags & HW_PROFILE_SWI) { + ss << "\ncontext_switches:\n"; + ss << " non_zero_count: " << non_zero_swi_count << "\n"; + ss << " zero_count: " << zero_swi_count << "\n"; + ss << " total: " << sum_swi << "\n"; + if (sample_count) { + ss << " avg : " << (double)sum_swi / sample_count << "\n"; + } + } + + auto dump_counter = [&](const std::string& name, uint64_t sum) { + ss << name << ":\n"; + ss << " total: " << sum << "\n"; + if (sample_count) { + ss << " avg : " << (double)sum / sample_count << "\n"; + } + }; + + if (flags & HW_PROFILE_CYC) + dump_counter("cpu_cycles", sum_cyc); + + if (flags & HW_PROFILE_CMISS) + dump_counter("cache_misses", sum_cmiss); + + if (flags & HW_PROFILE_BMISS) + dump_counter("branch_misses", sum_bmiss); + + if (flags & HW_PROFILE_INS) + dump_counter("instructions", sum_ins); + } }; -constexpr HW_ctx HW_ctx_empty = { - -1, -1, -1, -1, -1, -1, - 0, 0, 0, 0, 0 +struct HW_ctx { + int parent_fd = -1; + int fd_swi = -1; + int fd_cyc = -1; + int fd_cmiss = -1; + int fd_bmiss = -1; + int fd_ins = -1; + uint64_t id_swi = 0; + uint64_t id_cyc = 0; + uint64_t id_cmiss = 0; + uint64_t id_bmiss = 0; + uint64_t id_ins = 0; }; +extern HW_ctx HW_ctx_empty; + struct cputrace_anchor { - const char* name; - pthread_mutex_t lock; - results global_results; - uint64_t flags; - HW_ctx per_thread_ctx[CPUTRACE_MAX_THREADS]; + const char* name = nullptr; + pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER; + measurement_t global_results{}; + cputrace_flags flags = static_cast(0); + HW_ctx per_thread_ctx[CPUTRACE_MAX_THREADS]{}; HW_ctx* active_contexts[CPUTRACE_MAX_THREADS] = {nullptr}; - sample_t start[CPUTRACE_MAX_THREADS]; - sample_t end[CPUTRACE_MAX_THREADS]; + sample_t start[CPUTRACE_MAX_THREADS]{}; + sample_t end[CPUTRACE_MAX_THREADS]{}; bool is_capturing[CPUTRACE_MAX_THREADS] = {false}; uint32_t nest_level[CPUTRACE_MAX_THREADS] = {0}; }; struct cputrace_profiler { - cputrace_anchor* anchors; - bool profiling; - pthread_mutex_t global_lock; + cputrace_anchor* anchors = nullptr; + bool profiling = false; + pthread_mutex_t global_lock = PTHREAD_MUTEX_INITIALIZER; }; class HW_profile { public: - HW_profile(const char* function, uint64_t index, uint64_t flags); + HW_profile(const char* function, uint64_t index, cputrace_flags flags); ~HW_profile(); private: const char* function; uint64_t index; - uint64_t flags; + cputrace_flags flags; struct HW_ctx* ctx; }; -void HW_init(HW_ctx* ctx, uint64_t flags); -void HW_read(HW_ctx* ctx, sample_t* mesaure); +void HW_init(HW_ctx* ctx, cputrace_flags flags); +void HW_read(HW_ctx* ctx, sample_t* measure); void HW_clean(HW_ctx* ctx); +class HW_guard { +public: + HW_guard(HW_ctx* ctx, measurement_t* out_measurement) + : ctx(ctx), meas(out_measurement) { + if (ctx && meas) { + HW_read(ctx, &start); + } + } + ~HW_guard() { + if (ctx && meas) { + HW_read(ctx, &end); + sample_t elapsed = end - start; + meas->sample(elapsed); + } + } +private: + HW_ctx* ctx{nullptr}; + measurement_t* meas{nullptr}; + sample_t start{}, end{}; +}; + +class HW_named_guard { +public: + HW_named_guard(const char* name, HW_ctx* ctx = nullptr); + ~HW_named_guard(); + +private: + const char* name = nullptr; + HW_guard* guard{nullptr}; +}; + +measurement_t* get_named_measurement(const std::string& name); + void cputrace_start(); void cputrace_stop(); void cputrace_reset(); @@ -99,4 +263,4 @@ void cputrace_reset(ceph::Formatter* f); void cputrace_dump(ceph::Formatter* f, const std::string& logger = "", const std::string& counter = ""); void cputrace_print_to_stringstream(std::stringstream& ss); -#endif \ No newline at end of file +#endif