#include <stdlib.h>
#include <stdio.h>
#include <string.h>
+#include <cstring>
#include <thread>
#define PROFILE_ASSERT(x) if (!(x)) { fprintf(stderr, "Assert failed %s:%d\n", __FILE__, __LINE__); exit(1); }
static thread_local uint64_t thread_id_hash;
static thread_local bool thread_id_initialized;
static cputrace_profiler g_profiler;
-static HW_ctx* active_contexts[CPUTRACE_MAX_ANCHORS][CPUTRACE_MAX_THREADS] = {{nullptr}};
struct read_format {
uint64_t nr;
if (fd != -1) {
ioctl(fd, PERF_EVENT_IOC_ID, &id);
ioctl(fd, PERF_EVENT_IOC_RESET, 0);
- } else {
+ } else {
fprintf(stderr, "Failed to open perf event for %s: %s\n", name, strerror(errno));
id = 0;
}
}
}
-static void read_perf_event(HW_ctx* ctx, cputrace_anchor* anchor) {
- pthread_mutex_lock(&anchor->lock);
- if (ctx->parent_fd != -1) {
- sample_t measure;
- HW_read(ctx, &measure);
- if (measure.swi) {
- anchor->global_results.swi += measure.swi;
- }
- if (measure.cyc) {
- anchor->global_results.cyc += measure.cyc;
- }
- if (measure.cmiss) {
- anchor->global_results.cmiss += measure.cmiss;
- }
- if (measure.bmiss) {
- anchor->global_results.bmiss += measure.bmiss;
- }
- if (measure.ins) {
- anchor->global_results.ins += measure.ins;
- }
- if (ioctl(ctx->parent_fd, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP) != 0) {
- fprintf(stderr, "Failed to reset perf counters: %s\n", strerror(errno));
- }
+static void collect_samples(sample_t* start, sample_t* end, cputrace_anchor* anchor) {
+ if (end->swi) {
+ anchor->global_results.swi += end->swi - start->swi;
+ }
+ if (end->cyc) {
+ anchor->global_results.cyc += end->cyc - start->cyc;
+ }
+ if (end->cmiss) {
+ anchor->global_results.cmiss += end->cmiss - start->cmiss;
+ }
+ if (end->bmiss) {
+ anchor->global_results.bmiss += end->bmiss - start->bmiss;
+ }
+ if (end->ins) {
+ anchor->global_results.ins += end->ins - start->ins;
}
- pthread_mutex_unlock(&anchor->lock);
}
HW_profile::HW_profile(const char* function, uint64_t index, uint64_t flags)
uint64_t tid = get_thread_id();
cputrace_anchor& anchor = g_profiler.anchors[index];
+ pthread_mutex_lock(&anchor.lock);
anchor.name = function;
anchor.flags = flags;
- pthread_mutex_lock(&anchor.lock);
anchor.global_results.call_count += 1;
- pthread_mutex_unlock(&anchor.lock);
-
- ctx = HW_ctx_empty;
- HW_init(&ctx, flags);
- pthread_mutex_lock(&g_profiler.global_lock);
- active_contexts[index][tid] = &ctx;
- pthread_mutex_unlock(&g_profiler.global_lock);
+ if (anchor.active_contexts[tid] == nullptr) {
+ ctx = &anchor.per_thread_ctx[tid];
+ *ctx = HW_ctx_empty;
+ HW_init(ctx, flags);
+ anchor.active_contexts[tid] = ctx;
+ } else {
+ ctx = anchor.active_contexts[tid];
+ }
+ anchor.nest_level[tid]++;
+ if (anchor.nest_level[tid] == 1) {
+ HW_read(ctx, &anchor.start[tid]);
+ }
+ anchor.is_capturing[tid] = true;
+ pthread_mutex_unlock(&anchor.lock);
}
HW_profile::~HW_profile() {
if (!g_profiler.profiling || index >= CPUTRACE_MAX_ANCHORS)
return;
+ cputrace_anchor& anchor = g_profiler.anchors[index];
uint64_t tid = get_thread_id();
- pthread_mutex_lock(&g_profiler.global_lock);
- read_perf_event(&ctx, &g_profiler.anchors[index]);
- HW_clean(&ctx);
- active_contexts[index][tid] = nullptr;
- pthread_mutex_unlock(&g_profiler.global_lock);
+
+ pthread_mutex_lock(&anchor.lock);
+ anchor.nest_level[tid]--;
+ if (anchor.nest_level[tid] == 0) {
+ HW_read(ctx, &anchor.end[tid]);
+ collect_samples(&anchor.start[tid], &anchor.end[tid], &anchor);
+ std::memcpy(&anchor.start[tid], &anchor.end[tid], sizeof(anchor.start[tid]));
+ anchor.is_capturing[tid] = false;
+ }
+ pthread_mutex_unlock(&anchor.lock);
}
void cputrace_start() {
bool dumped = false;
for (int i = 0; i < CPUTRACE_MAX_ANCHORS; ++i) {
- const auto& anchor = g_profiler.anchors[i];
+ cputrace_anchor& anchor = g_profiler.anchors[i];
if (!anchor.name || (!logger.empty() && anchor.name != logger)) {
continue;
}
+ pthread_mutex_lock(&anchor.lock);
for (int j = 0; j < CPUTRACE_MAX_THREADS; ++j) {
- if (active_contexts[i][j]) {
- read_perf_event(active_contexts[i][j], &g_profiler.anchors[i]);
+ if (anchor.is_capturing[j]) {
+ HW_read(anchor.active_contexts[j], &anchor.end[j]);
+ collect_samples(&anchor.start[j], &anchor.end[j], &anchor);
+ std::memcpy(&anchor.start[j], &anchor.end[j], sizeof(anchor.start[j]));
}
}
+ pthread_mutex_unlock(&anchor.lock);
f->open_object_section(anchor.name);
f->dump_unsigned("call_count", anchor.global_results.call_count);
bool dumped = false;
for (int i = 0; i < CPUTRACE_MAX_ANCHORS; ++i) {
- const auto& anchor = g_profiler.anchors[i];
+ cputrace_anchor& anchor = g_profiler.anchors[i];
if (!anchor.name) {
continue;
}
+ pthread_mutex_lock(&anchor.lock);
for (int j = 0; j < CPUTRACE_MAX_THREADS; ++j) {
- if (active_contexts[i][j]) {
- read_perf_event(active_contexts[i][j], &g_profiler.anchors[i]);
+ if (anchor.is_capturing[j]) {
+ HW_read(anchor.active_contexts[j], &anchor.end[j]);
+ collect_samples(&anchor.start[j], &anchor.end[j], &anchor);
+ std::memcpy(&anchor.start[j], &anchor.end[j], sizeof(anchor.start[j]));
}
}
+ pthread_mutex_unlock(&anchor.lock);
ss << " " << anchor.name << ":\n";
ss << " call_count: " << anchor.global_results.call_count << "\n";
fprintf(stderr, "Failed to initialize mutex for anchor %d: %s\n", i, strerror(errno));
exit(1);
}
+
}
if (pthread_mutex_init(&g_profiler.global_lock, nullptr) != 0) {
fprintf(stderr, "Failed to initialize global mutex: %s\n", strerror(errno));
__attribute__((destructor)) static void cputrace_fini() {
for (int i = 0; i < CPUTRACE_MAX_ANCHORS; ++i) {
+ cputrace_anchor& anchor = g_profiler.anchors[i];
+ pthread_mutex_lock(&anchor.lock);
+ for (int j = 0; j < CPUTRACE_MAX_THREADS; ++j) {
+ if (anchor.active_contexts[j] != nullptr) {
+ HW_clean(&anchor.per_thread_ctx[j]);
+ anchor.active_contexts[j] = nullptr;
+ anchor.is_capturing[j] = false;
+ }
+ }
+ pthread_mutex_unlock(&anchor.lock);
if (pthread_mutex_destroy(&g_profiler.anchors[i].lock) != 0) {
fprintf(stderr, "Failed to destroy mutex for anchor %d: %s\n", i, strerror(errno));
}
-#pragma once
+#define CPUTRACE_H
+#ifdef CPUTRACE_H
+
#include <pthread.h>
#include <stdint.h>
#include <string>
uint64_t ins;
};
-struct cputrace_anchor {
- const char* name;
- pthread_mutex_t lock;
- results global_results;
- uint64_t flags;
-};
-
-struct cputrace_profiler {
- cputrace_anchor* anchors;
- bool profiling;
- pthread_mutex_t global_lock;
+struct sample_t {
+ uint64_t swi = 0;
+ uint64_t cyc = 0;
+ uint64_t cmiss = 0;
+ uint64_t bmiss = 0;
+ uint64_t ins = 0;
};
struct HW_ctx {
0, 0, 0, 0, 0
};
-struct sample_t {
- uint64_t swi = 0;
- uint64_t cyc = 0;
- uint64_t cmiss = 0;
- uint64_t bmiss = 0;
- uint64_t ins = 0;
+struct cputrace_anchor {
+ const char* name;
+ pthread_mutex_t lock;
+ results global_results;
+ uint64_t flags;
+ HW_ctx per_thread_ctx[CPUTRACE_MAX_THREADS];
+ HW_ctx* active_contexts[CPUTRACE_MAX_THREADS] = {nullptr};
+ sample_t start[CPUTRACE_MAX_THREADS];
+ sample_t end[CPUTRACE_MAX_THREADS];
+ bool is_capturing[CPUTRACE_MAX_THREADS] = {false};
+ uint32_t nest_level[CPUTRACE_MAX_THREADS] = {0};
};
+struct cputrace_profiler {
+ cputrace_anchor* anchors;
+ bool profiling;
+ pthread_mutex_t global_lock;
+};
class HW_profile {
public:
const char* function;
uint64_t index;
uint64_t flags;
- struct HW_ctx ctx;
+ struct HW_ctx* ctx;
};
void HW_init(HW_ctx* ctx, uint64_t flags);
void cputrace_stop(ceph::Formatter* f);
void cputrace_reset(ceph::Formatter* f);
void cputrace_dump(ceph::Formatter* f, const std::string& logger = "", const std::string& counter = "");
-void cputrace_print_to_stringstream(std::stringstream& ss);
\ No newline at end of file
+void cputrace_print_to_stringstream(std::stringstream& ss);
+
+#endif
\ No newline at end of file