static std::mutex name_id_mtx;
int register_anchor(const char* name) {
- std::lock_guard<std::mutex> lock(name_id_mtx);
- auto it = name_to_id.find(name);
- ceph_assert(it == name_to_id.end());
- int id = next_id++;
- name_to_id[name] = id;
- return id;
+ std::lock_guard<std::mutex> lock(name_id_mtx);
+ auto it = name_to_id.find(name);
+ ceph_assert(it == name_to_id.end());
+ int id = next_id++;
+ name_to_id[name] = id;
+ return id;
}
// Accessor for the process-wide registry of cpucounter groups.
// NOTE(review): the original body was empty, so control flowed off the end of
// a reference-returning function (undefined behavior). A function-local
// static registry is the conventional fix — confirm this matches the intended
// storage for registered groups.
std::vector<cpucounter_group*>& get_groups() {
  static std::vector<cpucounter_group*> groups;
  return groups;
}
struct read_format {
- uint64_t nr;
- struct values {
- uint64_t value;
- uint64_t id;
- } values[];
+ uint64_t nr;
+ struct values {
+ uint64_t value;
+ uint64_t id;
+ } values[];
};
// Thin wrapper over the perf_event_open(2) syscall; glibc provides no
// dedicated wrapper for it.
static long perf_event_open(struct perf_event_attr* hw_event, pid_t pid,
                            int cpu, int group_fd, unsigned long flags) {
  return syscall(__NR_perf_event_open, hw_event, pid, cpu, group_fd, flags);
}
inline int get_thread_id() {
- if (thread_id_local == -1) {
- std::lock_guard<std::mutex> lck(thread_id_mtx);
- thread_id_local = thread_next_id++ % CPUTRACE_MAX_THREADS;
- }
- return thread_id_local;
+ if (thread_id_local == -1) {
+ std::lock_guard<std::mutex> lck(thread_id_mtx);
+ thread_id_local = thread_next_id++ % CPUTRACE_MAX_THREADS;
+ }
+ return thread_id_local;
}
// Fill a perf_event_attr for one counter of a group read: created disabled,
// excluding hypervisor, with PERF_FORMAT_GROUP | PERF_FORMAT_ID so a single
// read() on the leader returns every member tagged by id.
static void setup_perf_event(struct perf_event_attr* pe, uint32_t type, uint64_t config) {
  memset(pe, 0, sizeof(*pe));
  pe->size = sizeof(*pe);
  pe->type = type;
  pe->config = config;
  pe->disabled = 1;
  pe->exclude_hv = 1;
  pe->read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID;
  // Software events (e.g. context switches) must also count kernel activity;
  // hardware events are restricted to user space.
  pe->exclude_kernel = (type != PERF_TYPE_SOFTWARE) ? 1 : 0;
}
// Open one perf counter on the calling thread (any CPU) and fetch the
// kernel-assigned id used to match it in group reads.
// On failure, fd is left at -1 and id is zeroed; the error is only logged.
static void open_perf_fd(int& fd, uint64_t& id, struct perf_event_attr* pe, const char* name, int group_fd) {
  fd = perf_event_open(pe, gettid(), -1, group_fd, 0);
  if (fd == -1) {
    fprintf(stderr, "Failed to open perf event for %s: %s\n", name, strerror(errno));
    id = 0;
    return;
  }
  ioctl(fd, PERF_EVENT_IOC_ID, &id);
  ioctl(fd, PERF_EVENT_IOC_RESET, 0);
}
// Disable and close a perf counter fd, resetting it to -1.
// Safe no-op when the fd is already closed.
static void close_perf_fd(int& fd) {
  if (fd == -1)
    return;
  ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
  close(fd);
  fd = -1;
}
// Sentinel "no counters open" context: every fd is -1, every event id is 0.
HW_ctx HW_ctx_empty = {
  -1, -1, -1, -1, -1, -1,
  0, 0, 0, 0, 0
};
// Open the perf counter group selected by `flags` for the calling thread.
// The first selected counter (priority order: SWI, CYC, INS, CMISS, BMISS)
// becomes the group leader; the remaining selected counters join its group so
// one read() returns all values. The whole group is then reset and enabled.
void HW_init(HW_ctx* ctx, cputrace_flags flags) {
  struct perf_event_attr pe;
  int parent_fd = -1;

  // Phase 1: open the group leader.
  if (flags & HW_PROFILE_SWI) {
    setup_perf_event(&pe, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CONTEXT_SWITCHES);
    open_perf_fd(ctx->fd_swi, ctx->id_swi, &pe, "SWI", -1);
    parent_fd = ctx->fd_swi;
  } else if (flags & HW_PROFILE_CYC) {
    setup_perf_event(&pe, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES);
    open_perf_fd(ctx->fd_cyc, ctx->id_cyc, &pe, "CYC", -1);
    parent_fd = ctx->fd_cyc;
  } else if (flags & HW_PROFILE_INS) {
    setup_perf_event(&pe, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS);
    open_perf_fd(ctx->fd_ins, ctx->id_ins, &pe, "INS", -1);
    parent_fd = ctx->fd_ins;
  } else if (flags & HW_PROFILE_CMISS) {
    setup_perf_event(&pe, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES);
    open_perf_fd(ctx->fd_cmiss, ctx->id_cmiss, &pe, "CMISS", -1);
    parent_fd = ctx->fd_cmiss;
  } else if (flags & HW_PROFILE_BMISS) {
    setup_perf_event(&pe, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_MISSES);
    open_perf_fd(ctx->fd_bmiss, ctx->id_bmiss, &pe, "BMISS", -1);
    parent_fd = ctx->fd_bmiss;
  }
  ctx->parent_fd = parent_fd;

  // Phase 2: open the remaining selected counters as members of the leader's
  // group. The `fd == -1` guard skips the counter already opened as leader.
  if (flags & HW_PROFILE_SWI && ctx->fd_swi == -1 && parent_fd != -1) {
    setup_perf_event(&pe, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CONTEXT_SWITCHES);
    open_perf_fd(ctx->fd_swi, ctx->id_swi, &pe, "SWI", parent_fd);
  }
  if (flags & HW_PROFILE_CYC && ctx->fd_cyc == -1 && parent_fd != -1) {
    setup_perf_event(&pe, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES);
    open_perf_fd(ctx->fd_cyc, ctx->id_cyc, &pe, "CYC", parent_fd);
  }
  if (flags & HW_PROFILE_CMISS && ctx->fd_cmiss == -1 && parent_fd != -1) {
    setup_perf_event(&pe, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES);
    open_perf_fd(ctx->fd_cmiss, ctx->id_cmiss, &pe, "CMISS", parent_fd);
  }
  if (flags & HW_PROFILE_BMISS && ctx->fd_bmiss == -1 && parent_fd != -1) {
    setup_perf_event(&pe, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_MISSES);
    open_perf_fd(ctx->fd_bmiss, ctx->id_bmiss, &pe, "BMISS", parent_fd);
  }
  if (flags & HW_PROFILE_INS && ctx->fd_ins == -1 && parent_fd != -1) {
    setup_perf_event(&pe, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS);
    open_perf_fd(ctx->fd_ins, ctx->id_ins, &pe, "INS", parent_fd);
  }

  // Start the whole group counting from zero.
  if (parent_fd != -1) {
    ioctl(parent_fd, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP);
    ioctl(parent_fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP);
  }
}
// Release every counter fd owned by the context (no-ops for unopened ones).
void HW_clean(HW_ctx* ctx) {
  close_perf_fd(ctx->fd_swi);
  close_perf_fd(ctx->fd_cyc);
  close_perf_fd(ctx->fd_cmiss);
  close_perf_fd(ctx->fd_bmiss);
  close_perf_fd(ctx->fd_ins);
}
// Read the current values of all counters in the group into `measure`,
// matching each returned entry's id against the ids recorded at open time.
// No-op when no group is open; fields with unmatched ids are left untouched.
void HW_read(HW_ctx* ctx, sample_t* measure) {
  if (ctx->parent_fd == -1)
    return;

  static constexpr uint64_t MAX_COUNTERS = 5;
  static constexpr size_t BUFFER_SIZE =
      sizeof(read_format) + MAX_COUNTERS * sizeof(struct read_format::values);
  char buf[BUFFER_SIZE];

  struct read_format* rf = (struct read_format*)buf;
  if (read(ctx->parent_fd, buf, sizeof(buf)) <= 0)
    return;
  for (uint64_t i = 0; i < rf->nr; i++) {
    const uint64_t id = rf->values[i].id;
    const uint64_t value = rf->values[i].value;
    if (id == ctx->id_swi) {
      measure->swi = value;
    } else if (id == ctx->id_cyc) {
      measure->cyc = value;
    } else if (id == ctx->id_cmiss) {
      measure->cmiss = value;
    } else if (id == ctx->id_bmiss) {
      measure->bmiss = value;
    } else if (id == ctx->id_ins) {
      measure->ins = value;
    }
  }
}
// Fold the delta between two counter readings into the anchor's aggregate
// results.
static void collect_samples(sample_t* start, sample_t* end, cputrace_anchor* anchor) {
  anchor->global_results.sample(*end - *start);
}
// Enter a profiled scope: record anchor metadata, lazily open this thread's
// perf counter group on first use, and snapshot the counters at the
// outermost nesting level. All anchor state is updated under anchor.lock.
HW_profile::HW_profile(const char* function, uint64_t index, cputrace_flags flags)
    : function(function), index(index), flags(flags) {
  if (!g_profiler.profiling.load())
    return;
  ceph_assert(index < CPUTRACE_MAX_ANCHORS);

  const uint64_t tid = get_thread_id();
  cputrace_anchor& anchor = g_profiler.anchors[index];
  pthread_mutex_lock(&anchor.lock);
  anchor.name = function;
  anchor.flags = flags;
  anchor.global_results.call_count += 1;

  if (anchor.active_contexts[tid] == nullptr) {
    // First entry on this thread: open the counter group for this anchor.
    ctx = &anchor.per_thread_ctx[tid];
    *ctx = HW_ctx_empty;
    HW_init(ctx, flags);
    anchor.active_contexts[tid] = ctx;
  } else {
    ctx = anchor.active_contexts[tid];
  }

  anchor.nest_level[tid]++;
  // Only the outermost scope takes the starting reading; nested scopes
  // accumulate into the same interval.
  if (anchor.nest_level[tid] == 1) {
    HW_read(ctx, &anchor.start[tid]);
  }
  anchor.is_capturing[tid] = true;
  pthread_mutex_unlock(&anchor.lock);
}
// Leave a profiled scope: at the outermost nesting level, take an end reading
// and fold the interval into the anchor's results.
// NOTE(review): if profiling is toggled on between construction and
// destruction, nest_level is decremented without a matching increment —
// confirm cputrace_start/stop cannot race with live scopes.
HW_profile::~HW_profile() {
  if (!g_profiler.profiling.load())
    return;
  ceph_assert(index < CPUTRACE_MAX_ANCHORS);

  cputrace_anchor& anchor = g_profiler.anchors[index];
  const uint64_t tid = get_thread_id();
  pthread_mutex_lock(&anchor.lock);
  anchor.nest_level[tid]--;
  if (anchor.nest_level[tid] == 0) {
    HW_read(ctx, &anchor.end[tid]);
    collect_samples(&anchor.start[tid], &anchor.end[tid], &anchor);
    // Carry the end reading forward as the next interval's baseline.
    anchor.start[tid] = anchor.end[tid];
    anchor.is_capturing[tid] = false;
  }
  pthread_mutex_unlock(&anchor.lock);
}
// Look up (creating on first use) the measurement slot for a name.
// The returned pointer stays valid: std::map nodes are never relocated.
measurement_t* get_named_measurement(const std::string& name) {
  std::lock_guard<std::mutex> g(g_named_measurements_lock);
  return &g_named_measurements[name];
}
// Resolve the named measurement and start a guarded sampling interval on the
// supplied context (the inner guard is inert when ctx is null).
HW_named_guard::HW_named_guard(const char* name, HW_ctx* ctx)
    : name(name),
      guard(ctx, get_named_measurement(name))
{
}
}
// Turn profiling on. Reports (via the optional Formatter) whether it was
// already active.
void cputrace_start(ceph::Formatter* f) {
  const bool already_active = g_profiler.profiling.load();
  if (!already_active) {
    g_profiler.profiling = true;
  }
  if (f) {
    f->open_object_section("cputrace_start");
    f->dump_format("status",
                   already_active ? "Profiling already active"
                                  : "Profiling started");
    f->close_section();
  }
}
// Turn profiling off, first flushing every in-flight capture so partial
// intervals are folded into the results.
void cputrace_stop(ceph::Formatter* f) {
  if (!g_profiler.profiling.load()) {
    if (f) {
      f->open_object_section("cputrace_stop");
      f->dump_format("status", "Profiling not active");
      f->close_section();
    }
    return;
  }

  // Close out any interval still being captured on any thread.
  for (int i = 0; i < CPUTRACE_MAX_ANCHORS; ++i) {
    cputrace_anchor& anchor = g_profiler.anchors[i];
    if (!anchor.name)
      continue;
    pthread_mutex_lock(&anchor.lock);
    for (int j = 0; j < CPUTRACE_MAX_THREADS; ++j) {
      if (!anchor.is_capturing[j])
        continue;
      HW_read(anchor.active_contexts[j], &anchor.end[j]);
      collect_samples(&anchor.start[j], &anchor.end[j], &anchor);
      anchor.start[j] = anchor.end[j];
      anchor.is_capturing[j] = false;
    }
    pthread_mutex_unlock(&anchor.lock);
  }

  g_profiler.profiling = false;
  if (f) {
    f->open_object_section("cputrace_stop");
    f->dump_format("status", "Profiling stopped");
    f->close_section();
  }
}
// Zero the accumulated measurements of every registered anchor, then report
// status via the optional Formatter.
// Fix: the previous revision had unbalanced braces — the function body was
// closed before the status-reporting block, leaving it outside any function.
void cputrace_reset(ceph::Formatter* f) {
  for (int i = 0; i < CPUTRACE_MAX_ANCHORS; ++i) {
    if (!g_profiler.anchors[i].name) continue;
    pthread_mutex_lock(&g_profiler.anchors[i].lock);
    g_profiler.anchors[i].global_results.reset();
    pthread_mutex_unlock(&g_profiler.anchors[i].lock);
  }
  {
    std::lock_guard<std::mutex> lck(name_id_mtx);
    for (auto& g : get_groups()) {
      // TODO(review): the group-reset body was empty in the original —
      // confirm whether each group's counters should be reset here.
      (void)g;
    }
  }
  if (f) {
    f->open_object_section("cputrace_reset");
    f->dump_format("status", "Counters reset");
    f->close_section();
  }
}
// Dump per-anchor statistics. `logger` optionally filters by anchor name and
// `counter` filters individual counters within an anchor.
// Fix: the top-level "cputrace" section was opened but never closed, leaving
// the Formatter's section stack unbalanced; a matching close_section is now
// emitted after the status line.
void cputrace_dump(ceph::Formatter* f, const std::string& logger, const std::string& counter) {
  f->open_object_section("cputrace");
  bool dumped = false;

  for (int i = 0; i < CPUTRACE_MAX_ANCHORS; ++i) {
    cputrace_anchor& anchor = g_profiler.anchors[i];
    if (!anchor.name || (!logger.empty() && anchor.name != logger)) {
      continue;
    }

    // Fold any in-flight interval so the dump reflects up-to-date totals.
    pthread_mutex_lock(&anchor.lock);
    for (int j = 0; j < CPUTRACE_MAX_THREADS; ++j) {
      if (anchor.is_capturing[j] && g_profiler.profiling.load()) {
        HW_read(anchor.active_contexts[j], &anchor.end[j]);
        collect_samples(&anchor.start[j], &anchor.end[j], &anchor);
        anchor.start[j] = anchor.end[j];
      }
    }
    pthread_mutex_unlock(&anchor.lock);

    f->open_object_section(anchor.name);
    anchor.global_results.dump(f, anchor.flags, counter);
    f->close_section();
    dumped = true;
  }
  {
    std::lock_guard<std::mutex> lck(name_id_mtx);
    for (auto& g : get_groups()) {
      // TODO(review): the per-group section open/dump calls appear to have
      // been lost in this revision; restore them ahead of this close_section.
      (void)g;
      f->close_section();
      dumped = true;
    }
  }
  f->dump_format("status", dumped ? "Profiling data dumped" : "No profiling data available");
  f->close_section();  // closes the "cputrace" section opened above
}
void cputrace_print_to_stringstream(std::stringstream& ss) {
- ss << "cputrace:\n";
- bool dumped = false;
-
- for (int i = 0; i < CPUTRACE_MAX_ANCHORS; ++i) {
- cputrace_anchor& anchor = g_profiler.anchors[i];
- if (!anchor.name) {
- continue;
- }
-
- pthread_mutex_lock(&anchor.lock);
- for (int j = 0; j < CPUTRACE_MAX_THREADS; ++j) {
- if (anchor.is_capturing[j] && g_profiler.profiling.load()) {
- HW_read(anchor.active_contexts[j], &anchor.end[j]);
- collect_samples(&anchor.start[j], &anchor.end[j], &anchor);
- anchor.start[j] = anchor.end[j];
- }
- }
- pthread_mutex_unlock(&anchor.lock);
-
- ss << " " << anchor.name << ":\n";
- anchor.global_results.dump_to_stringstream(ss, anchor.flags);
- dumped = true;
+ ss << "cputrace:\n";
+ bool dumped = false;
+
+ for (int i = 0; i < CPUTRACE_MAX_ANCHORS; ++i) {
+ cputrace_anchor& anchor = g_profiler.anchors[i];
+ if (!anchor.name) {
+ continue;
+ }
+
+ pthread_mutex_lock(&anchor.lock);
+ for (int j = 0; j < CPUTRACE_MAX_THREADS; ++j) {
+ if (anchor.is_capturing[j] && g_profiler.profiling.load()) {
+ HW_read(anchor.active_contexts[j], &anchor.end[j]);
+ collect_samples(&anchor.start[j], &anchor.end[j], &anchor);
+ anchor.start[j] = anchor.end[j];
+ }
}
+ pthread_mutex_unlock(&anchor.lock);
+
+ ss << " " << anchor.name << ":\n";
+ anchor.global_results.dump_to_stringstream(ss, anchor.flags);
+ dumped = true;
+ }
- ss << "status: " << (dumped ? "Profiling data dumped" : "No profiling data available") << "\n";
+ ss << "status: " << (dumped ? "Profiling data dumped" : "No profiling data available") << "\n";
}
__attribute__((constructor)) static void cputrace_init() {
- g_profiler.anchors = (cputrace_anchor*)calloc(CPUTRACE_MAX_ANCHORS, sizeof(cputrace_anchor));
- if (!g_profiler.anchors) {
- fprintf(stderr, "Failed to allocate memory for profiler anchors: %s\n", strerror(errno));
- exit(1);
+ g_profiler.anchors = (cputrace_anchor*)calloc(CPUTRACE_MAX_ANCHORS, sizeof(cputrace_anchor));
+ if (!g_profiler.anchors) {
+ fprintf(stderr, "Failed to allocate memory for profiler anchors: %s\n", strerror(errno));
+ exit(1);
+ }
+ for (int i = 0; i < CPUTRACE_MAX_ANCHORS; ++i) {
+ if (pthread_mutex_init(&g_profiler.anchors[i].lock, nullptr) != 0) {
+ fprintf(stderr, "Failed to initialize mutex for anchor %d: %s\n", i, strerror(errno));
+ exit(1);
}
- for (int i = 0; i < CPUTRACE_MAX_ANCHORS; ++i) {
- if (pthread_mutex_init(&g_profiler.anchors[i].lock, nullptr) != 0) {
- fprintf(stderr, "Failed to initialize mutex for anchor %d: %s\n", i, strerror(errno));
- exit(1);
- }
- }
+ }
}
__attribute__((destructor)) static void cputrace_fini() {
- for (int i = 0; i < CPUTRACE_MAX_ANCHORS; ++i) {
- cputrace_anchor& anchor = g_profiler.anchors[i];
- pthread_mutex_lock(&anchor.lock);
- for (int j = 0; j < CPUTRACE_MAX_THREADS; ++j) {
- if (anchor.active_contexts[j] != nullptr) {
- HW_clean(&anchor.per_thread_ctx[j]);
- anchor.active_contexts[j] = nullptr;
- anchor.is_capturing[j] = false;
- }
- }
- pthread_mutex_unlock(&anchor.lock);
- if (pthread_mutex_destroy(&g_profiler.anchors[i].lock) != 0) {
- fprintf(stderr, "Failed to destroy mutex for anchor %d: %s\n", i, strerror(errno));
- }
+ for (int i = 0; i < CPUTRACE_MAX_ANCHORS; ++i) {
+ cputrace_anchor& anchor = g_profiler.anchors[i];
+ pthread_mutex_lock(&anchor.lock);
+ for (int j = 0; j < CPUTRACE_MAX_THREADS; ++j) {
+ if (anchor.active_contexts[j] != nullptr) {
+ HW_clean(&anchor.per_thread_ctx[j]);
+ anchor.active_contexts[j] = nullptr;
+ anchor.is_capturing[j] = false;
+ }
}
- free(g_profiler.anchors);
- g_profiler.anchors = nullptr;
+ pthread_mutex_unlock(&anchor.lock);
+ if (pthread_mutex_destroy(&g_profiler.anchors[i].lock) != 0) {
+ fprintf(stderr, "Failed to destroy mutex for anchor %d: %s\n", i, strerror(errno));
+ }
+ }
+ free(g_profiler.anchors);
+ g_profiler.anchors = nullptr;
}
hw_per_thread_ctx* hw_per_thread_ctx::get_thread_local() {
#define CPUTRACE_MAX_THREADS 64
// Bit flags selecting which counters an anchor records.
enum cputrace_flags {
  HW_PROFILE_SWI   = (1ULL << 0),  // context switches (software event)
  HW_PROFILE_CYC   = (1ULL << 1),  // CPU cycles
  HW_PROFILE_CMISS = (1ULL << 2),  // cache misses
  HW_PROFILE_BMISS = (1ULL << 3),  // branch misses
  HW_PROFILE_INS   = (1ULL << 4),  // retired instructions
};
// Combine counter selections; a plain enum needs an explicit OR overload to
// stay typed as cputrace_flags.
inline cputrace_flags operator|(cputrace_flags a, cputrace_flags b) {
  const auto bits = static_cast<uint64_t>(a) | static_cast<uint64_t>(b);
  return static_cast<cputrace_flags>(bits);
}
// Test/mask counter selections; counterpart of operator| above.
inline cputrace_flags operator&(cputrace_flags a, cputrace_flags b) {
  const auto bits = static_cast<uint64_t>(a) & static_cast<uint64_t>(b);
  return static_cast<cputrace_flags>(bits);
}
int register_anchor(const char* name);
HW_profile var(name, var##_id, flags)
// One snapshot (or delta) of all tracked counters.
// Fix: operator-= was truncated after `cyc` and fell off the end of a
// reference-returning function (undefined behavior); completed to mirror
// operator- and return *this.
struct sample_t {
  uint64_t swi = 0;    // context switches
  uint64_t cyc = 0;    // CPU cycles
  uint64_t cmiss = 0;  // cache misses
  uint64_t bmiss = 0;  // branch misses
  uint64_t ins = 0;    // retired instructions

  void operator=(const sample_t& other) {
    swi = other.swi;
    cyc = other.cyc;
    cmiss = other.cmiss;
    bmiss = other.bmiss;
    ins = other.ins;
  }

  // Per-field difference (end - start) between two snapshots.
  sample_t operator-(const sample_t& other) const {
    sample_t result;
    result.swi = swi - other.swi;
    result.cyc = cyc - other.cyc;
    result.cmiss = cmiss - other.cmiss;
    result.bmiss = bmiss - other.bmiss;
    result.ins = ins - other.ins;
    return result;
  }

  // In-place per-field difference.
  sample_t& operator-=(const sample_t& other) {
    swi -= other.swi;
    cyc -= other.cyc;
    cmiss -= other.cmiss;
    bmiss -= other.bmiss;
    ins -= other.ins;
    return *this;
  }
};
struct measurement_t {
- uint64_t call_count = 0;
- uint64_t sample_count = 0;
- uint64_t sum_swi = 0, sum_cyc = 0, sum_cmiss = 0, sum_bmiss = 0, sum_ins = 0;
- uint64_t non_zero_swi_count = 0;
- uint64_t zero_swi_count = 0;
-
- void sample(const sample_t& s) {
- sample_count += 1;
- if (s.swi > 0) {
- sum_swi += s.swi;
- non_zero_swi_count += 1;
- }
- if (s.swi == 0) {
- zero_swi_count += 1;
- }
- sum_cyc += s.cyc;
- sum_cmiss += s.cmiss;
- sum_bmiss += s.bmiss;
- sum_ins += s.ins;
+ uint64_t call_count = 0;
+ uint64_t sample_count = 0;
+ uint64_t sum_swi = 0, sum_cyc = 0, sum_cmiss = 0, sum_bmiss = 0, sum_ins = 0;
+ uint64_t non_zero_swi_count = 0;
+ uint64_t zero_swi_count = 0;
+
+ void sample(const sample_t& s) {
+ sample_count += 1;
+ if (s.swi > 0) {
+ sum_swi += s.swi;
+ non_zero_swi_count += 1;
}
-
- void reset() {
- call_count = 0;
- sample_count = 0;
- non_zero_swi_count = 0;
- zero_swi_count = 0;
- sum_swi = sum_cyc = sum_cmiss = sum_bmiss = sum_ins = 0;
+ if (s.swi == 0) {
+ zero_swi_count += 1;
}
+ sum_cyc += s.cyc;
+ sum_cmiss += s.cmiss;
+ sum_bmiss += s.bmiss;
+ sum_ins += s.ins;
+ }
- void dump(ceph::Formatter* f, cputrace_flags flags, const std::string& counter = "") const {
- f->dump_unsigned("sample_count", sample_count);
- if (flags & HW_PROFILE_SWI) {
- f->open_object_section("context_switches");
- f->dump_unsigned("non_zero_count", non_zero_swi_count);
- f->dump_unsigned("zero_count", zero_swi_count);
- f->dump_unsigned("total", sum_swi);
- if (sample_count) {
- f->dump_float("avg", (double)sum_swi / sample_count);
- }
- f->close_section();
- }
-
- auto dump_counter = [&](const std::string& name, uint64_t sum) {
- f->open_object_section(name.c_str());
- f->dump_unsigned("total", sum);
- if (sample_count) {
- f->dump_float("avg", static_cast<double>(sum) / sample_count);
- }
- f->close_section();
- };
-
- if (flags & HW_PROFILE_CYC && (counter.empty() || counter == "cpu_cycles"))
- dump_counter("cpu_cycles", sum_cyc);
-
- if (flags & HW_PROFILE_CMISS && (counter.empty() || counter == "cache_misses"))
- dump_counter("cache_misses", sum_cmiss);
-
- if (flags & HW_PROFILE_BMISS && (counter.empty() || counter == "branch_misses"))
- dump_counter("branch_misses", sum_bmiss);
-
- if (flags & HW_PROFILE_INS && (counter.empty() || counter == "instructions"))
- dump_counter("instructions", sum_ins);
+ void reset() {
+ call_count = 0;
+ sample_count = 0;
+ non_zero_swi_count = 0;
+ zero_swi_count = 0;
+ sum_swi = sum_cyc = sum_cmiss = sum_bmiss = sum_ins = 0;
+ }
+
+ void dump(ceph::Formatter* f, cputrace_flags flags, const std::string& counter = "") const {
+ f->dump_unsigned("sample_count", sample_count);
+ if (flags & HW_PROFILE_SWI) {
+ f->open_object_section("context_switches");
+ f->dump_unsigned("non_zero_count", non_zero_swi_count);
+ f->dump_unsigned("zero_count", zero_swi_count);
+ f->dump_unsigned("total", sum_swi);
+ if (sample_count) {
+ f->dump_float("avg", (double)sum_swi / sample_count);
+ }
+ f->close_section();
}
- void dump_to_stringstream(std::stringstream& ss, cputrace_flags flags) const {
- ss << "sample_count: " << sample_count << "\n";
- if (flags & HW_PROFILE_SWI) {
- ss << "\ncontext_switches:\n";
- ss << " non_zero_count: " << non_zero_swi_count << "\n";
- ss << " zero_count: " << zero_swi_count << "\n";
- ss << " total: " << sum_swi << "\n";
- if (sample_count) {
- ss << " avg : " << (double)sum_swi / sample_count << "\n";
- }
- }
-
- auto dump_counter = [&](const std::string& name, uint64_t sum) {
- ss << name << ":\n";
- ss << " total: " << sum << "\n";
- if (sample_count) {
- ss << " avg : " << (double)sum / sample_count << "\n";
- }
- };
-
- if (flags & HW_PROFILE_CYC)
- dump_counter("cpu_cycles", sum_cyc);
-
- if (flags & HW_PROFILE_CMISS)
- dump_counter("cache_misses", sum_cmiss);
-
- if (flags & HW_PROFILE_BMISS)
- dump_counter("branch_misses", sum_bmiss);
-
- if (flags & HW_PROFILE_INS)
- dump_counter("instructions", sum_ins);
+ auto dump_counter = [&](const std::string& name, uint64_t sum) {
+ f->open_object_section(name.c_str());
+ f->dump_unsigned("total", sum);
+ if (sample_count) {
+ f->dump_float("avg", static_cast<double>(sum) / sample_count);
+ }
+ f->close_section();
+ };
+
+ if (flags & HW_PROFILE_CYC && (counter.empty() || counter == "cpu_cycles"))
+ dump_counter("cpu_cycles", sum_cyc);
+
+ if (flags & HW_PROFILE_CMISS && (counter.empty() || counter == "cache_misses"))
+ dump_counter("cache_misses", sum_cmiss);
+
+ if (flags & HW_PROFILE_BMISS && (counter.empty() || counter == "branch_misses"))
+ dump_counter("branch_misses", sum_bmiss);
+
+ if (flags & HW_PROFILE_INS && (counter.empty() || counter == "instructions"))
+ dump_counter("instructions", sum_ins);
+ }
+
+ void dump_to_stringstream(std::stringstream& ss, cputrace_flags flags) const {
+ ss << "sample_count: " << sample_count << "\n";
+ if (flags & HW_PROFILE_SWI) {
+ ss << "\ncontext_switches:\n";
+ ss << " non_zero_count: " << non_zero_swi_count << "\n";
+ ss << " zero_count: " << zero_swi_count << "\n";
+ ss << " total: " << sum_swi << "\n";
+ if (sample_count) {
+ ss << " avg : " << (double)sum_swi / sample_count << "\n";
+ }
}
+
+ auto dump_counter = [&](const std::string& name, uint64_t sum) {
+ ss << name << ":\n";
+ ss << " total: " << sum << "\n";
+ if (sample_count) {
+ ss << " avg : " << (double)sum / sample_count << "\n";
+ }
+ };
+
+ if (flags & HW_PROFILE_CYC)
+ dump_counter("cpu_cycles", sum_cyc);
+
+ if (flags & HW_PROFILE_CMISS)
+ dump_counter("cache_misses", sum_cmiss);
+
+ if (flags & HW_PROFILE_BMISS)
+ dump_counter("branch_misses", sum_bmiss);
+
+ if (flags & HW_PROFILE_INS)
+ dump_counter("instructions", sum_ins);
+ }
};
// Per-thread perf counter group: one fd per selected counter plus the
// kernel-assigned ids used to match entries in group reads.
// parent_fd == -1 means no group is open.
struct HW_ctx {
  int parent_fd = -1;  // group leader fd (duplicate of one fd below)
  int fd_swi = -1;
  int fd_cyc = -1;
  int fd_cmiss = -1;
  int fd_bmiss = -1;
  int fd_ins = -1;
  uint64_t id_swi = 0;
  uint64_t id_cyc = 0;
  uint64_t id_cmiss = 0;
  uint64_t id_bmiss = 0;
  uint64_t id_ins = 0;
};
extern HW_ctx HW_ctx_empty;
struct cputrace_anchor {
- const char* name = nullptr;
- pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
- measurement_t global_results{};
- cputrace_flags flags = static_cast<cputrace_flags>(0);
- HW_ctx per_thread_ctx[CPUTRACE_MAX_THREADS]{};
- HW_ctx* active_contexts[CPUTRACE_MAX_THREADS] = {nullptr};
- sample_t start[CPUTRACE_MAX_THREADS]{};
- sample_t end[CPUTRACE_MAX_THREADS]{};
- bool is_capturing[CPUTRACE_MAX_THREADS] = {false};
- uint32_t nest_level[CPUTRACE_MAX_THREADS] = {0};
+ const char* name = nullptr;
+ pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
+ measurement_t global_results{};
+ cputrace_flags flags = static_cast<cputrace_flags>(0);
+ HW_ctx per_thread_ctx[CPUTRACE_MAX_THREADS] {};
+ HW_ctx* active_contexts[CPUTRACE_MAX_THREADS] = {nullptr};
+ sample_t start[CPUTRACE_MAX_THREADS] {};
+ sample_t end[CPUTRACE_MAX_THREADS] {};
+ bool is_capturing[CPUTRACE_MAX_THREADS] = {false};
+ uint32_t nest_level[CPUTRACE_MAX_THREADS] = {0};
};
struct cputrace_profiler {
- cputrace_anchor* anchors = nullptr;
- std::atomic<bool> profiling{false};
+ cputrace_anchor* anchors = nullptr;
+ std::atomic<bool> profiling{false};
};
class HW_profile {
public:
- HW_profile(const char* function, uint64_t index, cputrace_flags flags);
- ~HW_profile();
+ HW_profile(const char* function, uint64_t index, cputrace_flags flags);
+ ~HW_profile();
private:
- const char* function;
- uint64_t index;
- cputrace_flags flags;
- struct HW_ctx* ctx;
+ const char* function;
+ uint64_t index;
+ cputrace_flags flags;
+ struct HW_ctx* ctx;
};
void HW_init(HW_ctx* ctx, cputrace_flags flags);
class HW_guard {
public:
- HW_guard(HW_ctx* ctx, measurement_t* out_measurement)
- : ctx(ctx), meas(out_measurement) {
- if (ctx && meas) {
- HW_read(ctx, &start);
- }
+ HW_guard(HW_ctx* ctx, measurement_t* out_measurement)
+ : ctx(ctx), meas(out_measurement) {
+ if (ctx && meas) {
+ HW_read(ctx, &start);
}
- ~HW_guard() {
- if (ctx && meas) {
- HW_read(ctx, &end);
- sample_t elapsed = end - start;
- meas->sample(elapsed);
- }
+ }
+ ~HW_guard() {
+ if (ctx && meas) {
+ HW_read(ctx, &end);
+ sample_t elapsed = end - start;
+ meas->sample(elapsed);
}
+ }
private:
- HW_ctx* ctx{nullptr};
- measurement_t* meas{nullptr};
- sample_t start{}, end{};
+ HW_ctx* ctx{nullptr};
+ measurement_t* meas{nullptr};
+ sample_t start{}, end{};
};
class HW_named_guard {
public:
- HW_named_guard(const char* name, HW_ctx* ctx = nullptr);
- ~HW_named_guard();
+ HW_named_guard(const char* name, HW_ctx* ctx = nullptr);
+ ~HW_named_guard();
private:
- const char* name = nullptr;
- HW_guard guard;
+ const char* name = nullptr;
+ HW_guard guard;
};
measurement_t* get_named_measurement(const std::string& name);