From: Jaya Prakash Date: Thu, 7 Aug 2025 18:32:22 +0000 (+0000) Subject: common: add simple cputrace API, refactor HW_ctx, remove arena logging X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=6e05fe91086748bb63f5299108a9108e430046b1;p=ceph.git common: add simple cputrace API, refactor HW_ctx, remove arena logging. Add overloads for cputrace_start(), cputrace_stop(), and cputrace_reset() that omit ceph::Formatter logging. These provide a simpler API when formatted output is not needed. Replace HW_conf with flags and add the HW_ctx_empty initializer. Introduce sample_t and refactor read_perf_event to use HW_read. Remove arena-based logging. Signed-off-by: Jaya Prakash --- diff --git a/src/common/cputrace.cc b/src/common/cputrace.cc index 2be8f9c3c74..3c284b9d980 100644 --- a/src/common/cputrace.cc +++ b/src/common/cputrace.cc @@ -32,39 +32,6 @@ static long perf_event_open(struct perf_event_attr* hw_event, pid_t pid, return syscall(__NR_perf_event_open, hw_event, pid, cpu, group_fd, flags); } -static Arena* arena_create(size_t size) { - void* start = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - PROFILE_ASSERT(start != MAP_FAILED); - ArenaRegion* region = (ArenaRegion*)malloc(sizeof(ArenaRegion)); - region->start = start; - region->end = (char*)start + size; - region->current = start; - region->next = nullptr; - Arena* a = (Arena*)malloc(sizeof(Arena)); - a->region = region; - return a; -} - -static void* arena_alloc(Arena* arena, size_t size) { - if ((char*)arena->region->current + size > (char*)arena->region->end) { - fprintf(stderr, "Arena allocation failed: insufficient space for %zu bytes\n", size); - return nullptr; - } - void* ptr = arena->region->current; - arena->region->current = (char*)arena->region->current + size; - return ptr; -} - -static void arena_reset(Arena* arena) { - arena->region->current = arena->region->start; -} - -static void arena_destroy(Arena* arena) { - 
munmap(arena->region->start, (char*)arena->region->end - (char*)arena->region->start); - free(arena->region); - free(arena); -} - static uint64_t get_thread_id() { if (!thread_id_initialized) { uint64_t tid = pthread_self(); @@ -109,32 +76,28 @@ static void close_perf_fd(int& fd) { } } -static void HW_init(HW_ctx* ctx, HW_conf* conf) { - *ctx = { -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, *conf }; -} - -static void HW_start(HW_ctx* ctx) { +void HW_init(HW_ctx* ctx, uint64_t flags) { struct perf_event_attr pe; int parent_fd = -1; - if (ctx->conf.capture_swi) { + if (flags & HW_PROFILE_SWI) { setup_perf_event(&pe, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CONTEXT_SWITCHES); open_perf_fd(ctx->fd_swi, ctx->id_swi, &pe, "SWI", -1); parent_fd = ctx->fd_swi; } - else if (ctx->conf.capture_cyc) { + else if (flags & HW_PROFILE_CYC) { setup_perf_event(&pe, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES); open_perf_fd(ctx->fd_cyc, ctx->id_cyc, &pe, "CYC", -1); parent_fd = ctx->fd_cyc; - } else if (ctx->conf.capture_cmiss) { + } else if (flags & HW_PROFILE_CMISS) { setup_perf_event(&pe, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES); open_perf_fd(ctx->fd_cmiss, ctx->id_cmiss, &pe, "CMISS", -1); parent_fd = ctx->fd_cmiss; - } else if (ctx->conf.capture_bmiss) { + } else if (flags & HW_PROFILE_BMISS) { setup_perf_event(&pe, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_MISSES); open_perf_fd(ctx->fd_bmiss, ctx->id_bmiss, &pe, "BMISS", -1); parent_fd = ctx->fd_bmiss; - } else if (ctx->conf.capture_ins) { + } else if (flags & HW_PROFILE_INS) { setup_perf_event(&pe, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS); open_perf_fd(ctx->fd_ins, ctx->id_ins, &pe, "INS", -1); parent_fd = ctx->fd_ins; @@ -142,23 +105,23 @@ static void HW_start(HW_ctx* ctx) { ctx->parent_fd = parent_fd; - if (ctx->conf.capture_swi && ctx->fd_swi == -1 && parent_fd != -1) { + if (flags & HW_PROFILE_SWI && ctx->fd_swi == -1 && parent_fd != -1) { setup_perf_event(&pe, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CONTEXT_SWITCHES); 
open_perf_fd(ctx->fd_swi, ctx->id_swi, &pe, "SWI", parent_fd); } - if (ctx->conf.capture_cyc && ctx->fd_cyc == -1 && parent_fd != -1) { + if (flags & HW_PROFILE_CYC && ctx->fd_cyc == -1 && parent_fd != -1) { setup_perf_event(&pe, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES); open_perf_fd(ctx->fd_cyc, ctx->id_cyc, &pe, "CYC", parent_fd); } - if (ctx->conf.capture_cmiss && ctx->fd_cmiss == -1 && parent_fd != -1) { + if (flags & HW_PROFILE_CMISS && ctx->fd_cmiss == -1 && parent_fd != -1) { setup_perf_event(&pe, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES); open_perf_fd(ctx->fd_cmiss, ctx->id_cmiss, &pe, "CMISS", parent_fd); } - if (ctx->conf.capture_bmiss && ctx->fd_bmiss == -1 && parent_fd != -1) { + if (flags & HW_PROFILE_BMISS && ctx->fd_bmiss == -1 && parent_fd != -1) { setup_perf_event(&pe, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_MISSES); open_perf_fd(ctx->fd_bmiss, ctx->id_bmiss, &pe, "BMISS", parent_fd); } - if (ctx->conf.capture_ins && ctx->fd_ins == -1 && parent_fd != -1) { + if (flags & HW_PROFILE_INS && ctx->fd_ins == -1 && parent_fd != -1) { setup_perf_event(&pe, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS); open_perf_fd(ctx->fd_ins, ctx->id_ins, &pe, "INS", parent_fd); } @@ -169,7 +132,7 @@ static void HW_start(HW_ctx* ctx) { } } -static void HW_clean(HW_ctx* ctx) { +void HW_clean(HW_ctx* ctx) { close_perf_fd(ctx->fd_swi); close_perf_fd(ctx->fd_cyc); close_perf_fd(ctx->fd_cmiss); @@ -178,48 +141,54 @@ static void HW_clean(HW_ctx* ctx) { close_perf_fd(ctx->parent_fd); } -static void read_perf_event(HW_ctx* ctx, cputrace_anchor* anchor, uint64_t tid) { - pthread_mutex_lock(&anchor->mutex[tid]); - Arena* arena = anchor->thread_arena[tid]; - if (ctx->parent_fd != -1) { - char buf[256]; - struct read_format* rf = (struct read_format*)buf; - - if (read(ctx->parent_fd, buf, sizeof(buf)) > 0) { - for (uint64_t i = 0; i < rf->nr; i++) { - cputrace_result_type type; - if (rf->values[i].id == ctx->id_swi) type = CPUTRACE_RESULT_SWI; - else if 
(rf->values[i].id == ctx->id_cyc) type = CPUTRACE_RESULT_CYC; - else if (rf->values[i].id == ctx->id_cmiss) type = CPUTRACE_RESULT_CMISS; - else if (rf->values[i].id == ctx->id_bmiss) type = CPUTRACE_RESULT_BMISS; - else if (rf->values[i].id == ctx->id_ins) type = CPUTRACE_RESULT_INS; - else continue; - auto* r = (cputrace_anchor_result*)arena_alloc(arena, sizeof(cputrace_anchor_result)); - r->type = type; - r->value = rf->values[i].value; +void HW_read(HW_ctx* ctx, sample_t* measure) { + if (ctx->parent_fd == -1) { + return; + } + char buf[256]; + struct read_format* rf = (struct read_format*)buf; + if (read(ctx->parent_fd, buf, sizeof(buf)) > 0) { + for (uint64_t i = 0; i < rf->nr; i++) { + if (rf->values[i].id == ctx->id_swi) { + measure->swi = rf->values[i].value; + } else if (rf->values[i].id == ctx->id_cyc) { + measure->cyc = rf->values[i].value; + } else if (rf->values[i].id == ctx->id_cmiss) { + measure->cmiss = rf->values[i].value; + } else if (rf->values[i].id == ctx->id_bmiss) { + measure->bmiss = rf->values[i].value; + } else if (rf->values[i].id == ctx->id_ins) { + measure->ins = rf->values[i].value; } - ioctl(ctx->parent_fd, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP); - } else { - fprintf(stderr, "Failed to read perf events: %s\n", strerror(errno)); } } - pthread_mutex_unlock(&anchor->mutex[tid]); } -static void aggregate_thread_results(cputrace_anchor* anchor, uint64_t tid) { - pthread_mutex_lock(&anchor->mutex[tid]); - auto* arena = anchor->thread_arena[tid]; - auto* arr = (cputrace_anchor_result*)arena->region->start; - size_t count = ((char*)arena->region->current - (char*)arena->region->start) / sizeof(arr[0]); - for (size_t i = 0; i < count; ++i) { - if (arr[i].type == CPUTRACE_RESULT_CALL_COUNT && arr[i].value == 1) { - anchor->call_count += 1; - } else { - anchor->global_sum[arr[i].type] += arr[i].value; +static void read_perf_event(HW_ctx* ctx, cputrace_anchor* anchor) { + pthread_mutex_lock(&anchor->lock); + if (ctx->parent_fd != -1) { + 
sample_t measure; + HW_read(ctx, &measure); + if (measure.swi) { + anchor->global_results.swi += measure.swi; + } + if (measure.cyc) { + anchor->global_results.cyc += measure.cyc; + } + if (measure.cmiss) { + anchor->global_results.cmiss += measure.cmiss; + } + if (measure.bmiss) { + anchor->global_results.bmiss += measure.bmiss; + } + if (measure.ins) { + anchor->global_results.ins += measure.ins; + } + if (ioctl(ctx->parent_fd, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP) != 0) { + fprintf(stderr, "Failed to reset perf counters: %s\n", strerror(errno)); } } - arena_reset(arena); - pthread_mutex_unlock(&anchor->mutex[tid]); + pthread_mutex_unlock(&anchor->lock); } HW_profile::HW_profile(const char* function, uint64_t index, uint64_t flags) @@ -227,29 +196,18 @@ HW_profile::HW_profile(const char* function, uint64_t index, uint64_t flags) if (index >= CPUTRACE_MAX_ANCHORS || !g_profiler.profiling) return; + uint64_t tid = get_thread_id(); + cputrace_anchor& anchor = g_profiler.anchors[index]; anchor.name = function; anchor.flags = flags; - uint64_t tid = get_thread_id(); - - pthread_mutex_lock(&anchor.mutex[tid]); - auto* a = anchor.thread_arena[tid]; - auto* r = (cputrace_anchor_result*)arena_alloc(a, sizeof(cputrace_anchor_result)); - if (r) { - r->type = CPUTRACE_RESULT_CALL_COUNT; - r->value = 1; - } - pthread_mutex_unlock(&anchor.mutex[tid]); - HW_conf conf = {0}; - if (flags & HW_PROFILE_SWI) conf.capture_swi = true; - if (flags & HW_PROFILE_CYC) conf.capture_cyc = true; - if (flags & HW_PROFILE_CMISS) conf.capture_cmiss = true; - if (flags & HW_PROFILE_BMISS) conf.capture_bmiss = true; - if (flags & HW_PROFILE_INS) conf.capture_ins = true; + pthread_mutex_lock(&anchor.lock); + anchor.global_results.call_count += 1; + pthread_mutex_unlock(&anchor.lock); - HW_init(&ctx, &conf); - HW_start(&ctx); + ctx = HW_ctx_empty; + HW_init(&ctx, flags); pthread_mutex_lock(&g_profiler.global_lock); active_contexts[index][tid] = &ctx; @@ -262,13 +220,22 @@ 
HW_profile::~HW_profile() { uint64_t tid = get_thread_id(); pthread_mutex_lock(&g_profiler.global_lock); - read_perf_event(&ctx, &g_profiler.anchors[index], tid); - aggregate_thread_results(&g_profiler.anchors[index], tid); + read_perf_event(&ctx, &g_profiler.anchors[index]); HW_clean(&ctx); active_contexts[index][tid] = nullptr; pthread_mutex_unlock(&g_profiler.global_lock); } +void cputrace_start() { + pthread_mutex_lock(&g_profiler.global_lock); + if (g_profiler.profiling) { + pthread_mutex_unlock(&g_profiler.global_lock); + return; + } + g_profiler.profiling = true; + pthread_mutex_unlock(&g_profiler.global_lock); +} + void cputrace_start(ceph::Formatter* f) { pthread_mutex_lock(&g_profiler.global_lock); if (g_profiler.profiling) { @@ -285,6 +252,16 @@ void cputrace_start(ceph::Formatter* f) { pthread_mutex_unlock(&g_profiler.global_lock); } +void cputrace_stop() { + pthread_mutex_lock(&g_profiler.global_lock); + if (!g_profiler.profiling) { + pthread_mutex_unlock(&g_profiler.global_lock); + return; + } + g_profiler.profiling = false; + pthread_mutex_unlock(&g_profiler.global_lock); +} + void cputrace_stop(ceph::Formatter* f) { pthread_mutex_lock(&g_profiler.global_lock); if (!g_profiler.profiling) { @@ -301,20 +278,24 @@ void cputrace_stop(ceph::Formatter* f) { f->close_section(); } +void cputrace_reset() { + pthread_mutex_lock(&g_profiler.global_lock); + for (int i = 0; i < CPUTRACE_MAX_ANCHORS; ++i) { + if (!g_profiler.anchors[i].name) continue; + pthread_mutex_lock(&g_profiler.anchors[i].lock); + g_profiler.anchors[i].global_results = results{}; + pthread_mutex_unlock(&g_profiler.anchors[i].lock); + } + pthread_mutex_unlock(&g_profiler.global_lock); +} + void cputrace_reset(ceph::Formatter* f) { pthread_mutex_lock(&g_profiler.global_lock); for (int i = 0; i < CPUTRACE_MAX_ANCHORS; ++i) { if (!g_profiler.anchors[i].name) continue; - for (int j = 0; j < CPUTRACE_MAX_THREADS; ++j) { - pthread_mutex_lock(&g_profiler.anchors[i].mutex[j]); - 
arena_reset(g_profiler.anchors[i].thread_arena[j]); - active_contexts[i][j] = nullptr; - pthread_mutex_unlock(&g_profiler.anchors[i].mutex[j]); - } - g_profiler.anchors[i].call_count = 0; - for (int t = 0; t < CPUTRACE_RESULT_COUNT; ++t) { - g_profiler.anchors[i].global_sum[t] = 0; - } + pthread_mutex_lock(&g_profiler.anchors[i].lock); + g_profiler.anchors[i].global_results = results{}; + pthread_mutex_unlock(&g_profiler.anchors[i].lock); } f->open_object_section("cputrace_reset"); f->dump_format("status", "Counters reset"); @@ -335,38 +316,37 @@ void cputrace_dump(ceph::Formatter* f, const std::string& logger, const std::str for (int j = 0; j < CPUTRACE_MAX_THREADS; ++j) { if (active_contexts[i][j]) { - read_perf_event(active_contexts[i][j], &g_profiler.anchors[i], j); + read_perf_event(active_contexts[i][j], &g_profiler.anchors[i]); } - aggregate_thread_results(&g_profiler.anchors[i], j); } f->open_object_section(anchor.name); - f->dump_unsigned("call_count", anchor.call_count); + f->dump_unsigned("call_count", anchor.global_results.call_count); if (anchor.flags & HW_PROFILE_SWI && (counter.empty() || counter == "context_switches")) { - f->dump_unsigned("context_switches", anchor.global_sum[CPUTRACE_RESULT_SWI]); - if (anchor.call_count) - f->dump_float("avg_context_switches", (double)anchor.global_sum[CPUTRACE_RESULT_SWI] / anchor.call_count); + f->dump_unsigned("context_switches", anchor.global_results.swi); + if (anchor.global_results.call_count) + f->dump_float("avg_context_switches", (double)anchor.global_results.swi / anchor.global_results.call_count); } if (anchor.flags & HW_PROFILE_CYC && (counter.empty() || counter == "cpu_cycles")) { - f->dump_unsigned("cpu_cycles", anchor.global_sum[CPUTRACE_RESULT_CYC]); - if (anchor.call_count) - f->dump_float("avg_cpu_cycles", (double)anchor.global_sum[CPUTRACE_RESULT_CYC] / anchor.call_count); + f->dump_unsigned("cpu_cycles", anchor.global_results.cyc); + if (anchor.global_results.call_count) + 
f->dump_float("avg_cpu_cycles", (double)anchor.global_results.cyc / anchor.global_results.call_count); } if (anchor.flags & HW_PROFILE_CMISS && (counter.empty() || counter == "cache_misses")) { - f->dump_unsigned("cache_misses", anchor.global_sum[CPUTRACE_RESULT_CMISS]); - if (anchor.call_count) - f->dump_float("avg_cache_misses", (double)anchor.global_sum[CPUTRACE_RESULT_CMISS] / anchor.call_count); + f->dump_unsigned("cache_misses", anchor.global_results.cmiss); + if (anchor.global_results.call_count) + f->dump_float("avg_cache_misses", (double)anchor.global_results.cmiss / anchor.global_results.call_count); } if (anchor.flags & HW_PROFILE_BMISS && (counter.empty() || counter == "branch_misses")) { - f->dump_unsigned("branch_misses", anchor.global_sum[CPUTRACE_RESULT_BMISS]); - if (anchor.call_count) - f->dump_float("avg_branch_misses", (double)anchor.global_sum[CPUTRACE_RESULT_BMISS] / anchor.call_count); + f->dump_unsigned("branch_misses", anchor.global_results.bmiss); + if (anchor.global_results.call_count) + f->dump_float("avg_branch_misses", (double)anchor.global_results.bmiss / anchor.global_results.call_count); } if (anchor.flags & HW_PROFILE_INS && (counter.empty() || counter == "instructions")) { - f->dump_unsigned("instructions", anchor.global_sum[CPUTRACE_RESULT_INS]); - if (anchor.call_count) - f->dump_float("avg_instructions", (double)anchor.global_sum[CPUTRACE_RESULT_INS] / anchor.call_count); + f->dump_unsigned("instructions", anchor.global_results.ins); + if (anchor.global_results.call_count) + f->dump_float("avg_instructions", (double)anchor.global_results.ins / anchor.global_results.call_count); } f->close_section(); dumped = true; @@ -390,46 +370,45 @@ void cputrace_print_to_stringstream(std::stringstream& ss) { for (int j = 0; j < CPUTRACE_MAX_THREADS; ++j) { if (active_contexts[i][j]) { - read_perf_event(active_contexts[i][j], &g_profiler.anchors[i], j); + read_perf_event(active_contexts[i][j], &g_profiler.anchors[i]); } - 
aggregate_thread_results(&g_profiler.anchors[i], j); } ss << " " << anchor.name << ":\n"; - ss << " call_count: " << anchor.call_count << "\n"; + ss << " call_count: " << anchor.global_results.call_count << "\n"; if (anchor.flags & HW_PROFILE_SWI) { - ss << " context_switches: " << anchor.global_sum[CPUTRACE_RESULT_SWI]; - if (anchor.call_count) { - ss << "\n avg_context_switches: " << (double)anchor.global_sum[CPUTRACE_RESULT_SWI] / anchor.call_count; + ss << " context_switches: " << anchor.global_results.swi; + if (anchor.global_results.call_count) { + ss << "\n avg_context_switches: " << (double)anchor.global_results.swi / anchor.global_results.call_count; } ss << "\n"; } if (anchor.flags & HW_PROFILE_CYC) { - ss << " cpu_cycles: " << anchor.global_sum[CPUTRACE_RESULT_CYC]; - if (anchor.call_count) { - ss << "\n avg_cpu_cycles: " << (double)anchor.global_sum[CPUTRACE_RESULT_CYC] / anchor.call_count; + ss << " cpu_cycles: " << anchor.global_results.cyc; + if (anchor.global_results.call_count) { + ss << "\n avg_cpu_cycles: " << (double)anchor.global_results.cyc / anchor.global_results.call_count; } ss << "\n"; } if (anchor.flags & HW_PROFILE_CMISS) { - ss << " cache_misses: " << anchor.global_sum[CPUTRACE_RESULT_CMISS]; - if (anchor.call_count) { - ss << "\n avg_cache_misses: " << (double)anchor.global_sum[CPUTRACE_RESULT_CMISS] / anchor.call_count; + ss << " cache_misses: " << anchor.global_results.cmiss; + if (anchor.global_results.call_count) { + ss << "\n avg_cache_misses: " << (double)anchor.global_results.cmiss / anchor.global_results.call_count; } ss << "\n"; } if (anchor.flags & HW_PROFILE_BMISS) { - ss << " branch_misses: " << anchor.global_sum[CPUTRACE_RESULT_BMISS]; - if (anchor.call_count) { - ss << "\n avg_branch_misses: " << (double)anchor.global_sum[CPUTRACE_RESULT_BMISS] / anchor.call_count; + ss << " branch_misses: " << anchor.global_results.bmiss; + if (anchor.global_results.call_count) { + ss << "\n avg_branch_misses: " << 
(double)anchor.global_results.bmiss / anchor.global_results.call_count; } ss << "\n"; } if (anchor.flags & HW_PROFILE_INS) { - ss << " instructions: " << anchor.global_sum[CPUTRACE_RESULT_INS]; - if (anchor.call_count) { - ss << "\n avg_instructions: " << (double)anchor.global_sum[CPUTRACE_RESULT_INS] / anchor.call_count; + ss << " instructions: " << anchor.global_results.ins; + if (anchor.global_results.call_count) { + ss << "\n avg_instructions: " << (double)anchor.global_results.ins / anchor.global_results.call_count; } ss << "\n"; } @@ -447,29 +426,26 @@ __attribute__((constructor)) static void cputrace_init() { exit(1); } for (int i = 0; i < CPUTRACE_MAX_ANCHORS; ++i) { - for (int j = 0; j < CPUTRACE_MAX_THREADS; ++j) { - if (pthread_mutex_init(&g_profiler.anchors[i].mutex[j], nullptr) != 0) { - fprintf(stderr, "Failed to initialize mutex for anchor %d, thread %d: %s\n", i, j, strerror(errno)); - } - g_profiler.anchors[i].thread_arena[j] = arena_create(4 * 1024 * 1024); + if (pthread_mutex_init(&g_profiler.anchors[i].lock, nullptr) != 0) { + fprintf(stderr, "Failed to initialize mutex for anchor %d: %s\n", i, strerror(errno)); + exit(1); } } if (pthread_mutex_init(&g_profiler.global_lock, nullptr) != 0) { fprintf(stderr, "Failed to initialize global mutex: %s\n", strerror(errno)); + exit(1); } } __attribute__((destructor)) static void cputrace_fini() { for (int i = 0; i < CPUTRACE_MAX_ANCHORS; ++i) { - for (int j = 0; j < CPUTRACE_MAX_THREADS; ++j) { - if (pthread_mutex_destroy(&g_profiler.anchors[i].mutex[j]) != 0) { - fprintf(stderr, "Failed to destroy mutex for anchor %d, thread %d: %s\n", i, j, strerror(errno)); - } - arena_destroy(g_profiler.anchors[i].thread_arena[j]); + if (pthread_mutex_destroy(&g_profiler.anchors[i].lock) != 0) { + fprintf(stderr, "Failed to destroy mutex for anchor %d: %s\n", i, strerror(errno)); } } if (pthread_mutex_destroy(&g_profiler.global_lock) != 0) { fprintf(stderr, "Failed to destroy global mutex: %s\n", strerror(errno)); } 
free(g_profiler.anchors); + g_profiler.anchors = nullptr; } \ No newline at end of file diff --git a/src/common/cputrace.h b/src/common/cputrace.h index 39db7948dc5..10d7a771cfd 100644 --- a/src/common/cputrace.h +++ b/src/common/cputrace.h @@ -4,51 +4,32 @@ #include #include "common/Formatter.h" -#define CPUTRACE_MAX_ANCHORS 128 +#define CPUTRACE_MAX_ANCHORS 10 #define CPUTRACE_MAX_THREADS 64 -enum cputrace_result_type { - CPUTRACE_RESULT_SWI = 0, - CPUTRACE_RESULT_CYC, - CPUTRACE_RESULT_CMISS, - CPUTRACE_RESULT_BMISS, - CPUTRACE_RESULT_INS, - CPUTRACE_RESULT_CALL_COUNT, - CPUTRACE_RESULT_COUNT -}; - enum cputrace_flags { - HW_PROFILE_SWI = (1ULL << CPUTRACE_RESULT_SWI), - HW_PROFILE_CYC = (1ULL << CPUTRACE_RESULT_CYC), - HW_PROFILE_CMISS = (1ULL << CPUTRACE_RESULT_CMISS), - HW_PROFILE_BMISS = (1ULL << CPUTRACE_RESULT_BMISS), - HW_PROFILE_INS = (1ULL << CPUTRACE_RESULT_INS), + HW_PROFILE_SWI = (1ULL << 0), + HW_PROFILE_CYC = (1ULL << 1), + HW_PROFILE_CMISS = (1ULL << 2), + HW_PROFILE_BMISS = (1ULL << 3), + HW_PROFILE_INS = (1ULL << 4), }; #define HWProfileFunctionF(var, name, flags) HW_profile var(name, __COUNTER__ + 1, flags) -struct cputrace_anchor_result { - cputrace_result_type type; - uint64_t value; -}; - -struct ArenaRegion { - void* start; - void* end; - void* current; - ArenaRegion* next; -}; - -struct Arena { - ArenaRegion* region; +struct results { + uint64_t call_count; + uint64_t swi; + uint64_t cyc; + uint64_t cmiss; + uint64_t bmiss; + uint64_t ins; }; struct cputrace_anchor { const char* name; - pthread_mutex_t mutex[CPUTRACE_MAX_THREADS]; - Arena* thread_arena[CPUTRACE_MAX_THREADS]; - uint64_t global_sum[CPUTRACE_RESULT_COUNT]; - uint64_t call_count; + pthread_mutex_t lock; + results global_results; uint64_t flags; }; @@ -58,14 +39,6 @@ struct cputrace_profiler { pthread_mutex_t global_lock; }; -struct HW_conf { - bool capture_swi; - bool capture_cyc; - bool capture_cmiss; - bool capture_bmiss; - bool capture_ins; -}; - struct HW_ctx { int 
parent_fd; int fd_swi; @@ -78,9 +51,22 @@ struct HW_ctx { uint64_t id_cmiss; uint64_t id_bmiss; uint64_t id_ins; - struct HW_conf conf; }; +constexpr HW_ctx HW_ctx_empty = { + -1, -1, -1, -1, -1, -1, + 0, 0, 0, 0, 0 +}; + +struct sample_t { + uint64_t swi = 0; + uint64_t cyc = 0; + uint64_t cmiss = 0; + uint64_t bmiss = 0; + uint64_t ins = 0; +}; + + class HW_profile { public: HW_profile(const char* function, uint64_t index, uint64_t flags); @@ -93,10 +79,15 @@ private: struct HW_ctx ctx; }; +void HW_init(HW_ctx* ctx, uint64_t flags); +void HW_read(HW_ctx* ctx, sample_t* mesaure); +void HW_clean(HW_ctx* ctx); + +void cputrace_start(); +void cputrace_stop(); +void cputrace_reset(); void cputrace_start(ceph::Formatter* f); void cputrace_stop(ceph::Formatter* f); void cputrace_reset(ceph::Formatter* f); void cputrace_dump(ceph::Formatter* f, const std::string& logger = "", const std::string& counter = ""); -void cputrace_print_to_stringstream(std::stringstream& ss); -void cputrace_flush_thread_start(); -void cputrace_flush_thread_stop(); \ No newline at end of file +void cputrace_print_to_stringstream(std::stringstream& ss); \ No newline at end of file