From 16cf2504e6431cdbadd09bdea82afcf3004b70f6 Mon Sep 17 00:00:00 2001 From: Ronen Friedman Date: Mon, 14 Apr 2025 04:47:24 -0500 Subject: [PATCH] osd/scrub: turning additional counters into unlabeled ones, i.e. moving counters from the 'scrbcnt_' enums into the ScrubCounterSet object. Signed-off-by: Ronen Friedman --- src/osd/osd_perf_counters.cc | 19 +++++++++++------- src/osd/osd_perf_counters.h | 32 +++++++++++++++---------------- src/osd/scrubber/pg_scrubber.h | 14 ++++++++++++-- src/osd/scrubber/scrub_machine.cc | 2 +- src/osd/scrubber_common.h | 5 +++++ 5 files changed, 46 insertions(+), 26 deletions(-) diff --git a/src/osd/osd_perf_counters.cc b/src/osd/osd_perf_counters.cc index bbab02a0bbcb5..d79cfbee7c40c 100644 --- a/src/osd/osd_perf_counters.cc +++ b/src/osd/osd_perf_counters.cc @@ -376,9 +376,20 @@ PerfCounters *build_osd_logger(CephContext *cct) { // scrub (no EC vs. replicated differentiation) // scrub - replicated pools - osd_plb.add_u64_counter(l_osd_scrub_rppool_active_started, "num_scrubs_past_reservation_replicated", "scrubs count replicated"); + osd_plb.add_u64_counter(l_osd_scrub_rppool_started, "num_scrubs_started_replicated", "replicated scrubs attempted count"); + osd_plb.add_u64_counter(l_osd_scrub_rppool_active_started, "num_scrubs_past_reservation_replicated", "replicated scrubs count"); + osd_plb.add_u64_counter(l_osd_scrub_rppool_successful, "successful_scrubs_replicated", "successful replicated scrubs count"); + osd_plb.add_time_avg(l_osd_scrub_rppool_successful_elapsed, "successful_scrubs_replicated_elapsed", "time to complete a successful replicated scrub"); + osd_plb.add_u64_counter(l_osd_scrub_rppool_failed, "failed_scrubs_replicated", "failed replicated scrubs count"); + osd_plb.add_time_avg(l_osd_scrub_rppool_failed_elapsed, "failed_scrubs_replicated_elapsed", "time to scrub failure replicated"); + // scrub - EC + osd_plb.add_u64_counter(l_osd_scrub_ec_started, "num_scrubs_started_ec", "scrubs attempted count ec"); 
osd_plb.add_u64_counter(l_osd_scrub_ec_active_started, "num_scrubs_past_reservation_ec", "scrubs count ec"); + osd_plb.add_u64_counter(l_osd_scrub_ec_successful, "successful_scrubs_ec", "successful scrubs count ec"); + osd_plb.add_time_avg(l_osd_scrub_ec_successful_elapsed, "successful_scrubs_ec_elapsed", "time to complete a successful ec scrub"); + osd_plb.add_u64_counter(l_osd_scrub_ec_failed, "failed_scrubs_ec", "failed scrubs count ec"); + osd_plb.add_time_avg(l_osd_scrub_ec_failed_elapsed, "failed_scrubs_ec_elapsed", "time to scrub failure ec"); return osd_plb.create_perf_counters(); } @@ -431,12 +442,6 @@ PerfCounters *build_scrub_labeled_perf(CephContext *cct, std::string label) scrub_perf.set_prio_default(PerfCountersBuilder::PRIO_INTERESTING); - scrub_perf.add_u64_counter(scrbcnt_started, "num_scrubs_started", "scrubs attempted count"); - scrub_perf.add_u64_counter(scrbcnt_failed, "failed_scrubs", "failed scrubs count"); - scrub_perf.add_u64_counter(scrbcnt_successful, "successful_scrubs", "successful scrubs count"); - scrub_perf.add_time_avg(scrbcnt_failed_elapsed, "failed_scrubs_elapsed", "time to scrub failure"); - scrub_perf.add_time_avg(scrbcnt_successful_elapsed, "successful_scrubs_elapsed", "time to scrub completion"); - scrub_perf.add_u64_counter(scrbcnt_preempted, "preemptions", "preemptions on scrubs"); scrub_perf.add_u64_counter(scrbcnt_chunks_selected, "chunk_selected", "chunk selection during scrubs"); scrub_perf.add_u64_counter(scrbcnt_chunks_busy, "chunk_busy", "chunk busy during scrubs"); diff --git a/src/osd/osd_perf_counters.h b/src/osd/osd_perf_counters.h index 93d0ac311a126..dc551c16d480d 100644 --- a/src/osd/osd_perf_counters.h +++ b/src/osd/osd_perf_counters.h @@ -148,22 +148,34 @@ enum osd_counter_idx_t { l_osd_scrub_omapgetheader_bytes, ///< bytes read by omap get header l_osd_scrub_omapget_cnt, ///< omap get calls count l_osd_scrub_omapget_bytes, ///< total bytes read by omap get - // scrub I/O - replicated pools + + // ---- scrub 
I/O - replicated pools l_osd_scrub_rppool_getattr_cnt, ///< get_attr calls count l_osd_scrub_rppool_stats_cnt, ///< stats calls count l_osd_scrub_rppool_read_cnt, ///< read calls count l_osd_scrub_rppool_read_bytes, ///< total bytes read - // scrub I/O - EC + + // ---- scrub I/O - EC l_osd_scrub_ec_getattr_cnt, ///< get_attr calls count l_osd_scrub_ec_stats_cnt, ///< stats calls count l_osd_scrub_ec_read_cnt, ///< read calls count l_osd_scrub_ec_read_bytes, ///< total bytes read - // scrub (no EC vs. replicated differentiation) - // scrub - replicated pools + // ---- scrub - replicated pools + l_osd_scrub_rppool_started, ///< scrubs that got started l_osd_scrub_rppool_active_started, ///< scrubs that got past replicas reservation + l_osd_scrub_rppool_successful, ///< successful scrubs count + l_osd_scrub_rppool_successful_elapsed, ///< time to complete a successful scrub + l_osd_scrub_rppool_failed, ///< failed scrubs count + l_osd_scrub_rppool_failed_elapsed, ///< time from start to failure + // scrub - EC + l_osd_scrub_ec_started, ///< scrubs that got started l_osd_scrub_ec_active_started, /// scrubs that got past secondaries reservation + l_osd_scrub_ec_successful, ///< successful scrubs count + l_osd_scrub_ec_successful_elapsed, ///< time to complete a successful scrub + l_osd_scrub_ec_failed, ///< failed scrubs count + l_osd_scrub_ec_failed_elapsed, ///< time from start to failure l_osd_last, }; @@ -214,18 +226,6 @@ PerfCounters *build_recoverystate_perf(CephContext *cct); enum { scrbcnt_first = 20500, - // -- basic statistics -- - /// The number of times we started a scrub - scrbcnt_started, - /// # successful scrubs - scrbcnt_successful, - /// time to complete a successful scrub - scrbcnt_successful_elapsed, - /// # failed scrubs - scrbcnt_failed, - /// time for a scrub to fail - scrbcnt_failed_elapsed, - // -- interruptions of various types /// # preemptions scrbcnt_preempted, diff --git a/src/osd/scrubber/pg_scrubber.h b/src/osd/scrubber/pg_scrubber.h 
index 5070892631971..6607f0fa179a1 100644 --- a/src/osd/scrubber/pg_scrubber.h +++ b/src/osd/scrubber/pg_scrubber.h @@ -144,7 +144,12 @@ static inline constexpr ScrubCounterSet io_counters_replicated{ .omapgetheader_bytes = l_osd_scrub_omapgetheader_bytes, .omapget_cnt = l_osd_scrub_omapget_cnt, .omapget_bytes = l_osd_scrub_omapget_bytes, - .active_started_cnt = l_osd_scrub_rppool_active_started + .started_cnt = l_osd_scrub_rppool_started, + .active_started_cnt = l_osd_scrub_rppool_active_started, + .successful_cnt = l_osd_scrub_rppool_successful, + .successful_elapsed = l_osd_scrub_rppool_successful_elapsed, + .failed_cnt = l_osd_scrub_rppool_failed, + .failed_elapsed = l_osd_scrub_rppool_failed_elapsed }; static inline constexpr ScrubCounterSet io_counters_ec{ @@ -156,7 +161,12 @@ static inline constexpr ScrubCounterSet io_counters_ec{ .omapgetheader_bytes = l_osd_scrub_omapgetheader_bytes, .omapget_cnt = l_osd_scrub_omapget_cnt, .omapget_bytes = l_osd_scrub_omapget_bytes, - .active_started_cnt = l_osd_scrub_ec_active_started + .started_cnt = l_osd_scrub_ec_started, + .active_started_cnt = l_osd_scrub_ec_active_started, + .successful_cnt = l_osd_scrub_ec_successful, + .successful_elapsed = l_osd_scrub_ec_successful_elapsed, + .failed_cnt = l_osd_scrub_ec_failed, + .failed_elapsed = l_osd_scrub_ec_failed_elapsed }; } // namespace Scrub diff --git a/src/osd/scrubber/scrub_machine.cc b/src/osd/scrubber/scrub_machine.cc index 439a935665142..72ab17295f2b1 100644 --- a/src/osd/scrubber/scrub_machine.cc +++ b/src/osd/scrubber/scrub_machine.cc @@ -199,7 +199,7 @@ Session::Session(my_context ctx) m_perf_set = scrbr->get_labeled_counters(); m_osd_counters = scrbr->get_osd_perf_counters(); m_counters_idx = &scrbr->get_unlabeled_counters(); - m_perf_set->inc(scrbcnt_started); + m_osd_counters->inc(m_counters_idx->started_cnt); } Session::~Session() diff --git a/src/osd/scrubber_common.h b/src/osd/scrubber_common.h index ceca766f20573..b46151c3b02a0 100644 --- 
a/src/osd/scrubber_common.h +++ b/src/osd/scrubber_common.h @@ -301,7 +301,12 @@ struct ScrubCounterSet { osd_counter_idx_t omapgetheader_bytes; ///< bytes read by omap get header osd_counter_idx_t omapget_cnt; ///< omap get calls count osd_counter_idx_t omapget_bytes; ///< total bytes read by omap get + osd_counter_idx_t started_cnt; ///< the number of times we started a scrub osd_counter_idx_t active_started_cnt; ///< scrubs that got past reservation + osd_counter_idx_t successful_cnt; ///< successful scrubs count + osd_counter_idx_t successful_elapsed; ///< time to complete a successful scrub + osd_counter_idx_t failed_cnt; ///< failed scrubs count + osd_counter_idx_t failed_elapsed; ///< time from start to failure }; } // namespace Scrub -- 2.39.5