From: Jon Bailey Date: Tue, 10 Feb 2026 15:46:30 +0000 (+0000) Subject: osd: Split counter for stats invalidations so we can more accurately identify the... X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=d45d149b0042f2009a2aec9cc010f592a2cd33bd;p=ceph.git osd: Split counter for stats invalidations so we can more accurately identify the causes of frequent failures. Signed-off-by: Jon Bailey --- diff --git a/src/osd/PeeringState.cc b/src/osd/PeeringState.cc index d723710003fc..66a14168580d 100644 --- a/src/osd/PeeringState.cc +++ b/src/osd/PeeringState.cc @@ -3569,7 +3569,8 @@ void PeeringState::proc_master_log( } info.stats.stats_invalid |= invalidate_stats; - increment_stats_invalidations_counter(invalidate_stats); + increment_stats_invalidations_counter(rs_process_log_stats_invalidated, + invalidate_stats); if (invalidate_stats) { psdout(10) << "invalidating stats for " << pg_whoami << dendl; @@ -3828,8 +3829,10 @@ void PeeringState::split_into( child->info.last_epoch_started = info.last_epoch_started; child->info.last_interval_started = info.last_interval_started; - increment_stats_invalidations_counter(info.stats.stats_invalid); - increment_stats_invalidations_counter(child->info.stats.stats_invalid); + increment_stats_invalidations_counter(rs_pg_split_parent_stats_invalidated, + info.stats.stats_invalid); + increment_stats_invalidations_counter(rs_pg_split_child_stats_invalidated, + child->info.stats.stats_invalid); // There can't be recovery/backfill going on now int primary, up_primary; @@ -4501,9 +4504,10 @@ std::optional PeeringState::prepare_stats_for_publish( } } -void PeeringState::increment_stats_invalidations_counter(bool invalidation_state) { +void PeeringState::increment_stats_invalidations_counter(int stats_invalidation_counter, + bool invalidation_state) { if (invalidation_state) { - pl->get_peering_perf().inc(rs_stats_invalidated); + pl->get_peering_perf().inc(stats_invalidation_counter); } } @@ 
-4606,7 +4610,8 @@ void PeeringState::update_stats( } if (previous_stats_invalidation != info.stats.stats_invalid) { - increment_stats_invalidations_counter(info.stats.stats_invalid); + increment_stats_invalidations_counter(rs_update_stats_invalidated, + info.stats.stats_invalid); } if (t) { @@ -4655,7 +4660,8 @@ bool PeeringState::append_log_entries_update_missing( info.last_complete = info.last_update; } info.stats.stats_invalid = info.stats.stats_invalid || invalidate_stats; - increment_stats_invalidations_counter(invalidate_stats); + increment_stats_invalidations_counter(rs_append_log_stats_invalidated, + invalidate_stats); psdout(20) << "trim_to bool = " << bool(trim_to) << " trim_to = " << (trim_to ? *trim_to : eversion_t()) << dendl; if (trim_to) { @@ -4709,7 +4715,8 @@ void PeeringState::merge_new_log_entries( dpp); pinfo.last_update = info.last_update; pinfo.stats.stats_invalid = pinfo.stats.stats_invalid || invalidate_stats; - increment_stats_invalidations_counter(invalidate_stats); + increment_stats_invalidations_counter(rs_merge_log_stats_invalidated, + invalidate_stats); rebuild_missing = rebuild_missing || invalidate_stats; } diff --git a/src/osd/PeeringState.h b/src/osd/PeeringState.h index cfc3d9ae96e0..ae426060abab 100644 --- a/src/osd/PeeringState.h +++ b/src/osd/PeeringState.h @@ -1812,7 +1812,7 @@ private: void update_blocked_by(); void update_calc_stats(); - void increment_stats_invalidations_counter(bool invalidation_state); + void increment_stats_invalidations_counter(int stats_invalidation_counter, bool invalidation_state); void add_log_entry(const pg_log_entry_t& e, ObjectStore::Transaction &t, bool applied); diff --git a/src/osd/osd_perf_counters.cc b/src/osd/osd_perf_counters.cc index 1018add02ae3..51d984317136 100644 --- a/src/osd/osd_perf_counters.cc +++ b/src/osd/osd_perf_counters.cc @@ -536,7 +536,12 @@ PerfCounters *build_recoverystate_perf(CephContext *cct) { rs_perf.add_time_avg(rs_getmissing_latency, "getmissing_latency", 
"Getmissing recovery state latency"); rs_perf.add_time_avg(rs_waitupthru_latency, "waitupthru_latency", "Waitupthru recovery state latency"); rs_perf.add_time_avg(rs_notrecovering_latency, "notrecovering_latency", "Notrecovering recovery state latency"); - rs_perf.add_u64_counter(rs_stats_invalidated, "stats_invalidated", "Number of times pg stats received invalidations"); + rs_perf.add_u64_counter(rs_process_log_stats_invalidated, "process_log_stats_invalidated", "Number of times pg stats received invalidations during log processing"); + rs_perf.add_u64_counter(rs_pg_split_parent_stats_invalidated, "pg_split_parent_stats_invalidated", "Number of times parent pg stats received invalidations during pg splitting"); + rs_perf.add_u64_counter(rs_pg_split_child_stats_invalidated, "pg_split_child_stats_invalidated", "Number of times child pg stats received invalidations during pg splitting"); + rs_perf.add_u64_counter(rs_update_stats_invalidated, "update_stats_invalidated", "Number of times pg stats received invalidations during stats updates"); + rs_perf.add_u64_counter(rs_append_log_stats_invalidated, "append_log_stats_invalidated", "Number of times pg stats received invalidations when appending new log entries"); + rs_perf.add_u64_counter(rs_merge_log_stats_invalidated, "merge_log_stats_invalidated", "Number of times pg stats received invalidations during merging of log entries"); return rs_perf.create_perf_counters(); } diff --git a/src/osd/osd_perf_counters.h b/src/osd/osd_perf_counters.h index 8ae219b768d5..4b56b79215db 100644 --- a/src/osd/osd_perf_counters.h +++ b/src/osd/osd_perf_counters.h @@ -257,7 +257,12 @@ enum { rs_getmissing_latency, rs_waitupthru_latency, rs_notrecovering_latency, - rs_stats_invalidated, + rs_process_log_stats_invalidated, + rs_pg_split_parent_stats_invalidated, + rs_pg_split_child_stats_invalidated, + rs_update_stats_invalidated, + rs_append_log_stats_invalidated, + rs_merge_log_stats_invalidated, rs_last, };