From 5e689b56a197ed0ff4911586246aba7d54859ffc Mon Sep 17 00:00:00 2001 From: Ronen Friedman Date: Wed, 29 Apr 2026 03:45:34 +0000 Subject: [PATCH] osd: collect total snap-trim queue length Periodically collect the total snap-trim queue length across all PGs. Expose it through OSDService::get_snap_trim_queue_total(). Signed-off-by: Ronen Friedman --- src/osd/OSD.cc | 22 ++++++++++++++++++++++ src/osd/OSD.h | 13 +++++++++++++ src/osd/scrubber/osd_scrub.cc | 5 +++++ src/osd/scrubber/osd_scrub_sched.h | 2 ++ 4 files changed, 42 insertions(+) diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index b281be2f3796..6e5e7b7b335d 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -6469,6 +6469,12 @@ void OSD::tick_without_osd_lock() } if (is_active()) { + constexpr uint_fast16_t snap_trim_scan_interval = 5; + if (++trim_queue_length_countdown >= snap_trim_scan_interval) { + trim_queue_length_countdown = 0; + service.snap_trim_queue_total = + service.calc_snap_trim_queue_total(); + } service.get_scrub_services().initiate_scrub(service.is_recovery_active()); service.promote_throttle_recalibrate(); resume_creating_pg(); @@ -7890,6 +7896,22 @@ std::optional OSDService::get_locked_pg(spg_t pgid) } +uint64_t OSDService::calc_snap_trim_queue_total() +{ + std::vector pgids; + osd->_get_pgids(&pgids); + uint64_t total = 0; + for (auto& pgid : pgids) { + if (auto locked_pg = get_locked_pg(pgid)) { + const auto& pg = locked_pg->pg(); + if (pg->is_primary()) { + total += pg->get_snap_trimq_size(); + } + } + } + return total; +} + MPGStats* OSD::collect_pg_stats() { dout(15) << __func__ << dendl; diff --git a/src/osd/OSD.h b/src/osd/OSD.h index 3b3e7092650d..8bc7a7f48e05 100644 --- a/src/osd/OSD.h +++ b/src/osd/OSD.h @@ -152,6 +152,18 @@ public: } int get_nodeid() const final { return whoami; } + + /// iterate over all PGs, summing their snap trim queue lengths + uint64_t calc_snap_trim_queue_total(); + uint64_t get_snap_trim_queue_total() const { + // the cached value, calculated by 
calc_snap_trim_queue_total() + return snap_trim_queue_total; + } + + // not atomic: both write (tick_without_osd_lock) and read (initiate_scrub, + // called from the same timer callback) are on the same thread + uint64_t snap_trim_queue_total{0}; + private: OSDMapRef osdmap; @@ -1240,6 +1252,7 @@ class OSD : public Dispatcher, // Tick timer for those stuff that do not need osd_lock ceph::mutex tick_timer_lock = ceph::make_mutex("OSD::tick_timer_lock"); SafeTimer tick_timer_without_osd_lock; + uint_fast16_t trim_queue_length_countdown = 0; std::string gss_ktfile_client{}; public: diff --git a/src/osd/scrubber/osd_scrub.cc b/src/osd/scrubber/osd_scrub.cc index 11e9756af20a..f8e2e40a31ea 100644 --- a/src/osd/scrubber/osd_scrub.cc +++ b/src/osd/scrubber/osd_scrub.cc @@ -107,6 +107,11 @@ void OsdScrub::initiate_scrub(bool is_recovery_active) << dendl; } + { + auto stqt = m_osd_svc.get_snap_trim_queue_total(); + dout(20) << fmt::format("snap_trim_queue_total: {}", stqt) << dendl; + } + const utime_t scrub_time = ceph_clock_now(); // check the OSD-wide environment conditions (scrub resources, time, etc.). diff --git a/src/osd/scrubber/osd_scrub_sched.h b/src/osd/scrubber/osd_scrub_sched.h index 929bdc17f863..92140ba52197 100644 --- a/src/osd/scrubber/osd_scrub_sched.h +++ b/src/osd/scrubber/osd_scrub_sched.h @@ -140,6 +140,8 @@ class ScrubSchedListener { */ virtual AsyncReserver& get_scrub_reserver() = 0; + virtual uint64_t get_snap_trim_queue_total() const = 0; + virtual ~ScrubSchedListener() {} }; -- 2.47.3