From b6d39ff0a9d1e471c9091e0fc5d625e0c0d4b099 Mon Sep 17 00:00:00 2001
From: Ronen Friedman
Date: Sat, 2 May 2026 15:53:49 +0000
Subject: [PATCH] crimson/osd: defer snap trimming while scrubbing

Classic OSD enforces mutual exclusion between scrubbing and snap
trimming via the WaitScrub state in the snap trim state machine.
Crimson was missing this, allowing both to run concurrently on the
same PG (visible as active+clean+scrubbing+deep+snaptrim), which
could prevent snap trimming from completing within the expected
timeout.

Defer snap trim initiation while PG_STATE_SCRUBBING is set, and
re-trigger it from notify_scrub_end() via kick_snap_trim().

This is a temporary fix until the full scrub scheduling code,
including is_scrub_queued_or_active(), is merged.

Signed-off-by: Ronen Friedman
---
 src/crimson/osd/pg.cc                | 17 +++++++++++++++++
 src/crimson/osd/pg.h                 |  4 ++++
 src/crimson/osd/scrub/pg_scrubber.cc |  1 +
 3 files changed, 22 insertions(+)

diff --git a/src/crimson/osd/pg.cc b/src/crimson/osd/pg.cc
index f1b890eb2bea..bf3ea70d157b 100644
--- a/src/crimson/osd/pg.cc
+++ b/src/crimson/osd/pg.cc
@@ -623,6 +623,13 @@ void PG::on_active_actmap()
       logger().debug("{}: {} already trimming.", *this, __func__);
       return;
     }
+    // Temporary: defer snap trimming while scrubbing, until the full scrub
+    // scheduling code (including is_scrub_queued_or_active()) is merged.
+    // (mark https://tracker.ceph.com/issues/76428 as solved once fixed).
+    if (peering_state.state_test(PG_STATE_SCRUBBING)) {
+      logger().info("{}: {} scrubbing, deferring snap trim", *this, __func__);
+      return;
+    }
     // loops until snap_trimq is empty or SNAPTRIM_ERROR.
Ref pg_ref = this;
     std::ignore = interruptor::with_interruption([this] {
@@ -657,6 +664,16 @@ void PG::on_active_actmap()
   }
 }
 
+void PG::kick_snap_trim()  // invoked by PGScrubber::notify_scrub_end()
+{
+  if (peering_state.is_active() && peering_state.is_clean()
+      && !snap_trimq.empty()  // nothing queued, nothing to resume
+      && !peering_state.state_test(PG_STATE_SNAPTRIM)) {  // SNAPTRIM not set
+    logger().info("{}: scrub complete, retriggering snap trim", *this);
+    on_active_actmap();  // NOTE(review): defers again if SCRUBBING is still set
+  }
+}
+
 void PG::on_active_advmap(const OSDMapRef &osdmap)
 {
   const auto new_removed_snaps = osdmap->get_new_removed_snaps();
diff --git a/src/crimson/osd/pg.h b/src/crimson/osd/pg.h
index b68fa886ec16..eafcd5d45f2e 100644
--- a/src/crimson/osd/pg.h
+++ b/src/crimson/osd/pg.h
@@ -907,6 +907,10 @@ private:
   interruptible_future trim_snap(
     snapid_t to_trim,
     bool needs_pause);
+  /// Re-trigger snap trimming after scrub completion. Snap trimming is
+  /// deferred while the PG is scrubbing; call this from notify_scrub_end()
+  /// to resume.
+  void kick_snap_trim();
 
 private:
   PG_OSDMapGate osdmap_gate;
diff --git a/src/crimson/osd/scrub/pg_scrubber.cc b/src/crimson/osd/scrub/pg_scrubber.cc
index 0a6b7a5a827d..4945918240a7 100644
--- a/src/crimson/osd/scrub/pg_scrubber.cc
+++ b/src/crimson/osd/scrub/pg_scrubber.cc
@@ -134,6 +134,7 @@ void PGScrubber::notify_scrub_end(bool deep)
     pg.peering_state.state_clear(PG_STATE_DEEP_SCRUB);
   }
   pg.publish_stats_to_osd();
+  pg.kick_snap_trim();
 }
 
 const std::set &PGScrubber::get_ids_to_scrub() const
-- 
2.47.3