git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
crimson/osd: defer snap trimming while scrubbing 68735/head
author: Ronen Friedman <rfriedma@redhat.com>
Sat, 2 May 2026 15:53:49 +0000 (15:53 +0000)
committer: Ronen Friedman <rfriedma@redhat.com>
Tue, 5 May 2026 16:41:37 +0000 (16:41 +0000)
Classic OSD enforces mutual exclusion between scrubbing and snap
trimming via the WaitScrub state in the snap trim state machine.
Crimson was missing this, allowing both to run concurrently on the
same PG (visible as active+clean+scrubbing+deep+snaptrim), which
could prevent snap trimming from completing within the expected
timeout.

Defer snap trim initiation while PG_STATE_SCRUBBING is set, and
re-trigger it from notify_scrub_end() via kick_snap_trim().

This is a temporary fix until the full scrub scheduling code,
including is_scrub_queued_or_active(), is merged.

Signed-off-by: Ronen Friedman <rfriedma@redhat.com>
src/crimson/osd/pg.cc
src/crimson/osd/pg.h
src/crimson/osd/scrub/pg_scrubber.cc

index f1b890eb2bea57400a3fbfecb5136831787f349b..bf3ea70d157b27dea6fb5ddc07868986bc83bd00 100644 (file)
@@ -623,6 +623,13 @@ void PG::on_active_actmap()
       logger().debug("{}: {} already trimming.", *this, __func__);
       return;
     }
+    // Temporary: defer snap trimming while scrubbing, until the full scrub
+    // scheduling code (including is_scrub_queued_or_active()) is merged.
+    // (mark https://tracker.ceph.com/issues/76428 as solved once fixed).
+    if (peering_state.state_test(PG_STATE_SCRUBBING)) {
+      logger().info("{}: {} scrubbing, deferring snap trim", *this, __func__);
+      return;
+    }
     // loops until snap_trimq is empty or SNAPTRIM_ERROR.
     Ref<PG> pg_ref = this;
     std::ignore = interruptor::with_interruption([this] {
@@ -657,6 +664,16 @@ void PG::on_active_actmap()
   }
 }
 
+void PG::kick_snap_trim()  // invoked as the last step of PGScrubber::notify_scrub_end()
+{
+  if (peering_state.is_active() && peering_state.is_clean()  // retrigger only for an active+clean PG
+      && !snap_trimq.empty()  // ...that still has snaps queued in snap_trimq
+      && !peering_state.state_test(PG_STATE_SNAPTRIM)) {  // ...and without PG_STATE_SNAPTRIM set (presumably: no trim already running -- confirm)
+    logger().info("{}: scrub complete, retriggering snap trim", *this);
+    on_active_actmap();  // re-enter the path that returns early while PG_STATE_SCRUBBING is set
+  }
+}
+
 void PG::on_active_advmap(const OSDMapRef &osdmap)
 {
   const auto new_removed_snaps = osdmap->get_new_removed_snaps();
index b68fa886ec1620a6c0e6f58e1c421b89871c2e93..eafcd5d45f2e569a2b4431b8dd3753eaccf9d94e 100644 (file)
@@ -907,6 +907,10 @@ private:
   interruptible_future<seastar::stop_iteration> trim_snap(
     snapid_t to_trim,
     bool needs_pause);
+  /// Re-trigger snap trimming after scrub completion. Snap trimming is
+  /// deferred while the PG is scrubbing; call this from notify_scrub_end()
+  /// to resume.
+  void kick_snap_trim();
 
 private:
   PG_OSDMapGate osdmap_gate;
index 0a6b7a5a827db510b577615fe9d279648a2fa5b2..4945918240a716492f80d7c34bb9824e2eede0ad 100644 (file)
@@ -134,6 +134,7 @@ void PGScrubber::notify_scrub_end(bool deep)
     pg.peering_state.state_clear(PG_STATE_DEEP_SCRUB);
   }
   pg.publish_stats_to_osd();
+  pg.kick_snap_trim();
 }
 
 const std::set<pg_shard_t> &PGScrubber::get_ids_to_scrub() const