git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
crimson/osd: decouple snap trim initiation from scrub completion 68948/head
author Ronen Friedman <rfriedma@redhat.com>
Sat, 16 May 2026 14:39:51 +0000 (14:39 +0000)
committer Ronen Friedman <rfriedma@redhat.com>
Wed, 20 May 2026 04:53:43 +0000 (04:53 +0000)
Add SnapTrimInitiate operation so kick_snap_trim() no longer calls
on_active_actmap() inline during scrub completion, which nested
conflicting with_interruption contexts and hit an assertion.

Fixes: https://tracker.ceph.com/issues/76550
Signed-off-by: Ronen Friedman <rfriedma@redhat.com>
src/crimson/osd/osd_operation.h
src/crimson/osd/osd_operations/snaptrim_event.cc
src/crimson/osd/osd_operations/snaptrim_event.h
src/crimson/osd/pg.cc
src/crimson/osd/pg.h

index 8ceaddd2c515fe526d8a92655041c37446810c3a..000cf4057efb20a9dc800b1c6f3bc6b541976fdf 100644 (file)
@@ -107,6 +107,7 @@ enum class OperationTypeCode {
   scrub_scan,
   pgpct_request,
   ecrep_request,
+  snap_trim_initiate,
   last_op
 };
 
@@ -133,6 +134,7 @@ static constexpr const char* const OP_NAMES[] = {
   "scrub_scan",
   "pgpct_request",
   "ecrep_request",
+  "snap_trim_initiate",
 };
 
 // prevent the addition of OperationTypeCode-s with no matching OP_NAMES entry:
index 650f37c299a399b5b6270540c16f89996f825805..c222f506a8f84053f2660af3d0f7dffeacc45d36 100644 (file)
@@ -28,6 +28,13 @@ namespace crimson {
       return {};
     }
   };
+
+  template <>
+  struct EventBackendRegistry<osd::SnapTrimInitiate> {
+    static std::tuple<> get_backends() {
+      return {};  // empty tuple: no event backends are supplied for SnapTrimInitiate
+    }
+  };
 }
 
 namespace crimson::osd {
@@ -466,4 +473,23 @@ void SnapTrimObjSubEvent::dump_detail(Formatter *f) const
   f->close_section();
 }
 
+void SnapTrimInitiate::print(std::ostream &lhs) const
+{
+  fmt::print(lhs, "SnapTrimInitiate(pgid={})", pg->get_pgid());  // same text that fmt_print_ctx() emits in snaptrim_event.h
+}
+
+void SnapTrimInitiate::dump_detail(Formatter *f) const
+{
+  Formatter::ObjectSection section(*f, "SnapTrimInitiate");  // NOTE(review): presumably a scope guard that closes the section on exit — confirm
+  f->dump_stream("pgid") << pg->get_pgid();  // the pgid is the only detail dumped for this operation
+}
+
+seastar::future<> SnapTrimInitiate::start()
+{
+  logger().debug("{}: start", *this);
+  pg->initiate_snap_trim();  // plain synchronous call; the trim loop it launches is detached (its future is discarded via std::ignore)
+  logger().debug("{}: complete", *this);  // "complete" marks the end of initiation, not the end of trimming
+  co_return;  // nothing is awaited; the returned future resolves immediately
+}
+
 } // namespace crimson::osd
index f306030c7a63cc34737b7ad4a6a7610353c6f1da..cca27ff5be9d5bef4a7cffbff174cd9129c9aa64 100644 (file)
@@ -6,6 +6,7 @@
 #include <iostream>
 #include <seastar/core/future.hh>
 
+#include "common/fmt_common.h"
 #include "crimson/osd/object_context_loader.h"
 #include "crimson/osd/osdmap_gate.h"
 #include "crimson/osd/osd_operation.h"
@@ -173,6 +174,36 @@ public:
   > tracking_events;
 };
 
+class SnapTrimInitiate final : public PhasedOperationT<SnapTrimInitiate> {  // operation whose start() calls PG::initiate_snap_trim()
+public:
+  static constexpr OperationTypeCode type =
+    OperationTypeCode::snap_trim_initiate;
+
+  SnapTrimInitiate(Ref<PG> pg) : pg(std::move(pg)) {}  // NOTE(review): single-argument ctor is not explicit — confirm that is intended
+
+  void print(std::ostream &) const final;
+  void dump_detail(ceph::Formatter* f) const final;
+  seastar::future<> start();  // entry point; defined in snaptrim_event.cc
+
+  template <typename FormatContext>
+  auto fmt_print_ctx(FormatContext& ctx) const {
+    return fmt::format_to(ctx.out(), "SnapTrimInitiate(pgid={})",
+                          pg->get_pgid());  // same text as print(); keep the two in sync
+  }
+
+private:
+  Ref<PG> pg;  // the PG on which start() calls initiate_snap_trim()
+  PipelineHandle handle;  // exposed through get_handle() below
+
+public:
+  PipelineHandle& get_handle() { return handle; }
+
+  std::tuple<
+    StartEvent,
+    CompletionEvent
+  > tracking_events;
+};
+
 } // namespace crimson::osd
 
 #if FMT_VERSION >= 90000
index 3ae9e837c5c309339340a8d2ed94c87983c627b4..18001b857baad35fed7e408795620e9bd38bf0ed 100644 (file)
@@ -626,6 +626,12 @@ PG::interruptible_future<seastar::stop_iteration> PG::trim_snap(
 }
 
 void PG::on_active_actmap()
+{
+  logger().debug("{}: {}", *this, __func__);
+  initiate_snap_trim();  // the state checks and the trim loop now live in initiate_snap_trim(), shared with SnapTrimInitiate
+}
+
+void PG::initiate_snap_trim()
 {
   logger().debug("{}: {} snap_trimq={}", *this, __func__, snap_trimq);
   peering_state.state_clear(PG_STATE_SNAPTRIM_ERROR);
@@ -641,7 +647,9 @@ void PG::on_active_actmap()
       logger().info("{}: {} scrubbing, deferring snap trim", *this, __func__);
       return;
     }
-    // loops until snap_trimq is empty or SNAPTRIM_ERROR.
+    if (snap_trimq.empty()) {
+      return;
+    }
     Ref<PG> pg_ref = this;
     std::ignore = interruptor::with_interruption([this] {
       return interruptor::repeat(
@@ -658,7 +666,7 @@ void PG::on_active_actmap()
           return trim_snap(to_trim, needs_pause);
         }
       ).then_interruptible([this] {
-        logger().debug("{}: PG::on_active_actmap() finished trimming",
+        logger().debug("{}: PG::initiate_snap_trim() finished trimming",
                        *this);
         peering_state.state_clear(PG_STATE_SNAPTRIM);
         peering_state.state_clear(PG_STATE_SNAPTRIM_ERROR);
@@ -681,7 +689,7 @@ void PG::kick_snap_trim()
       && !snap_trimq.empty()
       && !peering_state.state_test(PG_STATE_SNAPTRIM)) {
     logger().info("{}: scrub complete, retriggering snap trim", *this);
-    on_active_actmap();
+    (void) shard_services.start_operation<SnapTrimInitiate>(this);
   }
 }
 
index bb7c20dfd99f62b51da16a6f3302e094f9678f7a..28f01b21ff46b0fcbef68b06992e589e1cb136a8 100644 (file)
@@ -912,8 +912,12 @@ private:
     bool needs_pause);
   /// Re-trigger snap trimming after scrub completion. Snap trimming is
   /// deferred while the PG is scrubbing; call this from notify_scrub_end()
-  /// to resume.
+  /// to resume. Spawns a SnapTrimInitiate operation to avoid nesting
+  /// interrupt conditions.
   void kick_snap_trim();
+  /// Initiate the snap trim loop with all state checks. Called from
+  /// SnapTrimInitiate and on_active_actmap().
+  void initiate_snap_trim();
 
 private:
   PG_OSDMapGate osdmap_gate;
@@ -1156,6 +1160,7 @@ private:
   friend class WatchTimeoutRequest;
   friend class SnapTrimEvent;
   friend class SnapTrimObjSubEvent;
+  friend class SnapTrimInitiate;
   friend ECBackend;
 private: