From: Ronen Friedman Date: Sat, 16 May 2026 14:39:51 +0000 (+0000) Subject: crimson/osd: decouple snap trim initiation from scrub completion X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=68896302163074a56e4d0f5e0de588fc87770b80;p=ceph.git crimson/osd: decouple snap trim initiation from scrub completion Add SnapTrimInitiate operation so kick_snap_trim() no longer calls on_active_actmap() inline during scrub completion, which nested conflicting with_interruption contexts and hit an assertion. Fixes: https://tracker.ceph.com/issues/76550 Signed-off-by: Ronen Friedman --- diff --git a/src/crimson/osd/osd_operation.h b/src/crimson/osd/osd_operation.h index 8ceaddd2c515..000cf4057efb 100644 --- a/src/crimson/osd/osd_operation.h +++ b/src/crimson/osd/osd_operation.h @@ -107,6 +107,7 @@ enum class OperationTypeCode { scrub_scan, pgpct_request, ecrep_request, + snap_trim_initiate, last_op }; @@ -133,6 +134,7 @@ static constexpr const char* const OP_NAMES[] = { "scrub_scan", "pgpct_request", "ecrep_request", + "snap_trim_initiate", }; // prevent the addition of OperationTypeCode-s with no matching OP_NAMES entry: diff --git a/src/crimson/osd/osd_operations/snaptrim_event.cc b/src/crimson/osd/osd_operations/snaptrim_event.cc index 650f37c299a3..c222f506a8f8 100644 --- a/src/crimson/osd/osd_operations/snaptrim_event.cc +++ b/src/crimson/osd/osd_operations/snaptrim_event.cc @@ -28,6 +28,13 @@ namespace crimson { return {}; } }; + + template <> + struct EventBackendRegistry { + static std::tuple<> get_backends() { + return {}; + } + }; } namespace crimson::osd { @@ -466,4 +473,23 @@ void SnapTrimObjSubEvent::dump_detail(Formatter *f) const f->close_section(); } +void SnapTrimInitiate::print(std::ostream &lhs) const +{ + fmt::print(lhs, "SnapTrimInitiate(pgid={})", pg->get_pgid()); +} + +void SnapTrimInitiate::dump_detail(Formatter *f) const +{ + Formatter::ObjectSection section(*f, "SnapTrimInitiate"); + f->dump_stream("pgid") << pg->get_pgid(); +} + +seastar::future<> SnapTrimInitiate::start() +{ + logger().debug("{}: start", *this); + pg->initiate_snap_trim(); + logger().debug("{}: complete", *this); + co_return; +} + } // namespace crimson::osd diff --git a/src/crimson/osd/osd_operations/snaptrim_event.h b/src/crimson/osd/osd_operations/snaptrim_event.h index f306030c7a63..cca27ff5be9d 100644 --- a/src/crimson/osd/osd_operations/snaptrim_event.h +++ b/src/crimson/osd/osd_operations/snaptrim_event.h @@ -6,6 +6,7 @@ #include #include +#include "common/fmt_common.h" #include "crimson/osd/object_context_loader.h" #include "crimson/osd/osdmap_gate.h" #include "crimson/osd/osd_operation.h" @@ -173,6 +174,36 @@ public: > tracking_events; }; +class SnapTrimInitiate final : public PhasedOperationT { +public: + static constexpr OperationTypeCode type = + OperationTypeCode::snap_trim_initiate; + + SnapTrimInitiate(Ref pg) : pg(std::move(pg)) {} + + void print(std::ostream &) const final; + void dump_detail(ceph::Formatter* f) const final; + seastar::future<> start(); + + template + auto fmt_print_ctx(FormatContext& ctx) const { + return fmt::format_to(ctx.out(), "SnapTrimInitiate(pgid={})", + pg->get_pgid()); + } + +private: + Ref pg; + PipelineHandle handle; + +public: + PipelineHandle& get_handle() { return handle; } + + std::tuple< + StartEvent, + CompletionEvent + > tracking_events; +}; + } // namespace crimson::osd #if FMT_VERSION >= 90000 diff --git a/src/crimson/osd/pg.cc b/src/crimson/osd/pg.cc index 3ae9e837c5c3..18001b857baa 100644 --- a/src/crimson/osd/pg.cc +++ b/src/crimson/osd/pg.cc @@ -626,6 +626,12 @@ PG::interruptible_future PG::trim_snap( } void PG::on_active_actmap() +{ + logger().debug("{}: {}", *this, __func__); + initiate_snap_trim(); +} + +void PG::initiate_snap_trim() { logger().debug("{}: {} snap_trimq={}", *this, __func__, snap_trimq); peering_state.state_clear(PG_STATE_SNAPTRIM_ERROR); @@ -641,7 +647,9 @@ void PG::on_active_actmap() logger().info("{}: {} scrubbing, deferring snap trim", *this, __func__); return; } - // loops until snap_trimq is empty or SNAPTRIM_ERROR. + if (snap_trimq.empty()) { + return; + } Ref pg_ref = this; std::ignore = interruptor::with_interruption([this] { return interruptor::repeat( @@ -658,7 +666,7 @@ void PG::on_active_actmap() return trim_snap(to_trim, needs_pause); } ).then_interruptible([this] { - logger().debug("{}: PG::on_active_actmap() finished trimming", + logger().debug("{}: PG::initiate_snap_trim() finished trimming", *this); peering_state.state_clear(PG_STATE_SNAPTRIM); peering_state.state_clear(PG_STATE_SNAPTRIM_ERROR); @@ -681,7 +689,7 @@ void PG::kick_snap_trim() && !snap_trimq.empty() && !peering_state.state_test(PG_STATE_SNAPTRIM)) { logger().info("{}: scrub complete, retriggering snap trim", *this); - on_active_actmap(); + (void) shard_services.start_operation(this); } } diff --git a/src/crimson/osd/pg.h b/src/crimson/osd/pg.h index bb7c20dfd99f..28f01b21ff46 100644 --- a/src/crimson/osd/pg.h +++ b/src/crimson/osd/pg.h @@ -912,8 +912,12 @@ private: bool needs_pause); /// Re-trigger snap trimming after scrub completion. Snap trimming is /// deferred while the PG is scrubbing; call this from notify_scrub_end() - /// to resume. + /// to resume. Spawns a SnapTrimInitiate operation to avoid nesting + /// interrupt conditions. void kick_snap_trim(); + /// Initiate the snap trim loop with all state checks. Called from + /// SnapTrimInitiate and on_active_actmap(). + void initiate_snap_trim(); private: PG_OSDMapGate osdmap_gate; @@ -1156,6 +1160,7 @@ private: friend class WatchTimeoutRequest; friend class SnapTrimEvent; friend class SnapTrimObjSubEvent; + friend class SnapTrimInitiate; friend ECBackend; private: