From: Samuel Just Date: Tue, 28 May 2024 16:48:42 +0000 (-0700) Subject: crimson/.../snaptrim_event: remove pipeline stages located on event X-Git-Tag: v20.0.0~1716^2~3 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=19084852ced9f6ae30e29629868aac0ef8620022;p=ceph.git crimson/.../snaptrim_event: remove pipeline stages located on event WaitSubop, WaitTrimTimer, and WaitRepop are pipeline stages local to the operation. As such they don't actually provide any ordering guarrantees as only one operation will ever enter them. Rather, the intent is to hook into the event system to expose information to an administrator. This poses a problem for OrderedConcurrentPhase as it is currently implemented. PipelineHandle::exit() is invoked prior to the op being destructed. PipelineHandle::exit() does: void exit() { barrier.reset(); } For OrderedConcurrentPhase, ~ExitBarrier() invokes ExitBarrier::exit(): void exit() final { if (barrier) { assert(phase); assert(phase->core == seastar::this_shard_id()); std::ignore = std::move(*barrier ).then([phase=this->phase] { phase->mutex.unlock(); }); barrier = std::nullopt; phase = nullptr; } else if (phase) { assert(phase->core == seastar::this_shard_id()); phase->mutex.unlock(); phase = nullptr; } } The problem comes in not waiting for the phase->mutex.unlock() to occur. For SnapTrimEvent, phase is actually in the operation itself. It's possible for that continuation ).then([phase=this->phase] { phase->mutex.unlock(); to occur after the last finally() in ShardServices::start_operation completes and releases the final reference to SnapTrimEvent. This is harmless normally provided that the PG or connection outlives it, but it's a problem for these stages. For now, let's just remove these stages. We can reintroduce another mechanism later to set these event flags without an actual pipeline stage. This is likely a bug even with pipelines not embedded in an operation, but we can fix it later -- https://tracker.ceph.com/issues/64545. Fixes: https://tracker.ceph.com/issues/63647 Signed-off-by: Samuel Just --- diff --git a/src/crimson/osd/osd_operations/snaptrim_event.cc b/src/crimson/osd/osd_operations/snaptrim_event.cc index fabf2b0eb7aa..99fe42d9a822 100644 --- a/src/crimson/osd/osd_operations/snaptrim_event.cc +++ b/src/crimson/osd/osd_operations/snaptrim_event.cc @@ -72,6 +72,8 @@ SnapTrimEvent::start() handle.exit(); }); + /* TODO: add a way to expose progress via the optracker without misusing + * pipeline stages. https://tracker.ceph.com/issues/66473 */ ShardServices &shard_services = pg->get_shard_services(); co_await enter_stage(client_pp().wait_for_active); @@ -124,18 +126,11 @@ SnapTrimEvent::start() snapid)); } - co_await enter_stage(wait_subop); - logger().debug("{}: awaiting completion", *this); co_await subop_blocker.interruptible_wait_completion(); } if (needs_pause) { - // let's know operators we're waiting - co_await enter_stage( - wait_trim_timer - ); - using crimson::common::local_conf; const auto time_to_sleep = local_conf().template get_val("osd_snap_trim_sleep"); @@ -452,11 +447,7 @@ SnapTrimObjSubEvent::start() std::move(log_entries)); return submitted.then_interruptible( [all_completed=std::move(all_completed), this] () mutable { - return enter_stage( - wait_repop - ).then_interruptible([all_completed=std::move(all_completed)] () mutable { - return std::move(all_completed); - }); + return std::move(all_completed); }); }); }); diff --git a/src/crimson/osd/osd_operations/snaptrim_event.h b/src/crimson/osd/osd_operations/snaptrim_event.h index 759e000ff27e..6d5df1cca5d3 100644 --- a/src/crimson/osd/osd_operations/snaptrim_event.h +++ b/src/crimson/osd/osd_operations/snaptrim_event.h @@ -61,19 +61,6 @@ private: SubOpBlocker subop_blocker; - // we don't need to synchronize with other instances of SnapTrimEvent; - // it's here for the sake of op tracking. - struct WaitSubop : OrderedConcurrentPhaseT { - static constexpr auto type_name = "SnapTrimEvent::wait_subop"; - } wait_subop; - - // an instantiator can instruct us to go over this stage and then - // wait for the future to implement throttling. It is implemented - // that way to for the sake of tracking ops. - struct WaitTrimTimer : OrderedExclusivePhaseT { - static constexpr auto type_name = "SnapTrimEvent::wait_trim_timer"; - } wait_trim_timer; - Ref pg; PipelineHandle handle; SnapMapper& snap_mapper; @@ -89,9 +76,7 @@ public: PGActivationBlocker::BlockingEvent, CommonPGPipeline::GetOBC::BlockingEvent, CommonPGPipeline::Process::BlockingEvent, - WaitSubop::BlockingEvent, PG::BackgroundProcessLock::Wait::BlockingEvent, - WaitTrimTimer::BlockingEvent, CompletionEvent > tracking_events; @@ -153,12 +138,6 @@ private: remove_or_update_iertr::future remove_or_update(ObjectContextRef obc, ObjectContextRef head_obc); - // we don't need to synchronize with other instances started by - // SnapTrimEvent; it's here for the sake of op tracking. - struct WaitRepop : OrderedConcurrentPhaseT { - static constexpr auto type_name = "SnapTrimObjSubEvent::wait_repop"; - } wait_repop; - void add_log_entry( int _op, const hobject_t& _soid, @@ -195,7 +174,6 @@ public: PGActivationBlocker::BlockingEvent, CommonPGPipeline::GetOBC::BlockingEvent, CommonPGPipeline::Process::BlockingEvent, - WaitRepop::BlockingEvent, CompletionEvent > tracking_events; };