From 2a148f7ee6dbe7d8f60a217c683c8cae58aa9680 Mon Sep 17 00:00:00 2001 From: Xuehan Xu Date: Sun, 6 Aug 2023 15:45:24 +0800 Subject: [PATCH] crimson/osd/osd_operations/snaptrim_event: don't process snaptrim events of the same PG concurrently Concurrently triming snaps of the same PG may lead to the following problem: 1. fiber 1 get objects to trim for PG 1.x and snapid 1.d, which involves object OBJ; 2. fiber 2 get objects to trim for PG 1.x and snapid 1.c, which also involes object OBJ; 3. fiber 1 removes snap 1.c since it has been removed in the osdmap 4. fiber 2 try to get obc for object OBJ and find that OBJ doesn't have snap for id 1.c Signed-off-by: Xuehan Xu (cherry picked from commit 267babe435a54130eda5770a537420fb516e2a8f) --- .../osd/osd_operations/snaptrim_event.cc | 48 ++++++++++++------- .../osd/osd_operations/snaptrim_event.h | 4 +- src/crimson/osd/pg.h | 15 ++++++ 3 files changed, 50 insertions(+), 17 deletions(-) diff --git a/src/crimson/osd/osd_operations/snaptrim_event.cc b/src/crimson/osd/osd_operations/snaptrim_event.cc index 82bfe8068a9d2..4a32b567cd468 100644 --- a/src/crimson/osd/osd_operations/snaptrim_event.cc +++ b/src/crimson/osd/osd_operations/snaptrim_event.cc @@ -30,6 +30,15 @@ namespace crimson { namespace crimson::osd { +PG::interruptible_future<> +PG::SnapTrimMutex::lock(SnapTrimEvent &st_event) noexcept +{ + return st_event.enter_stage(wait_pg + ).then_interruptible([this] { + return mutex.lock(); + }); +} + void SnapTrimEvent::SubOpBlocker::dump_detail(Formatter *f) const { f->open_array_section("dependent_operations"); @@ -109,6 +118,8 @@ SnapTrimEvent::with_pg( }).then_interruptible([this] { return enter_stage( pp().get_obc); + }).then_interruptible([this] { + return pg->snaptrim_mutex.lock(*this); }).then_interruptible([this] { return enter_stage( pp().process); @@ -140,27 +151,32 @@ SnapTrimEvent::with_pg( if (to_trim.empty()) { // the legit ENOENT -> done logger().debug("{}: to_trim is empty! Stopping iteration", *this); + pg->snaptrim_mutex.unlock(); return snap_trim_iertr::make_ready_future( seastar::stop_iteration::yes); } - for (const auto& object : to_trim) { - logger().debug("{}: trimming {}", *this, object); - auto [op, fut] = shard_services.start_operation_may_interrupt< - interruptor, SnapTrimObjSubEvent>( - pg, - object, - snapid); - subop_blocker.emplace_back( - op->get_id(), - std::move(fut) - ); - } - return enter_stage( - wait_subop - ).then_interruptible([this] { + return [&shard_services, this](const auto &to_trim) { + for (const auto& object : to_trim) { + logger().debug("{}: trimming {}", *this, object); + auto [op, fut] = shard_services.start_operation_may_interrupt< + interruptor, SnapTrimObjSubEvent>( + pg, + object, + snapid); + subop_blocker.emplace_back( + op->get_id(), + std::move(fut) + ); + } + return interruptor::now(); + }(to_trim).then_interruptible([this] { + return enter_stage(wait_subop); + }).then_interruptible([this] { logger().debug("{}: awaiting completion", *this); return subop_blocker.wait_completion(); - }).safe_then_interruptible([this] { + }).finally([this] { + pg->snaptrim_mutex.unlock(); + }).safe_then_interruptible([this] { if (!needs_pause) { return interruptor::now(); } diff --git a/src/crimson/osd/osd_operations/snaptrim_event.h b/src/crimson/osd/osd_operations/snaptrim_event.h index f4ae1bf0630c9..851b7b0f0b386 100644 --- a/src/crimson/osd/osd_operations/snaptrim_event.h +++ b/src/crimson/osd/osd_operations/snaptrim_event.h @@ -25,7 +25,6 @@ namespace crimson::osd { class OSD; class ShardServices; -class PG; // trim up to `max` objects for snapshot `snapid class SnapTrimEvent final : public PhasedOperationT { @@ -107,9 +106,12 @@ public: CommonPGPipeline::GetOBC::BlockingEvent, CommonPGPipeline::Process::BlockingEvent, WaitSubop::BlockingEvent, + PG::SnapTrimMutex::WaitPG::BlockingEvent, WaitTrimTimer::BlockingEvent, CompletionEvent > tracking_events; + + friend class PG::SnapTrimMutex; }; // remove single object. a SnapTrimEvent can create multiple subrequests. diff --git a/src/crimson/osd/pg.h b/src/crimson/osd/pg.h index 3a7d21ba9d3d0..d96db2e205665 100644 --- a/src/crimson/osd/pg.h +++ b/src/crimson/osd/pg.h @@ -61,6 +61,7 @@ namespace crimson::os { namespace crimson::osd { class OpsExecuter; class BackfillRecovery; +class SnapTrimEvent; class PG : public boost::intrusive_ref_counter< PG, @@ -552,6 +553,20 @@ public: eversion_t &version); private: + + struct SnapTrimMutex { + struct WaitPG : OrderedConcurrentPhaseT { + static constexpr auto type_name = "SnapTrimEvent::wait_pg"; + } wait_pg; + seastar::shared_mutex mutex; + + interruptible_future<> lock(SnapTrimEvent &st_event) noexcept; + + void unlock() noexcept { + mutex.unlock(); + } + } snaptrim_mutex; + using do_osd_ops_ertr = crimson::errorator< crimson::ct_error::eagain>; using do_osd_ops_iertr = -- 2.39.5