]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
crimson/osd/osd_operations/snaptrim_event: don't process snaptrim events 53965/head
authorXuehan Xu <xuxuehan@qianxin.com>
Sun, 6 Aug 2023 07:45:24 +0000 (15:45 +0800)
committerMatan Breizman <mbreizma@redhat.com>
Wed, 11 Oct 2023 11:53:06 +0000 (11:53 +0000)
of the same PG concurrently

Concurrently triming snaps of the same PG may lead to the following
problem:

1. fiber 1 get objects to trim for PG 1.x and snapid 1.d, which involves
   object OBJ;
2. fiber 2 get objects to trim for PG 1.x and snapid 1.c, which also
   involes object OBJ;
3. fiber 1 removes snap 1.c since it has been removed in the osdmap
4. fiber 2 try to get obc for object OBJ and find that OBJ doesn't have
   snap for id 1.c

Signed-off-by: Xuehan Xu <xuxuehan@qianxin.com>
(cherry picked from commit 267babe435a54130eda5770a537420fb516e2a8f)

src/crimson/osd/osd_operations/snaptrim_event.cc
src/crimson/osd/osd_operations/snaptrim_event.h
src/crimson/osd/pg.h

index 82bfe8068a9d2bfc4559c20871b22aae3d89e414..4a32b567cd4680c1d7feb0408d3fd9e3d492860c 100644 (file)
@@ -30,6 +30,15 @@ namespace crimson {
 
 namespace crimson::osd {
 
+PG::interruptible_future<>
+PG::SnapTrimMutex::lock(SnapTrimEvent &st_event) noexcept
+{
+  return st_event.enter_stage<interruptor>(wait_pg
+  ).then_interruptible([this] {
+    return mutex.lock();
+  });
+}
+
 void SnapTrimEvent::SubOpBlocker::dump_detail(Formatter *f) const
 {
   f->open_array_section("dependent_operations");
@@ -109,6 +118,8 @@ SnapTrimEvent::with_pg(
     }).then_interruptible([this] {
       return enter_stage<interruptor>(
         pp().get_obc);
+    }).then_interruptible([this] {
+      return pg->snaptrim_mutex.lock(*this);
     }).then_interruptible([this] {
       return enter_stage<interruptor>(
         pp().process);
@@ -140,27 +151,32 @@ SnapTrimEvent::with_pg(
         if (to_trim.empty()) {
           // the legit ENOENT -> done
           logger().debug("{}: to_trim is empty! Stopping iteration", *this);
+         pg->snaptrim_mutex.unlock();
           return snap_trim_iertr::make_ready_future<seastar::stop_iteration>(
             seastar::stop_iteration::yes);
         }
-        for (const auto& object : to_trim) {
-          logger().debug("{}: trimming {}", *this, object);
-          auto [op, fut] = shard_services.start_operation_may_interrupt<
-           interruptor, SnapTrimObjSubEvent>(
-            pg,
-            object,
-            snapid);
-          subop_blocker.emplace_back(
-            op->get_id(),
-            std::move(fut)
-          );
-        }
-        return enter_stage<interruptor>(
-          wait_subop
-        ).then_interruptible([this] {
+        return [&shard_services, this](const auto &to_trim) {
+         for (const auto& object : to_trim) {
+           logger().debug("{}: trimming {}", *this, object);
+           auto [op, fut] = shard_services.start_operation_may_interrupt<
+             interruptor, SnapTrimObjSubEvent>(
+             pg,
+             object,
+             snapid);
+           subop_blocker.emplace_back(
+             op->get_id(),
+             std::move(fut)
+           );
+         }
+         return interruptor::now();
+       }(to_trim).then_interruptible([this] {
+         return enter_stage<interruptor>(wait_subop);
+       }).then_interruptible([this] {
           logger().debug("{}: awaiting completion", *this);
           return subop_blocker.wait_completion();
-        }).safe_then_interruptible([this] {
+        }).finally([this] {
+         pg->snaptrim_mutex.unlock();
+       }).safe_then_interruptible([this] {
           if (!needs_pause) {
             return interruptor::now();
           }
index f4ae1bf0630c9de8513cbc757dc57241f687c0fe..851b7b0f0b3865e2d58d04f4d23897c3471343af 100644 (file)
@@ -25,7 +25,6 @@ namespace crimson::osd {
 
 class OSD;
 class ShardServices;
-class PG;
 
 // trim up to `max` objects for snapshot `snapid
 class SnapTrimEvent final : public PhasedOperationT<SnapTrimEvent> {
@@ -107,9 +106,12 @@ public:
     CommonPGPipeline::GetOBC::BlockingEvent,
     CommonPGPipeline::Process::BlockingEvent,
     WaitSubop::BlockingEvent,
+    PG::SnapTrimMutex::WaitPG::BlockingEvent,
     WaitTrimTimer::BlockingEvent,
     CompletionEvent
   > tracking_events;
+
+  friend class PG::SnapTrimMutex;
 };
 
 // remove single object. a SnapTrimEvent can create multiple subrequests.
index 3a7d21ba9d3d0204a5988d561939b6f03b2d7044..d96db2e205665b61ba7c1f6c91055ab5b14d8291 100644 (file)
@@ -61,6 +61,7 @@ namespace crimson::os {
 namespace crimson::osd {
 class OpsExecuter;
 class BackfillRecovery;
+class SnapTrimEvent;
 
 class PG : public boost::intrusive_ref_counter<
   PG,
@@ -552,6 +553,20 @@ public:
     eversion_t &version);
 
 private:
+
+  struct SnapTrimMutex {
+    struct WaitPG : OrderedConcurrentPhaseT<WaitPG> {
+      static constexpr auto type_name = "SnapTrimEvent::wait_pg";
+    } wait_pg;
+    seastar::shared_mutex mutex;
+
+    interruptible_future<> lock(SnapTrimEvent &st_event) noexcept;
+
+    void unlock() noexcept {
+      mutex.unlock();
+    }
+  } snaptrim_mutex;
+
   using do_osd_ops_ertr = crimson::errorator<
    crimson::ct_error::eagain>;
   using do_osd_ops_iertr =