git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
crimson/osd/backfill_state: support backfilling cancellation 59118/head
author Xuehan Xu <xuxuehan@qianxin.com>
Sat, 10 Aug 2024 06:22:09 +0000 (14:22 +0800)
committer Matan Breizman <mbreizma@redhat.com>
Thu, 15 Aug 2024 08:17:30 +0000 (08:17 +0000)
Signed-off-by: Xuehan Xu <xuxuehan@qianxin.com>
src/crimson/osd/backfill_state.cc
src/crimson/osd/backfill_state.h
src/crimson/osd/pg.h
src/crimson/osd/pg_recovery.cc
src/crimson/osd/pg_recovery.h

index b3f3cce64c1bbcd4ded30a6d6619079e0ccb7614..d015a77545cf4038f7bcef90fa8081e93c4197ae 100644 (file)
@@ -498,6 +498,14 @@ BackfillState::Crashed::Crashed()
   ceph_abort_msg("{}: this should not happen");
 }
 
+// -- Cancelled
+// Entering the Cancelled state wipes the bookkeeping held by the enclosing
+// BackfillState: backfill_info and peer_backfill_info are cleared and
+// progress_tracker is reset.
+//
+// NOTE(review): Cancelled is declared as sc::simple_state<>. Boost.Statechart
+// does not allow context<>() to be called from a simple_state constructor;
+// if backfill_state() is implemented on top of context<>(), this state has
+// to derive from sc::state<> and take my_context instead -- please confirm.
+BackfillState::Cancelled::Cancelled()
+{
+  auto&& bs = backfill_state();
+  bs.backfill_info.clear();
+  bs.peer_backfill_info.clear();
+  bs.progress_tracker.reset();
+}
+
 // ProgressTracker is an intermediary between the BackfillListener and
 // BackfillMachine + its states. All requests to push or drop an object
 // are directed through it. The same happens with notifications about
index 8c441f01abb14e5b1ae98e0af70eb4b30afc74ac..4cdd4daafce6d479a4c36b4e424ff33d4469b792 100644 (file)
@@ -58,6 +58,9 @@ struct BackfillState {
   struct RequestDone : sc::event<RequestDone> {
   };
 
+  struct CancelBackfill : sc::event<CancelBackfill> {  // public event: moves the reacting states to Cancelled
+  };
+
 private:
   // internal events
   struct RequestPrimaryScanning : sc::event<RequestPrimaryScanning> {
@@ -132,10 +135,16 @@ public:
     explicit Crashed();
   };
 
+  struct Cancelled : sc::simple_state<Cancelled, BackfillMachine>,  // target of every CancelBackfill transition; declares no reactions of its own
+                    StateHelper<Cancelled> {
+    explicit Cancelled();  // NOTE(review): simple_state ctors must not call context<>() (Boost.Statechart); if the ctor needs the machine, use sc::state<> + my_context -- confirm
+  };
+
   struct Initial : sc::state<Initial, BackfillMachine>,
                    StateHelper<Initial> {
     using reactions = boost::mpl::list<
       sc::custom_reaction<Triggered>,
+      sc::transition<CancelBackfill, Cancelled>,
       sc::transition<sc::event_base, Crashed>>;
     explicit Initial(my_context);
     // initialize after triggering backfill by on_activate_complete().
@@ -146,6 +155,7 @@ public:
   struct Enqueuing : sc::state<Enqueuing, BackfillMachine>,
                      StateHelper<Enqueuing> {
     using reactions = boost::mpl::list<
+      sc::transition<CancelBackfill, Cancelled>,
       sc::transition<RequestPrimaryScanning, PrimaryScanning>,
       sc::transition<RequestReplicasScanning, ReplicasScanning>,
       sc::transition<RequestWaiting, Waiting>,
@@ -206,6 +216,7 @@ public:
       sc::custom_reaction<ObjectPushed>,
       sc::custom_reaction<PrimaryScanned>,
       sc::transition<RequestDone, Done>,
+      sc::transition<CancelBackfill, Cancelled>,
       sc::transition<sc::event_base, Crashed>>;
     explicit PrimaryScanning(my_context);
     sc::result react(ObjectPushed);
@@ -219,6 +230,7 @@ public:
       sc::custom_reaction<ObjectPushed>,
       sc::custom_reaction<ReplicaScanned>,
       sc::transition<RequestDone, Done>,
+      sc::transition<CancelBackfill, Cancelled>,
       sc::transition<sc::event_base, Crashed>>;
     explicit ReplicasScanning(my_context);
     // collect scanning result; if all results are collected, transition
@@ -243,6 +255,7 @@ public:
     using reactions = boost::mpl::list<
       sc::custom_reaction<ObjectPushed>,
       sc::transition<RequestDone, Done>,
+      sc::transition<CancelBackfill, Cancelled>,
       sc::transition<sc::event_base, Crashed>>;
     explicit Waiting(my_context);
     sc::result react(ObjectPushed);
index 6810803867f15df45a9c8c8681d24c6cda2666b6..26b39491a703b9d97b7990e086f4659a7ee85b5c 100644 (file)
@@ -417,7 +417,7 @@ public:
     recovery_handler->on_backfill_reserved();
   }
   void on_backfill_canceled() final {
-    ceph_assert(0 == "Not implemented");
+    recovery_handler->backfill_cancelled();  // delegate the cancellation to the recovery handler instead of asserting
   }
 
   void on_recovery_reserved() final {
index 8c1f13827c6d6740d8fa9a88c9c8616fc82e7c95..f4a7d8a63db9f4787578f07688085c7d1bb2b5b4 100644 (file)
@@ -616,11 +616,30 @@ void PGRecovery::backfilled()
     PeeringState::Backfilled{});
 }
 
+// Cancels the backfill that is currently in progress, if there is one.
+//
+// We are not creating a new BackfillRecovery request here, as we
+// need to cancel the backfill synchronously (before this method returns).
+void PGRecovery::backfill_cancelled()
+{
+  if (!backfill_state) {
+    // backfill_state is dropped at the end of this method, so it may
+    // legitimately be null here (e.g. a repeated cancellation). Mirror
+    // the guard in dispatch_backfill_event() instead of dereferencing
+    // a null pointer.
+    logger().debug("{}: no backfill in progress, nothing to cancel",
+                   __func__);
+    return;
+  }
+  using BackfillState = crimson::osd::BackfillState;
+  backfill_state->process_event(
+    BackfillState::CancelBackfill{}.intrusive_from_this());
+  // Releasing the state makes dispatch_backfill_event() drop any event
+  // that arrives after the cancellation.
+  backfill_state.reset();
+}
+
 void PGRecovery::dispatch_backfill_event(
   boost::intrusive_ptr<const boost::statechart::event_base> evt)
 {
   logger().debug("{}", __func__);
-  backfill_state->process_event(evt);
+  if (backfill_state) {
+    backfill_state->process_event(evt);
+  } else {
+    // TODO: Do we need to worry about cases in which the pg has
+    //              been through both backfill cancellations and backfill
+    //              restarts between the sendings and replies of
+    //              ReplicaScan/ObjectPush requests? Seems classic OSDs
+    //              doesn't handle these cases.
+    logger().debug("{}, backfill cancelled, dropping evt");
+  }
 }
 
 void PGRecovery::on_backfill_reserved()
index e0c87240c00454200e788e3db1e41235455b24e9..f5b8632a3826372720f477a71394b9779b77e1e1 100644 (file)
@@ -97,6 +97,7 @@ private:
   template <class EventT>
   void start_backfill_recovery(
     const EventT& evt);
+  void backfill_cancelled();  // synchronously sends CancelBackfill to backfill_state, then resets it
   void request_replica_scan(
     const pg_shard_t& target,
     const hobject_t& begin,