From: Xuehan Xu Date: Tue, 8 Oct 2024 04:26:41 +0000 (+0800) Subject: crimson/osd/backfill_state: treat Cancelled as a pause of the ongoing backfilling X-Git-Tag: v20.0.0~451^2~1 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=a34b0ce2aa6820706167e8ea3119160e6e68f157;p=ceph.git crimson/osd/backfill_state: treat Cancelled as a pause of the ongoing backfilling Fixes: https://tracker.ceph.com/issues/67888 Signed-off-by: Xuehan Xu --- diff --git a/src/crimson/osd/backfill_state.cc b/src/crimson/osd/backfill_state.cc index 62607c7fbeba..1392ee330ac2 100644 --- a/src/crimson/osd/backfill_state.cc +++ b/src/crimson/osd/backfill_state.cc @@ -407,7 +407,34 @@ BackfillState::PrimaryScanning::react(PrimaryScanned evt) LOG_PREFIX(BackfillState::PrimaryScanning::react::PrimaryScanned); DEBUGDPP("", pg()); backfill_state().backfill_info = std::move(evt.result); - return transit(); + if (!backfill_state().is_suspended()) { + return transit(); + } else { + DEBUGDPP("backfill suspended, not going Enqueuing", pg()); + backfill_state().go_enqueuing_on_resume(); + } + return discard_event(); +} + +boost::statechart::result +BackfillState::PrimaryScanning::react(CancelBackfill evt) +{ + LOG_PREFIX(BackfillState::PrimaryScanning::react::SuspendBackfill); + DEBUGDPP("suspended within PrimaryScanning", pg()); + backfill_state().on_suspended(); + return discard_event(); +} + +boost::statechart::result +BackfillState::PrimaryScanning::react(Triggered evt) +{ + LOG_PREFIX(BackfillState::PrimaryScanning::react::Triggered); + ceph_assert(backfill_state().is_suspended()); + if (backfill_state().on_resumed()) { + DEBUGDPP("Backfill resumed, going Enqueuing", pg()); + return transit(); + } + return discard_event(); } boost::statechart::result @@ -470,12 +497,17 @@ BackfillState::ReplicasScanning::react(ReplicaScanned evt) if (waiting_on_backfill.empty()) { ceph_assert(backfill_state().peer_backfill_info.size() == \ peering_state().get_backfill_targets().size()); - return transit(); + if (!backfill_state().is_suspended()) { + return transit(); + } else { + DEBUGDPP("backfill suspended, not going Enqueuing", pg()); + backfill_state().go_enqueuing_on_resume(); + } } } else { - // we canceled backfill for a while due to a too full, and this + // we suspended backfill for a while due to a too full, and this // is an extra response from a non-too-full peer - DEBUGDPP("canceled backfill (too full?)", pg()); + DEBUGDPP("suspended backfill (too full?)", pg()); } return discard_event(); } @@ -483,8 +515,22 @@ BackfillState::ReplicasScanning::react(ReplicaScanned evt) boost::statechart::result BackfillState::ReplicasScanning::react(CancelBackfill evt) { - LOG_PREFIX(BackfillState::ReplicasScanning::react::CancelBackfill); - DEBUGDPP("cancelled within ReplicasScanning", pg()); + LOG_PREFIX(BackfillState::ReplicasScanning::react::SuspendBackfill); + DEBUGDPP("suspended within ReplicasScanning", pg()); + backfill_state().on_suspended(); + return discard_event(); +} + +boost::statechart::result +BackfillState::ReplicasScanning::react(Triggered evt) +{ + LOG_PREFIX(BackfillState::ReplicasScanning::react::Triggered); + ceph_assert(backfill_state().is_suspended()); + if (backfill_state().on_resumed()) { + DEBUGDPP("Backfill resumed, going Enqueuing", pg()); + return transit(); + } + return discard_event(); } boost::statechart::result @@ -510,7 +556,34 @@ BackfillState::Waiting::react(ObjectPushed evt) LOG_PREFIX(BackfillState::Waiting::react::ObjectPushed); DEBUGDPP("Waiting::react() on ObjectPushed; evt.object={}", pg(), evt.object); backfill_state().progress_tracker->complete_to(evt.object, evt.stat, false); - return transit(); + if (!backfill_state().is_suspended()) { + return transit(); + } else { + DEBUGDPP("backfill suspended, not going Enqueuing", pg()); + backfill_state().go_enqueuing_on_resume(); + } + return discard_event(); +} + +boost::statechart::result +BackfillState::Waiting::react(CancelBackfill evt) +{ + LOG_PREFIX(BackfillState::Waiting::react::SuspendBackfill); + DEBUGDPP("suspended within Waiting", pg()); + backfill_state().on_suspended(); + return discard_event(); +} + +boost::statechart::result +BackfillState::Waiting::react(Triggered evt) +{ + LOG_PREFIX(BackfillState::Waiting::react::Triggered); + ceph_assert(backfill_state().is_suspended()); + if (backfill_state().on_resumed()) { + DEBUGDPP("Backfill resumed, going Enqueuing", pg()); + return transit(); + } + return discard_event(); } // -- Done diff --git a/src/crimson/osd/backfill_state.h b/src/crimson/osd/backfill_state.h index 34400d930b2b..463be4a7a2eb 100644 --- a/src/crimson/osd/backfill_state.h +++ b/src/crimson/osd/backfill_state.h @@ -210,11 +210,15 @@ public: sc::custom_reaction, sc::custom_reaction, sc::transition, + sc::custom_reaction, + sc::custom_reaction, sc::transition>; explicit PrimaryScanning(my_context); sc::result react(ObjectPushed); // collect scanning result and transit to Enqueuing. sc::result react(PrimaryScanned); + sc::result react(CancelBackfill); + sc::result react(Triggered); }; struct ReplicasScanning : sc::state, @@ -223,6 +227,7 @@ public: sc::custom_reaction, sc::custom_reaction, sc::custom_reaction, + sc::custom_reaction, sc::transition, sc::transition>; explicit ReplicasScanning(my_context); @@ -231,6 +236,7 @@ public: sc::result react(ObjectPushed); sc::result react(ReplicaScanned); sc::result react(CancelBackfill); + sc::result react(Triggered); // indicate whether a particular peer should be scanned to retrieve // BackfillInterval for new range of hobject_t namespace. @@ -249,9 +255,13 @@ public: using reactions = boost::mpl::list< sc::custom_reaction, sc::transition, + sc::custom_reaction, + sc::custom_reaction, sc::transition>; explicit Waiting(my_context); sc::result react(ObjectPushed); + sc::result react(CancelBackfill); + sc::result react(Triggered); }; struct Done : sc::state, @@ -296,6 +306,26 @@ public: } } private: + struct backfill_suspend_state_t { + bool suspended = false; + bool should_go_enqueuing = false; + } backfill_suspend_state; + bool is_suspended() const { + return backfill_suspend_state.suspended; + } + void on_suspended() { + ceph_assert(!is_suspended()); + backfill_suspend_state = {true, false}; + } + bool on_resumed() { + auto go_enqueuing = backfill_suspend_state.should_go_enqueuing; + backfill_suspend_state = {false, false}; + return go_enqueuing; + } + void go_enqueuing_on_resume() { + ceph_assert(is_suspended()); + backfill_suspend_state.should_go_enqueuing = true; + } hobject_t last_backfill_started; BackfillInterval backfill_info; std::map peer_backfill_info;