From 765d4750e2d6c833871540ee446f4208908c9f83 Mon Sep 17 00:00:00 2001 From: Ronen Friedman Date: Mon, 25 Mar 2024 08:06:41 -0500 Subject: [PATCH] osd/scrub: handle Release messages directly in ReplicaIdle The expected path is that the scrubber will be in the ReplicaIdle sub-state. If, however, we are processing a chunk (ReplicaActiveOp) - the scrub is aborted, and the scrubber is returned to the ReplicaIdle. Signed-off-by: Ronen Friedman --- src/osd/scrubber/scrub_machine.cc | 58 +++++++++---------------------- src/osd/scrubber/scrub_machine.h | 40 +++++++-------------- 2 files changed, 29 insertions(+), 69 deletions(-) diff --git a/src/osd/scrubber/scrub_machine.cc b/src/osd/scrubber/scrub_machine.cc index 03b37c032aa5c..e4b41977d7744 100644 --- a/src/osd/scrubber/scrub_machine.cc +++ b/src/osd/scrubber/scrub_machine.cc @@ -897,14 +897,6 @@ sc::result ReplicaActive::react(const ReserverGranted& ev) } -void ReplicaActive::on_release(const ReplicaRelease& ev) -{ - DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases - dout(10) << fmt::format("ReplicaActive::on_release() from {}", ev.m_from) - << dendl; - clear_remote_reservation(true); -} - void ReplicaActive::clear_remote_reservation(bool warn_if_no_reservation) { DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases @@ -917,16 +909,31 @@ void ReplicaActive::clear_remote_reservation(bool warn_if_no_reservation) m_osds->get_scrub_reserver().cancel_reservation(pg_id); reservation_granted = false; pending_reservation_nonce = 0; + ceph_assert(m_reservation_status != reservation_status_t::unreserved); + m_reservation_status = reservation_status_t::unreserved; + } else if (warn_if_no_reservation) { const auto msg = "ReplicaActive::clear_remote_reservation(): " "not reserved!"; dout(5) << msg << dendl; - scrbr->get_clog()->warn() << msg; + scrbr->get_clog()->info() << msg; } } +sc::result ReplicaActive::react(const ReplicaRelease& ev) +{ + DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases + dout(10) << fmt::format( + "ReplicaActive::react(const ReplicaRelease&) from {}", + ev.m_from) + << dendl; + clear_remote_reservation(true); + return discard_event(); +} + + // ---------------- ReplicaActive/ReplicaIdle --------------------------- ReplicaIdle::ReplicaIdle(my_context ctx) @@ -964,18 +971,6 @@ sc::result ReplicaUnreserved::react(const StartReplica& ev) return transit(); } -sc::result ReplicaUnreserved::react(const ReplicaRelease&) -{ - DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases - dout(10) << "ReplicaUnreserved::react(const ReplicaRelease&)" << dendl; - // this is a bug. We should never receive a release request unless we - // are reserved or have a pending reservation. - scrbr->get_clog()->error() << fmt::format( - "osd.{} pg[{}]: reservation released while not reserved", - scrbr->get_whoami(), scrbr->get_spgid()); - return discard_event(); -} - // ---------------- ReplicaIdle/ReplicaWaitingReservation --------------------------- @@ -990,16 +985,6 @@ ReplicaWaitingReservation::ReplicaWaitingReservation(my_context ctx) << dendl; } -sc::result ReplicaWaitingReservation::react(const ReplicaRelease& ev) -{ - DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases - dout(10) << "ReplicaWaitingReservation::react(const ReplicaRelease&)" - << dendl; - // must cancel the queued reservation request - context().on_release(ev); - return transit(); -} - sc::result ReplicaWaitingReservation::react(const StartReplica& ev) { DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases @@ -1028,14 +1013,6 @@ ReplicaReserved::ReplicaReserved(my_context ctx) << dendl; } -sc::result ReplicaReserved::react(const ReplicaRelease& ev) -{ - DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases - dout(10) << "ReplicaReserved::react(const ReplicaRelease&)" << dendl; - context().on_release(ev); - return transit(); -} - sc::result ReplicaReserved::react(const StartReplica& ev) { DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases @@ -1098,8 +1075,7 @@ sc::result ReplicaActiveOp::react(const ReplicaReserveReq& ev) sc::result ReplicaActiveOp::react(const ReplicaRelease& ev) { dout(10) << "ReplicaActiveOp::react(const ReplicaRelease&)" << dendl; - post_event(ev); - return transit(); + return transit(); } diff --git a/src/osd/scrubber/scrub_machine.h b/src/osd/scrubber/scrub_machine.h index 3fc53da8e9e98..081baaf943b2e 100644 --- a/src/osd/scrubber/scrub_machine.h +++ b/src/osd/scrubber/scrub_machine.h @@ -769,16 +769,6 @@ struct WaitDigestUpdate : sc::state, * - No scrubbing is performed in this state, but reservation-related * events are handled. * - * - sub-states: - * * ReplicaUnreserved - not reserved by a primary. In this state we - * are waiting for either a reservation request, or a chunk scrub op. - * - * * ReplicaWaitingReservation - a reservation request was received from - * our primary. We expect a ' go ahead' from the reserver, or a - * cancellation command from the primary (or an interval change). - * - * * ReplicaReserved - we are reserved by a primary. - * * - ReplicaActiveOp - handling a single map request op * * ReplicaWaitUpdates * * ReplicaBuildingMap @@ -826,9 +816,6 @@ struct ReplicaActive : sc::state, explicit ReplicaActive(my_context ctx); ~ReplicaActive(); - /// handle a 'release' from a primary - void on_release(const ReplicaRelease& ev); - /** * cancel a granted or pending reservation * @@ -842,11 +829,18 @@ struct ReplicaActive : sc::state, using reactions = mpl::list< sc::transition, sc::custom_reaction, - sc::custom_reaction>; + sc::custom_reaction, + sc::custom_reaction>; /// handle a reservation request from a primary sc::result react(const ReplicaReserveReq& ev); + /* + * the Primary released the reservation. + * Note: The ActiveReplicaOp state will handle this event as well. + */ + sc::result react(const ReplicaRelease&); + /** * the queued reservation request was granted by the async reserver. * Notify the Primary. @@ -942,12 +936,9 @@ struct ReplicaUnreserved : sc::state, explicit ReplicaUnreserved(my_context ctx); using reactions = mpl::list< - sc::custom_reaction, - // unexpected (bug-induced) events: - sc::custom_reaction>; + sc::custom_reaction>; sc::result react(const StartReplica& ev); - sc::result react(const ReplicaRelease&); }; /** @@ -968,11 +959,8 @@ struct ReplicaWaitingReservation explicit ReplicaWaitingReservation(my_context ctx); using reactions = mpl::list< - // the 'normal' (expected) events: - sc::custom_reaction, sc::custom_reaction>; - sc::result react(const ReplicaRelease& ev); sc::result react(const StartReplica& ev); }; @@ -990,10 +978,8 @@ struct ReplicaReserved : sc::state, NamedSimply { explicit ReplicaReserved(my_context ctx); using reactions = mpl::list< - sc::custom_reaction, - sc::custom_reaction>; + sc::custom_reaction>; - sc::result react(const ReplicaRelease&); sc::result react(const StartReplica& eq); }; @@ -1029,10 +1015,8 @@ struct ReplicaActiveOp /** * a 'release' was send by the primary. Possible scenario: 'no-scrub' - * abort. Our two-steps reaction: - * - we exit the 'ActiveOp' state, and - * - we make sure the 'release' is remembered, to be handled by the state - * we would transition into (which should be ReplicaReserved). + * abort. We abort the current chunk handling and re-enter ReplicaActive, + * releasing the reservation on the way. */ sc::result react(const ReplicaRelease&); -- 2.39.5