From 31af98f52d9749401b1b2b9afff9a0620b68cec8 Mon Sep 17 00:00:00 2001 From: Samuel Just Date: Wed, 22 Feb 2023 21:04:17 -0800 Subject: [PATCH] osd/scrubber: associate replica state with state machine states Moves responsibility for owning and resetting replica state to state machine events. Signed-off-by: Samuel Just --- src/osd/scrubber/pg_scrubber.cc | 102 +++++-------------------- src/osd/scrubber/pg_scrubber.h | 41 +--------- src/osd/scrubber/scrub_machine.cc | 67 +++++++++++----- src/osd/scrubber/scrub_machine.h | 65 +++++++++++++--- src/osd/scrubber/scrub_machine_lstnr.h | 6 ++ 5 files changed, 127 insertions(+), 154 deletions(-) diff --git a/src/osd/scrubber/pg_scrubber.cc b/src/osd/scrubber/pg_scrubber.cc index 19288758cd73d..93749e7c5a5b9 100644 --- a/src/osd/scrubber/pg_scrubber.cc +++ b/src/osd/scrubber/pg_scrubber.cc @@ -215,6 +215,16 @@ void PgScrubber::initiate_regular_scrub(epoch_t epoch_queued) } } +void PgScrubber::dec_scrubs_remote() +{ + m_osds->get_scrub_services().dec_scrubs_remote(); +} + +void PgScrubber::advance_token() +{ + m_current_token++; +} + void PgScrubber::initiate_scrub_after_repair(epoch_t epoch_queued) { dout(15) << __func__ << " epoch: " << epoch_queued << dendl; @@ -1392,27 +1402,6 @@ void PgScrubber::replica_scrub_op(OpRequestRef op) return; } - if (is_queued_or_active()) { - // this is bug! - // Somehow, we have received a new scrub request from our Primary, before - // having finished with the previous one. Did we go through an interval - // change without reseting the FSM? Possible responses: - // - crashing (the original assert_not_active() implemented that one), or - // - trying to recover: - // - (logging enough information to debug this scenario) - // - reset the FSM. - m_osds->clog->warn() << fmt::format( - "{}: error: a second scrub-op received while handling the previous one", - __func__); - - scrub_clear_state(); - m_osds->clog->warn() << fmt::format( - "{}: after a reset. Now handling the new OP", - __func__); - } - // make sure the FSM is at NotActive - m_fsm->assert_not_active(); - replica_scrubmap = ScrubMap{}; replica_scrubmap_pos = ScrubMapBuilder{}; @@ -1587,29 +1576,19 @@ void PgScrubber::handle_scrub_reserve_request(OpRequestRef op) return; } - // Purely a debug check, PgScrubber::on_new_interval should have cleared this - if (m_remote_osd_resource.has_value()) { - epoch_t e = m_remote_osd_resource->get_reservation_epoch(); - if (!check_interval(e)) { - derr << __func__ << ": BUG: stale remote osd resource from epoch " << e - << dendl; - } - } - /* The primary may unilaterally restart the scrub process without notifying * replicas. Unconditionally clear any existing state prior to handling * the new reservation. */ - reset_replica_state(); + m_fsm->process_event(FullReset{}); bool granted{false}; if (m_pg->cct->_conf->osd_scrub_during_recovery || !m_osds->is_recovery_active()) { - m_remote_osd_resource.emplace(this, m_pg, m_osds, request_ep); - // OSD resources allocated? - granted = m_remote_osd_resource->is_reserved(); - if (!granted) { - // just forget it - m_remote_osd_resource.reset(); + + granted = m_osds->get_scrub_services().inc_scrubs_remote(); + if (granted) { + m_fsm->process_event(ReplicaGrantReservation{}); + } else { dout(20) << __func__ << ": failed to reserve remotely" << dendl; } } else { @@ -1660,7 +1639,7 @@ void PgScrubber::handle_scrub_reserve_release(OpRequestRef op) * this specific scrub session has terminated. All incoming events carrying * the old tag will be discarded. */ - reset_replica_state(); + m_fsm->process_event(FullReset{}); } void PgScrubber::discard_replica_reservations() @@ -1676,7 +1655,6 @@ void PgScrubber::clear_scrub_reservations() dout(10) << __func__ << dendl; m_reservations.reset(); // the remote reservations m_local_osd_resource.reset(); // the local reservation - m_remote_osd_resource.reset(); // we as replica reserved for a Primary } void PgScrubber::message_all_replicas(int32_t opcode, std::string_view op_text) @@ -2337,17 +2315,6 @@ void PgScrubber::reset_internal_state() m_be.reset(); } -void PgScrubber::reset_replica_state() -{ - dout(10) << fmt::format("{}: prev. token:{}", __func__, m_current_token) - << dendl; - - m_current_token++; - m_remote_osd_resource.reset(); - replica_handling_done(); - m_fsm->process_event(FullReset{}); -} - bool PgScrubber::is_token_current(Scrub::act_token_t received_token) { if (received_token == 0 || received_token == m_current_token) { @@ -2745,41 +2712,6 @@ LocalReservation::~LocalReservation() } } -// ///////////////////// ReservedByRemotePrimary /////////////////////////////// - -ReservedByRemotePrimary::ReservedByRemotePrimary(const PgScrubber* scrubber, - PG* pg, - OSDService* osds, - epoch_t epoch) - : m_scrubber{scrubber} - , m_pg{pg} - , m_osds{osds} - , m_reserved_at{epoch} -{ - if (!m_osds->get_scrub_services().inc_scrubs_remote()) { - dout(10) << __func__ << ": failed to reserve at Primary request" << dendl; - // the failure is signalled by not having m_reserved_by_remote_primary set - return; - } - - dout(20) << __func__ << ": scrub resources reserved at Primary request" - << dendl; - m_reserved_by_remote_primary = true; -} - -ReservedByRemotePrimary::~ReservedByRemotePrimary() -{ - if (m_reserved_by_remote_primary) { - m_reserved_by_remote_primary = false; - m_osds->get_scrub_services().dec_scrubs_remote(); - } -} - -std::ostream& ReservedByRemotePrimary::gen_prefix(std::ostream& out) const -{ - return m_scrubber->gen_prefix(out); -} - // ///////////////////// MapsCollectionStatus //////////////////////////////// auto MapsCollectionStatus::mark_arriving_map(pg_shard_t from) diff --git a/src/osd/scrubber/pg_scrubber.h b/src/osd/scrubber/pg_scrubber.h index f67987a4b44e2..b44bf40e500ad 100644 --- a/src/osd/scrubber/pg_scrubber.h +++ b/src/osd/scrubber/pg_scrubber.h @@ -189,33 +189,6 @@ class LocalReservation { bool is_reserved() const { return m_holding_local_reservation; } }; -/** - * wraps the OSD resource we are using when reserved as a replica by a - * scrubbing primary. - */ -class ReservedByRemotePrimary { - const PgScrubber* m_scrubber; ///< we will be using its gen_prefix() - PG* m_pg; - OSDService* m_osds; - bool m_reserved_by_remote_primary{false}; - const epoch_t m_reserved_at; - - public: - ReservedByRemotePrimary(const PgScrubber* scrubber, - PG* pg, - OSDService* osds, - epoch_t epoch); - ~ReservedByRemotePrimary(); - [[nodiscard]] bool is_reserved() const - { - return m_reserved_by_remote_primary; - } - - epoch_t get_reservation_epoch() const { return m_reserved_at; } - - std::ostream& gen_prefix(std::ostream& out) const; -}; - /** * Once all replicas' scrub maps are received, we go on to compare the maps. * That is - unless we we have not yet completed building our own scrub map. @@ -585,6 +558,10 @@ class PgScrubber : public ScrubPgIF, /// Clears `m_queued_or_active` and restarts snaptrimming void clear_queued_or_active() final; + void dec_scrubs_remote() final; + + void advance_token() final; + void mark_local_map_ready() final; [[nodiscard]] bool are_all_maps_available() const final; @@ -665,12 +642,6 @@ class PgScrubber : public ScrubPgIF, private: void reset_internal_state(); - /** - * the current scrubbing operation is done. We should mark that fact, so that - * all events related to the previous operation can be discarded. - */ - void reset_replica_state(); - bool is_token_current(Scrub::act_token_t received_token); void requeue_waiting() const { m_pg->requeue_ops(m_pg->waiting_for_scrub); } @@ -745,10 +716,6 @@ class PgScrubber : public ScrubPgIF, std::optional m_reservations; std::optional m_local_osd_resource; - /// the 'remote' resource we, as a replica, grant our Primary when it is - /// scrubbing - std::optional m_remote_osd_resource; - void cleanup_on_finish(); // scrub_clear_state() as called for a Primary when // Active->NotActive diff --git a/src/osd/scrubber/scrub_machine.cc b/src/osd/scrubber/scrub_machine.cc index 51857d2f1c553..9acb9c215e8ab 100644 --- a/src/osd/scrubber/scrub_machine.cc +++ b/src/osd/scrubber/scrub_machine.cc @@ -606,6 +606,51 @@ ScrubMachine::~ScrubMachine() = default; // -------- for replicas ----------------------------------------------------- +// ----------------------- ReservedReplica -------------------------------- + +ReservedReplica::ReservedReplica(my_context ctx) + : my_base(ctx) + , NamedSimply(context().m_scrbr, "ReservedReplica") +{ + dout(10) << "-- state -->> ReservedReplica" << dendl; +} + +ReservedReplica::~ReservedReplica() +{ + DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases + scrbr->dec_scrubs_remote(); + scrbr->advance_token(); +} + +// ----------------------- ReplicaIdle -------------------------------- + +ReplicaIdle::ReplicaIdle(my_context ctx) + : my_base(ctx) + , NamedSimply(context().m_scrbr, "ReplicaIdle") +{ + dout(10) << "-- state -->> ReplicaIdle" << dendl; +} + +ReplicaIdle::~ReplicaIdle() +{ +} + + +// ----------------------- ReplicaActiveOp -------------------------------- + +ReplicaActiveOp::ReplicaActiveOp(my_context ctx) + : my_base(ctx) + , NamedSimply(context().m_scrbr, "ReplicaActiveOp") +{ + dout(10) << "-- state -->> ReplicaActiveOp" << dendl; +} + +ReplicaActiveOp::~ReplicaActiveOp() +{ + DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases + scrbr->replica_handling_done(); +} + // ----------------------- ReplicaWaitUpdates -------------------------------- ReplicaWaitUpdates::ReplicaWaitUpdates(my_context ctx) @@ -635,15 +680,6 @@ sc::result ReplicaWaitUpdates::react(const ReplicaPushesUpd&) return discard_event(); } -/** - * the event poster is handling the scrubber reset - */ -sc::result ReplicaWaitUpdates::react(const FullReset&) -{ - dout(10) << "ReplicaWaitUpdates::react(const FullReset&)" << dendl; - return transit(); -} - // ----------------------- ReplicaBuildingMap ----------------------------------- ReplicaBuildingMap::ReplicaBuildingMap(my_context ctx) @@ -668,25 +704,16 @@ sc::result ReplicaBuildingMap::react(const SchedReplica&) scrbr->send_preempted_replica(); scrbr->replica_handling_done(); - return transit(); + return transit(); } // start or check progress of build_replica_map_chunk() auto ret_init = scrbr->build_replica_map_chunk(); if (ret_init != -EINPROGRESS) { - return transit(); + return transit(); } return discard_event(); } -/** - * the event poster is handling the scrubber reset - */ -sc::result ReplicaBuildingMap::react(const FullReset&) -{ - dout(10) << "ReplicaBuildingMap::react(const FullReset&)" << dendl; - return transit(); -} - } // namespace Scrub diff --git a/src/osd/scrubber/scrub_machine.h b/src/osd/scrubber/scrub_machine.h index 45af1dc8d0f67..7bea55c9235d5 100644 --- a/src/osd/scrubber/scrub_machine.h +++ b/src/osd/scrubber/scrub_machine.h @@ -115,6 +115,9 @@ MEV(DigestUpdate) /// maps_compare_n_cleanup() transactions are done MEV(MapsCompared) +/// event emitted when the replica grants a reservation to the primary +MEV(ReplicaGrantReservation) + /// initiating replica scrub MEV(StartReplica) @@ -143,8 +146,7 @@ MEV(ScrubFinished) struct NotActive; ///< the quiescent state. No active scrubbing. struct ReservingReplicas; ///< securing scrub resources from replicas' OSDs struct ActiveScrubbing; ///< the active state for a Primary. A sub-machine. -struct ReplicaWaitUpdates; ///< an active state for a replica. Waiting for all - ///< active operations to finish. +struct ReplicaIdle; ///< Initial reserved replica state struct ReplicaBuildingMap; ///< an active state for a replica. @@ -310,8 +312,7 @@ struct NotActive : sc::state, NamedSimply { mpl::list, // a scrubbing that was initiated at recovery completion: sc::custom_reaction, - sc::transition, - sc::transition>; + sc::transition>; sc::result react(const StartScrub&); sc::result react(const AfterRepairScrub&); }; @@ -510,6 +511,49 @@ struct WaitDigestUpdate : sc::state, // ----------------------------- the "replica active" states +/** + * ReservedReplica + * + * Parent state for replica states, Controls lifecycle for + * PgScrubber::m_reservations. + */ +struct ReservedReplica : sc::state, + NamedSimply { + explicit ReservedReplica(my_context ctx); + ~ReservedReplica(); + + using reactions = mpl::list>; +}; + +struct ReplicaWaitUpdates; + +/** + * ReplicaIdle + * + * Replica is waiting for a map request. + */ +struct ReplicaIdle : sc::state, + NamedSimply { + explicit ReplicaIdle(my_context ctx); + ~ReplicaIdle(); + + using reactions = mpl::list< + sc::transition, + sc::transition>; +}; + +/** + * ReservedActiveOp + * + * Lifetime matches handling for a single map request op + */ +struct ReplicaActiveOp + : sc::state, + NamedSimply { + explicit ReplicaActiveOp(my_context ctx); + ~ReplicaActiveOp(); +}; + /* * Waiting for 'active_pushes' to complete * @@ -517,24 +561,21 @@ struct WaitDigestUpdate : sc::state, * - the details of the Primary's request were internalized by PgScrubber; * - 'active' scrubbing is set */ -struct ReplicaWaitUpdates : sc::state, +struct ReplicaWaitUpdates : sc::state, NamedSimply { explicit ReplicaWaitUpdates(my_context ctx); - using reactions = mpl::list, - sc::custom_reaction>; + using reactions = mpl::list>; sc::result react(const ReplicaPushesUpd&); - sc::result react(const FullReset&); }; -struct ReplicaBuildingMap : sc::state, NamedSimply { +struct ReplicaBuildingMap : sc::state + , NamedSimply { explicit ReplicaBuildingMap(my_context ctx); - using reactions = mpl::list, - sc::custom_reaction>; + using reactions = mpl::list>; sc::result react(const SchedReplica&); - sc::result react(const FullReset&); }; } // namespace Scrub diff --git a/src/osd/scrubber/scrub_machine_lstnr.h b/src/osd/scrubber/scrub_machine_lstnr.h index 15e3c89c89f94..8f4dd8201c105 100644 --- a/src/osd/scrubber/scrub_machine_lstnr.h +++ b/src/osd/scrubber/scrub_machine_lstnr.h @@ -200,6 +200,12 @@ struct ScrubMachineListener { virtual void set_queued_or_active() = 0; virtual void clear_queued_or_active() = 0; + /// Release remote scrub reservation + virtual void dec_scrubs_remote() = 0; + + /// Advance replica token + virtual void advance_token() = 0; + /** * Our scrubbing is blocked, waiting for an excessive length of time for * our target chunk to be unlocked. We will set the corresponding flags, -- 2.39.5