From e65ded632888bd07daf5189b210934a872026116 Mon Sep 17 00:00:00 2001
From: Ronen Friedman
Date: Mon, 13 Nov 2023 01:09:45 -0600
Subject: [PATCH] osd/scrub: decouple being reserved from handling scrub requests

For a replica, following this change:
* 'ReplicaActive' captures the state of the scrubber when acting as
  a replica, from peering to interval change;
* 'being reserved' is just a flag maintained by ReplicaActive, and is
  no longer a prerequisite for handling scrub requests;
* each scrub request is now associated with its own 'token' value.

This change also brings a minor simplification:
* the 'should we wait for pushes?' decision is now made in the code
  executed on the transition from ReplicaIdle into ReplicaActiveOp.
  'StartReplicaNoWait' can therefore be discarded.

Signed-off-by: Ronen Friedman
---
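Reviewer notes (free-form text here, between the separator and the diffstat,
is ignored by 'git am'). First, a condensed model of the replica-side flow
after this change: the reservation is a mere flag, and a single StartReplica
path decides internally whether to wait for recovery pushes. Only the state
and event names mirror the patch; the model class and main() below are
illustrative only and are not ceph code:

// A condensed model of the replica-side behaviour introduced by this patch.
// Names such as Idle/WaitUpdates/BuildingMap and on_start_replica() mirror
// the patch; everything else is a stand-in for illustration.
#include <cassert>
#include <iostream>

enum class RState { Idle, WaitUpdates, BuildingMap };

struct ReplicaFlowModel {
  bool reserved_by_my_primary = false;  // now merely a flag, never a gate
  RState state = RState::Idle;
  int pending_pushes = 0;  // stand-in for pending_active_pushes()

  // MOSDScrubReserve::REQUEST: grant or reject, but scrubbing is unaffected
  bool on_reserve_req(bool osd_grants) {
    if (reserved_by_my_primary) {
      on_release();  // the primary may have restarted silently: reset first
    }
    reserved_by_my_primary = osd_grants;
    return osd_grants;  // reply GRANT / REJECT to the primary
  }

  // MOSDScrubReserve::RELEASE
  void on_release() { reserved_by_my_primary = false; }

  // StartReplica: the single entry point into the active-op states; the
  // 'wait for pushes?' decision is internal (StartReplicaNoWait is gone)
  void on_start_replica() {
    assert(state == RState::Idle);
    state = RState::WaitUpdates;
    on_pushes_update();  // models check_for_updates() posting ReplicaPushesUpd
  }

  void on_pushes_update() {
    if (state == RState::WaitUpdates && pending_pushes == 0) {
      state = RState::BuildingMap;  // done waiting; go build the chunk map
    }
  }

  void on_map_sent() { state = RState::Idle; }  // ~ReplicaActiveOp
};

int main() {
  ReplicaFlowModel r;
  r.on_reserve_req(true);   // reserved by our primary...
  r.on_release();           // ...then released,
  r.on_start_replica();     // yet a map request is still serviced:
  assert(!r.reserved_by_my_primary && r.state == RState::BuildingMap);
  std::cout << "map request handled while not reserved\n";
  r.on_map_sent();
  return 0;
}

The assert in main() captures the point of the patch: a released (or
never-granted) reservation no longer prevents the replica from serving a map
request; only an interval change tears the replica state down.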
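Second, the new ReplicaIdle reaction list (scrub_machine.h, below) relies on
Boost.Statechart's 'transition with action' form -
sc::transition<Event, Destination, TransitionContext, &TransitionContext::action> -
to run ReplicaActive::check_for_updates() while moving into
ReplicaWaitUpdates; that action posts the ReplicaPushesUpd event that
ReplicaWaitUpdates then handles, which is what makes StartReplicaNoWait
unnecessary. A minimal stand-alone demonstration of the idiom (assumes
Boost.Statechart is available; all names are local to this sketch):

// Demonstrates a statechart transition that invokes an action on an outer
// state while moving between two of its inner states.
#include <boost/statechart/event.hpp>
#include <boost/statechart/simple_state.hpp>
#include <boost/statechart/state_machine.hpp>
#include <boost/statechart/transition.hpp>
#include <iostream>

namespace sc = boost::statechart;

struct StartOp : sc::event<StartOp> {};

struct Outer;
struct Machine : sc::state_machine<Machine, Outer> {};

struct Idle;
struct Outer : sc::simple_state<Outer, Machine, Idle> {
  // runs *during* the transition, in the context of the still-active outer
  // state (cf. ReplicaActive::check_for_updates())
  void check_for_updates(const StartOp&) {
    std::cout << "transition action: queue a pushes-update event\n";
  }
};

struct Busy;
struct Idle : sc::simple_state<Idle, Outer> {
  using reactions =
      sc::transition<StartOp, Busy, Outer, &Outer::check_for_updates>;
};

struct Busy : sc::simple_state<Busy, Outer> {
  Busy() { std::cout << "entered Busy\n"; }  // cf. ReplicaWaitUpdates
};

int main() {
  Machine m;
  m.initiate();
  m.process_event(StartOp{});  // prints the action line, then enters Busy
}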
 src/messages/MOSDScrubReserve.h        |   2 +-
 src/osd/PG.cc                          |   5 +
 src/osd/PG.h                           |   2 +
 src/osd/scrubber/pg_scrubber.cc        | 116 ++++-------------
 src/osd/scrubber/pg_scrubber.h         |  30 ++---
 src/osd/scrubber/scrub_machine.cc      | 164 +++++++++++++++++++------
 src/osd/scrubber/scrub_machine.h       | 132 ++++++++++++++------
 src/osd/scrubber/scrub_machine_lstnr.h |   6 -
 src/osd/scrubber_common.h              |   5 +
 9 files changed, 272 insertions(+), 190 deletions(-)

diff --git a/src/messages/MOSDScrubReserve.h b/src/messages/MOSDScrubReserve.h
index f1f76b3e6fe..c7ab9854117 100644
--- a/src/messages/MOSDScrubReserve.h
+++ b/src/messages/MOSDScrubReserve.h
@@ -24,7 +24,7 @@ private:
 public:
   spg_t pgid;
   epoch_t map_epoch;
-  enum {
+  enum ReserveMsgOp {
     REQUEST = 0,
     GRANT = 1,
     RELEASE = 2,
diff --git a/src/osd/PG.cc b/src/osd/PG.cc
index d2f97a129a2..ddef326e2a8 100644
--- a/src/osd/PG.cc
+++ b/src/osd/PG.cc
@@ -1823,6 +1823,11 @@ void PG::on_activate(interval_set<snapid_t> snaps)
   m_scrubber->on_pg_activate(m_planned_scrub);
 }
 
+void PG::on_replica_activate()
+{
+  m_scrubber->on_replica_activate();
+}
+
 void PG::on_active_exit()
 {
   backfill_reserving = false;
diff --git a/src/osd/PG.h b/src/osd/PG.h
index 2e82e74ab01..e0f070960b4 100644
--- a/src/osd/PG.h
+++ b/src/osd/PG.h
@@ -624,6 +624,8 @@ public:
 
   void on_activate(interval_set<snapid_t> snaps) override;
 
+  void on_replica_activate() override;
+
   void on_activate_committed() override;
 
   void on_active_actmap() override;
diff --git a/src/osd/scrubber/pg_scrubber.cc b/src/osd/scrubber/pg_scrubber.cc
index 70d314f0d2f..a88a09aeb07 100644
--- a/src/osd/scrubber/pg_scrubber.cc
+++ b/src/osd/scrubber/pg_scrubber.cc
@@ -85,6 +85,13 @@ ostream& operator<<(ostream& out, const requested_scrub_t& sf)
   return out;
 }
 
+void PgScrubber::on_replica_activate()
+{
+  dout(10) << __func__ << dendl;
+  m_fsm->process_event(ReplicaActivate{});
+}
+
+
 /*
  * if the incoming message is from a previous interval, it must mean
  * PrimaryLogPG::on_change() was called when that interval ended. We can safely
@@ -197,7 +204,6 @@ bool PgScrubber::should_abort() const
  *
  * Some of the considerations above are also relevant to the replica-side
  * initiation
- * ('StartReplica' & 'StartReplicaNoWait').
  */
 
 void PgScrubber::initiate_regular_scrub(epoch_t epoch_queued)
@@ -216,11 +222,6 @@ void PgScrubber::initiate_regular_scrub(epoch_t epoch_queued)
   }
 }
 
-void PgScrubber::dec_scrubs_remote()
-{
-  m_osds->get_scrub_services().dec_scrubs_remote(m_pg_id.pgid);
-}
-
 void PgScrubber::advance_token()
 {
   m_current_token++;
@@ -274,13 +275,7 @@ void PgScrubber::send_start_replica(epoch_t epoch_queued,
   }
 
   if (check_interval(epoch_queued) && is_token_current(token)) {
-    // save us some time by not waiting for updates if there are none
-    // to wait for. Affects the transition from NotActive into either
-    // ReplicaWaitUpdates or ActiveReplica.
-    if (pending_active_pushes())
-      m_fsm->process_event(StartReplica{});
-    else
-      m_fsm->process_event(StartReplicaNoWait{});
+    m_fsm->process_event(StartReplica{});
   }
   dout(10) << "scrubber event --<< " << __func__ << dendl;
 }
@@ -452,6 +447,11 @@ unsigned int PgScrubber::scrub_requeue_priority(
  * Responsible for resetting any scrub state and releasing any resources.
  * Any inflight events will be ignored via check_interval/should_drop_message
  * or canceled.
+ * Specifically:
+ * - if Primary and in an active session - the IntervalChanged handler takes
+ *   care of discarding the remote reservations, and transitioning out of
+ *   Session. That resets both the scrubber and the FSM.
+ * - if we are a reserved replica - we need to free ourselves.
  */
 void PgScrubber::on_new_interval()
 {
@@ -461,13 +461,7 @@ void PgScrubber::on_new_interval()
 	   is_scrub_active(),
 	   is_queued_or_active())
 	<< dendl;
 
-  // If in active session - the IntervalChanged handler takes care of
-  // discarding the remote reservations, and transitioning out of Session.
-  // That resets both the scrubber and the FSM.
   m_fsm->process_event(IntervalChanged{});
-
-  // The 'FullReset' is only relevant if we are not an active Primary
-  m_fsm->process_event(FullReset{});
   rm_from_osd_scrubbing();
 }
@@ -1139,13 +1133,7 @@ void PgScrubber::on_init()
   m_pg->publish_stats_to_osd();
 }
 
-/*
- * Note: as on_replica_init() is likely to be called twice (entering
- * both ReplicaWaitUpdates & ActiveReplica), its operations should be
- * idempotent.
- * Now that it includes some state-changing operations, we need to check
- * m_active against double-activation.
- */
+
 void PgScrubber::on_replica_init()
 {
   dout(10) << __func__ << " called with 'active' "
@@ -1159,6 +1147,7 @@ void PgScrubber::on_replica_init()
   }
 }
 
+
 int PgScrubber::build_primary_map_chunk()
 {
   epoch_t map_building_since = m_pg->get_osdmap_epoch();
@@ -1217,23 +1206,21 @@ int PgScrubber::build_replica_map_chunk()
 
       // the local map has been created. Send it to the primary.
       // Note: once the message reaches the Primary, it may ask us for another
-      // chunk - and we better be done with the current scrub. Thus - the
-      // preparation of the reply message is separate, and we clear the scrub
-      // state before actually sending it.
+      // chunk - and we better be done with the current scrub. The clearing of
+      // state must be complete before we relinquish the PG lock.
 
-      auto reply = prep_replica_map_msg(PreemptionNoted::no_preemption);
-      replica_handling_done();
-      dout(15) << __func__ << " chunk map sent " << dendl;
-      send_replica_map(reply);
-    } break;
+      send_replica_map(prep_replica_map_msg(PreemptionNoted::no_preemption));
+      dout(15) << fmt::format("{}: chunk map sent", __func__) << dendl;
+    }
+    break;
 
     default:
      // negative retval: build_scrub_map_chunk() signalled an error
      // Pre-Pacific code ignored this option, treating it as a success.
      // \todo Add an error flag in the returning message.
+     // \todo: must either abort, send a reply, or return some error message
      dout(1) << "Error! Aborting. ActiveReplica::react(SchedReplica) Ret: "
 	     << ret << dendl;
-     replica_handling_done();
      // only in debug mode for now:
      assert(false && "backend error");
      break;
@@ -1520,6 +1507,7 @@ void PgScrubber::replica_scrub_op(OpRequestRef op)
 
   replica_scrubmap_pos.reset();	// needed? RRR
 
   set_queued_or_active();
+  advance_token();
   m_osds->queue_for_rep_scrub(m_pg,
 			      m_replica_request_priority,
 			      m_flags.priority,
@@ -1675,7 +1663,7 @@ void PgScrubber::handle_scrub_reserve_msgs(OpRequestRef op)
   auto m = op->get_req<MOSDScrubReserve>();
   switch (m->type) {
     case MOSDScrubReserve::REQUEST:
-      handle_scrub_reserve_request(op);
+      m_fsm->process_event(ReplicaReserveReq{op, m->from});
       break;
     case MOSDScrubReserve::GRANT:
       m_fsm->process_event(ReplicaGrant{op, m->from});
@@ -1684,65 +1672,12 @@ void PgScrubber::handle_scrub_reserve_msgs(OpRequestRef op)
       m_fsm->process_event(ReplicaReject{op, m->from});
       break;
     case MOSDScrubReserve::RELEASE:
-      handle_scrub_reserve_release(op);
+      m_fsm->process_event(ReplicaRelease{op, m->from});
       break;
   }
 }
 
-void PgScrubber::handle_scrub_reserve_request(OpRequestRef op)
-{
-  auto request_ep = op->sent_epoch;
-  dout(20) << fmt::format("{}: request_ep:{} recovery:{}",
-			  __func__,
-			  request_ep,
-			  m_osds->is_recovery_active())
-	   << dendl;
-
-  // The primary may unilaterally restart the scrub process without notifying
-  // replicas. Unconditionally clear any existing state prior to handling
-  // the new reservation.
-  m_fsm->process_event(FullReset{});
-
-  bool granted{false};
-  if (m_pg->cct->_conf->osd_scrub_during_recovery ||
-      !m_osds->is_recovery_active()) {
-
-    granted = m_osds->get_scrub_services().inc_scrubs_remote(m_pg_id.pgid);
-    if (granted) {
-      m_fsm->process_event(ReplicaGrantReservation{});
-    } else {
-      dout(20) << __func__ << ": failed to reserve remotely" << dendl;
-    }
-  } else {
-    dout(10) << __func__ << ": recovery is active; not granting" << dendl;
-  }
-
-  dout(10) << __func__ << " reserved? " << (granted ? "yes" : "no") << dendl;
-
-  Message* reply = new MOSDScrubReserve(
-    spg_t(m_pg->info.pgid.pgid, m_pg->get_primary().shard),
-    request_ep,
-    granted ? MOSDScrubReserve::GRANT : MOSDScrubReserve::REJECT,
-    m_pg_whoami);
-
-  m_osds->send_message_osd_cluster(reply, op->get_req()->get_connection());
-}
-
-void PgScrubber::handle_scrub_reserve_release(OpRequestRef op)
-{
-  dout(10) << __func__ << " " << *op->get_req() << dendl;
-  if (should_drop_message(op)) {
-    // we might have turned into a Primary in the meantime. The interval
-    // change should have been noticed already, and caused us to reset.
-    return;
-  }
-
-  // this specific scrub session has terminated. All incoming events carrying
-  // the old tag will be discarded.
-  m_fsm->process_event(FullReset{});
-}
-
 bool PgScrubber::set_reserving_now()
 {
   return m_osds->get_scrub_services().set_reserving_now(m_pg_id,
 							ceph_clock_now());
@@ -2211,6 +2146,7 @@ void PgScrubber::handle_query_state(ceph::Formatter* f)
 
 PgScrubber::~PgScrubber()
 {
+  m_fsm->process_event(IntervalChanged{});
   if (m_scrub_job) {
     // make sure the OSD won't try to scrub this one just now
     rm_from_osd_scrubbing();
diff --git a/src/osd/scrubber/pg_scrubber.h b/src/osd/scrubber/pg_scrubber.h
index 9946f9ce5ee..2553e49b263 100644
--- a/src/osd/scrubber/pg_scrubber.h
+++ b/src/osd/scrubber/pg_scrubber.h
@@ -258,14 +258,6 @@ class PgScrubber : public ScrubPgIF,
    */
   void handle_scrub_reserve_msgs(OpRequestRef op) final;
 
-  /**
-   * we are a replica being asked by the Primary to reserve OSD resources for
-   * scrubbing
-   */
-  void handle_scrub_reserve_request(OpRequestRef op);
-
-  void handle_scrub_reserve_release(OpRequestRef op);
-
   // managing scrub op registration
 
   void update_scrub_job(const requested_scrub_t& request_flags) final;
@@ -334,6 +326,8 @@ class PgScrubber : public ScrubPgIF,
 
   void on_new_interval() final;
 
+  void on_replica_activate() final;
+
   void scrub_clear_state() final;
 
   bool is_queued_or_active() const final;
@@ -476,13 +470,9 @@ class PgScrubber : public ScrubPgIF,
   [[nodiscard]] bool was_epoch_changed() const final;
 
   void set_queued_or_active() final;
-  /// Clears `m_queued_or_active` and restarts snaptrimming
+  /// Clears `m_queued_or_active` and restarts snap-trimming
   void clear_queued_or_active() final;
 
-  void dec_scrubs_remote() final;
-
-  void advance_token() final;
-
   void mark_local_map_ready() final;
 
   [[nodiscard]] bool are_all_maps_available() const final;
@@ -567,6 +557,9 @@ class PgScrubber : public ScrubPgIF,
 
   void requeue_waiting() const { m_pg->requeue_ops(m_pg->waiting_for_scrub); }
 
+  /// Modify the token identifying the current replica scrub operation
+  void advance_token();
+
   /**
    * mark down some parameters of the initiated scrub:
    * - the epoch when started;
@@ -675,11 +668,12 @@ class PgScrubber : public ScrubPgIF,
   epoch_t m_epoch_start{0};  ///< the actual epoch when scrubbing started
 
   /**
-   * (replica) a tag identifying a specific scrub "session". Incremented
-   * whenever the Primary releases the replica scrub resources. When the scrub
-   * session is terminated (even if the interval remains unchanged, as might
-   * happen following an asok no-scrub command), stale scrub-resched messages
-   * triggered by the backend will be discarded.
+   * (replica) a tag identifying a specific replica operation, i.e. the
+   * creation of the replica scrub map for a single chunk.
+   * Incremented immediately before sending a response to the primary,
+   * so that the next request will be identified as a new operation. Also
+   * changed on reservation release.
+   * Used to identify stale scrub-re-sched messages triggered by the backend.
   */
  Scrub::act_token_t m_current_token{1};
 
diff --git a/src/osd/scrubber/scrub_machine.cc b/src/osd/scrubber/scrub_machine.cc
index 99286acaa12..cb10d87236b 100644
--- a/src/osd/scrubber/scrub_machine.cc
+++ b/src/osd/scrubber/scrub_machine.cc
@@ -654,66 +654,163 @@ ScrubMachine::~ScrubMachine() = default;
 
 // -------- for replicas -----------------------------------------------------
 
-// ----------------------- ReservedReplica --------------------------------
+// ----------------------- ReplicaActive --------------------------------
 
-ReservedReplica::ReservedReplica(my_context ctx)
+ReplicaActive::ReplicaActive(my_context ctx)
     : my_base(ctx)
-    , NamedSimply(context().m_scrbr, "ReservedReplica")
+    , NamedSimply(context().m_scrbr, "ReplicaActive")
 {
-  dout(10) << "-- state -->> ReservedReplica" << dendl;
+  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
+  dout(10) << "-- state -->> ReplicaActive" << dendl;
+  m_pg = scrbr->get_pg();
+  m_osds = m_pg->get_pg_osd(ScrubberPasskey());
+}
+
+ReplicaActive::~ReplicaActive()
+{
+  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
+  if (reserved_by_my_primary) {
+    dout(10) << "ReplicaActive::~ReplicaActive(): clearing reservation"
+	     << dendl;
+    clear_reservation_by_remote_primary();
+  }
+}
+
+
+/*
+ * Note: we are expected to be in the initial internal state (Idle) when
+ * receiving any reservation request. Our other internal states, the
+ * active ones, have their own handler for this event, and will treat it
+ * as an abort request.
+ *
+ * Process:
+ * - if already reserved: clear the existing reservation, then continue
+ * - ask the OSD for the "reservation resource"
+ * - if granted: mark it internally and notify the Primary.
+ * - otherwise: just notify the requesting primary.
+ */
+void ReplicaActive::on_reserve_req(const ReplicaReserveReq& ev)
+{
+  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
+  dout(10) << "ReplicaActive::on_reserve_req()" << dendl;
+
+  if (reserved_by_my_primary) {
+    dout(10) << "ReplicaActive::on_reserve_req(): already reserved" << dendl;
+    // clear the existing reservation
+    clear_reservation_by_remote_primary();  // clears the flag, too
+  }
+
+  // ask the OSD for the reservation
+  const auto ret = get_remote_reservation();
+  if (ret.granted) {
+    reserved_by_my_primary = true;
+    dout(10) << fmt::format("{}: reserved? yes", __func__) << dendl;
+  } else {
+    dout(10) << fmt::format("{}: reserved? no ({})", __func__, ret.error_msg)
+	     << dendl;
+  }
+
+  Message* reply = new MOSDScrubReserve(
+      spg_t(pg_id.pgid, m_pg->get_primary().shard), ev.m_op->sent_epoch,
+      ret.op, m_pg->pg_whoami);
+  m_osds->send_message_osd_cluster(reply, ev.m_op->get_req()->get_connection());
+}
+
+
+void ReplicaActive::on_release(const ReplicaRelease& ev)
+{
+  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
+  if (!reserved_by_my_primary) {
+    dout(5) << fmt::format(
+		   "ReplicaActive::on_release() from {}: not reserved!",
+		   ev.m_from)
+	    << dendl;
+    return;
+  }
+  dout(10) << fmt::format("ReplicaActive::on_release() from {}", ev.m_from)
+	   << dendl;
+  clear_reservation_by_remote_primary();
+}
+
+
+ReplicaActive::ReservationAttemptRes ReplicaActive::get_remote_reservation()
+{
+  using ReservationAttemptRes = ReplicaActive::ReservationAttemptRes;
+  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
+  if (!scrbr->get_pg_cct()->_conf.get_val<bool>("osd_scrub_during_recovery") &&
+      m_osds->is_recovery_active()) {
+    return ReservationAttemptRes{
+	MOSDScrubReserve::REJECT, "recovery is active", false};
+  }
+
+  if (m_osds->get_scrub_services().inc_scrubs_remote(scrbr->get_spgid().pgid)) {
+    return ReservationAttemptRes{MOSDScrubReserve::GRANT, "", true};
+  } else {
+    return ReservationAttemptRes{
+	MOSDScrubReserve::REJECT, "failed to reserve remotely", false};
+  }
 }
 
-ReservedReplica::~ReservedReplica()
+
+void ReplicaActive::clear_reservation_by_remote_primary()
 {
   DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
-  scrbr->dec_scrubs_remote();
-  scrbr->advance_token();
+  dout(10) << "ReplicaActive::clear_reservation_by_remote_primary()" << dendl;
+  m_osds->get_scrub_services().dec_scrubs_remote(scrbr->get_spgid().pgid);
+  reserved_by_my_primary = false;
 }
 
-// ----------------------- ReplicaIdle --------------------------------
+
+void ReplicaActive::check_for_updates(const StartReplica& ev)
+{
+  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
+  dout(10) << "ReplicaActive::check_for_updates()" << dendl;
+  post_event(ReplicaPushesUpd{});
+}
+
+// ---------------- ReplicaActive/ReplicaIdle ---------------------------
 
 ReplicaIdle::ReplicaIdle(my_context ctx)
     : my_base(ctx)
-    , NamedSimply(
-	  context().m_scrbr,
-	  "ReservedReplica/ReplicaIdle")
+    , NamedSimply(context().m_scrbr, "ReplicaActive/ReplicaIdle")
 {
-  dout(10) << "-- state -->> ReservedReplica/ReplicaIdle" << dendl;
+  dout(10) << "-- state -->> ReplicaActive/ReplicaIdle" << dendl;
 }
-ReplicaIdle::~ReplicaIdle() = default;
 
-// ----------------------- ReplicaActiveOp --------------------------------
+// ------------- ReplicaActive/ReplicaActiveOp --------------------------
 
 ReplicaActiveOp::ReplicaActiveOp(my_context ctx)
     : my_base(ctx)
-    , NamedSimply(
-	  context().m_scrbr,
-	  "ReservedReplica/ReplicaActiveOp")
+    , NamedSimply(context().m_scrbr, "ReplicaActiveOp")
 {
-  dout(10) << "-- state -->> ReservedReplica/ReplicaActiveOp" << dendl;
+  dout(10) << "-- state -->> ReplicaActive/ReplicaActiveOp" << dendl;
+  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
+  scrbr->on_replica_init();
+}
+
+
+ReplicaActiveOp::~ReplicaActiveOp()
+{
+  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
+  dout(10) << __func__ << dendl;
+  scrbr->replica_handling_done();
 }
 
-/**
- * \note: here is too late to call replica_handling_done(). See the
- * comment in build_replica_map_chunk()
- */
-ReplicaActiveOp::~ReplicaActiveOp() = default;
 
-// ----------------------- ReplicaWaitUpdates --------------------------------
+// ------------- ReplicaActive/ReplicaWaitUpdates ------------------------
 
 ReplicaWaitUpdates::ReplicaWaitUpdates(my_context ctx)
     : my_base(ctx)
     , NamedSimply(
 	  context().m_scrbr,
-	  "ReservedReplica/ReplicaActiveOp/ReplicaWaitUpdates")
+	  "ReplicaActive/ReplicaActiveOp/ReplicaWaitUpdates")
 {
-  dout(10) << "-- state -->> ReservedReplica/ReplicaActiveOp/ReplicaWaitUpdates"
+  dout(10) << "-- state -->> ReplicaActive/ReplicaActiveOp/ReplicaWaitUpdates"
 	   << dendl;
-  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
-  scrbr->on_replica_init();
 }
 
+
 /*
  * Triggered externally, by the entity that had an update re pushes
 */
@@ -724,7 +821,6 @@ sc::result ReplicaWaitUpdates::react(const ReplicaPushesUpd&)
 	   << scrbr->pending_active_pushes() << dendl;
 
   if (scrbr->pending_active_pushes() == 0) {
-    // done waiting
     return transit<ReplicaBuildingMap>();
   }
 
@@ -732,22 +828,21 @@ sc::result ReplicaWaitUpdates::react(const ReplicaPushesUpd&)
   return discard_event();
 }
 
+
 // ----------------------- ReplicaBuildingMap -----------------------------------
 
 ReplicaBuildingMap::ReplicaBuildingMap(my_context ctx)
     : my_base(ctx)
     , NamedSimply(
 	  context().m_scrbr,
-	  "ReservedReplica/ReplicaActiveOp/ReplicaBuildingMap")
+	  "ReplicaActive/ReplicaActiveOp/ReplicaBuildingMap")
 {
-  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
-  dout(10) << "-- state -->> ReservedReplica/ReplicaActiveOp/ReplicaBuildingMap"
+  dout(10) << "-- state -->> ReplicaActive/ReplicaActiveOp/ReplicaBuildingMap"
 	   << dendl;
-  // and as we might have skipped ReplicaWaitUpdates:
-  scrbr->on_replica_init();
   post_event(SchedReplica{});
 }
 
+
 sc::result ReplicaBuildingMap::react(const SchedReplica&)
 {
   DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
@@ -758,7 +853,6 @@ sc::result ReplicaBuildingMap::react(const SchedReplica&)
     dout(10) << "replica scrub job preempted" << dendl;
 
     scrbr->send_preempted_replica();
-    scrbr->replica_handling_done();
     return transit<ReplicaIdle>();
   }
diff --git a/src/osd/scrubber/scrub_machine.h b/src/osd/scrubber/scrub_machine.h
index fcca0d2cc40..6dbc8b565e5 100644
--- a/src/osd/scrubber/scrub_machine.h
+++ b/src/osd/scrubber/scrub_machine.h
@@ -164,15 +164,12 @@ MEV(IntLocalMapDone)
 /// scrub_snapshot_metadata()
 MEV(DigestUpdate)
 
-/// event emitted when the replica grants a reservation to the primary
-MEV(ReplicaGrantReservation)
+/// we are a replica for this PG
+MEV(ReplicaActivate)
 
 /// initiating replica scrub
 MEV(StartReplica)
 
-/// 'start replica' when there are no pending updates
-MEV(StartReplicaNoWait)
-
 MEV(SchedReplica)
 
 /// Update to active_pushes. 'active_pushes' represents recovery
@@ -211,8 +208,11 @@ struct NotActive;	    ///< the quiescent state. No active scrubbing.
 struct Session;		    ///< either reserving or actively scrubbing
 struct ReservingReplicas;   ///< securing scrub resources from replicas' OSDs
 struct ActiveScrubbing;	    ///< the active state for a Primary. A sub-machine.
-struct ReplicaIdle;	    ///< Initial reserved replica state
-struct ReplicaBuildingMap;  ///< an active state for a replica.
+// the active states for a replica:
+struct ReplicaActive;	    ///< the quiescent state for a replica
+struct ReplicaActiveOp;
+struct ReplicaWaitUpdates;
+struct ReplicaBuildingMap;
 
 class ScrubMachine : public sc::state_machine<ScrubMachine, NotActive> {
@@ -370,8 +370,8 @@ public:
 *
 * - a special end-of-recovery Primary scrub event ('AfterRepairScrub').
 *
- * - (for a replica) 'StartReplica' or 'StartReplicaNoWait', triggered by
- *   an incoming MOSDRepScrub message.
+ * - (if already in ReplicaActive): an incoming MOSDRepScrub triggers
+ *   'StartReplica'.
 *
 * note (20.8.21): originally, AfterRepairScrub was triggering a scrub without
 * waiting for replica resources to be acquired. But once replicas started
@@ -381,11 +381,13 @@ public:
 struct NotActive : sc::state<NotActive, ScrubMachine>, NamedSimply {
   explicit NotActive(my_context ctx);
 
-  using reactions =
-      mpl::list<sc::custom_reaction<StartScrub>,
-		// a scrubbing that was initiated at recovery completion:
-		sc::custom_reaction<AfterRepairScrub>,
-		sc::transition<ReplicaGrantReservation, ReservedReplica>>;
+  using reactions = mpl::list<
+      sc::custom_reaction<StartScrub>,
+      // a scrubbing that was initiated at recovery completion:
+      sc::custom_reaction<AfterRepairScrub>,
+      // peering done, and we are a replica
+      sc::transition<ReplicaActivate, ReplicaActive>>;
+
   sc::result react(const StartScrub&);
   sc::result react(const AfterRepairScrub&);
 };
@@ -611,45 +613,95 @@ struct WaitDigestUpdate : sc::state<WaitDigestUpdate, ActiveScrubbing>,
 
 // ----------------------------- the "replica active" states
 
-/**
- * ReservedReplica
+/*
+ * The replica states:
 *
- * Parent state for replica states, Controls lifecycle for
- * PgScrubber::m_reservations.
+ * ReplicaActive - starts after being peered as a replica. Ends on interval
+ *   change.
+ *   - maintains the "I am reserved by a primary" state;
+ *   - handles reservation requests
+ *
+ *   - ReplicaIdle - ready for a new scrub request
+ *     * initial state of ReplicaActive
+ *
+ *   - ReplicaActiveOp - handling a single map request op
+ *     * ReplicaWaitUpdates
+ *     * ReplicaBuildingMap
 */
-struct ReservedReplica : sc::state<ReservedReplica, ScrubMachine, ReplicaIdle>,
+
+struct ReplicaIdle;
+
+struct ReplicaActive : sc::state<ReplicaActive, ScrubMachine, ReplicaIdle>,
 			 NamedSimply {
-  explicit ReservedReplica(my_context ctx);
-  ~ReservedReplica();
+  explicit ReplicaActive(my_context ctx);
+  ~ReplicaActive();
 
-  using reactions = mpl::list<sc::transition<FullReset, NotActive>>;
-};
+  /// handle a reservation request from a primary
+  void on_reserve_req(const ReplicaReserveReq&);
 
-struct ReplicaWaitUpdates;
+  /// handle a 'release' from a primary
+  void on_release(const ReplicaRelease&);
 
-/**
- * ReplicaIdle
- *
- * Replica is waiting for a map request.
- */
-struct ReplicaIdle : sc::state<ReplicaIdle, ReservedReplica>,
-		     NamedSimply {
-  explicit ReplicaIdle(my_context ctx);
-  ~ReplicaIdle();
+  void check_for_updates(const StartReplica&);
 
   using reactions = mpl::list<
-      sc::transition<StartReplica, ReplicaWaitUpdates>,
-      sc::transition<StartReplicaNoWait, ReplicaBuildingMap>>;
+      // a reservation request from the primary
+      sc::in_state_reaction<
+	  ReplicaReserveReq,
+	  ReplicaActive,
+	  &ReplicaActive::on_reserve_req>,
+      // an explicit release request from the primary
+      sc::in_state_reaction<
+	  ReplicaRelease,
+	  ReplicaActive,
+	  &ReplicaActive::on_release>,
+      // when the interval ends - we may not be a replica anymore
+      sc::transition<IntervalChanged, NotActive>>;
+
+ private:
+  bool reserved_by_my_primary{false};
+
+  // shortcuts:
+  PG* m_pg;
+  OSDService* m_osds;
+
+  /// a convenience internal result structure
+  struct ReservationAttemptRes {
+    MOSDScrubReserve::ReserveMsgOp op;	// GRANT or REJECT
+    std::string_view error_msg;
+    bool granted;
+  };
+
+  /// request a scrub resource from our local OSD
+  /// (after performing some checks)
+  ReservationAttemptRes get_remote_reservation();
+
+  void clear_reservation_by_remote_primary();
 };
 
+
+struct ReplicaIdle : sc::state<ReplicaIdle, ReplicaActive>, NamedSimply {
+  explicit ReplicaIdle(my_context ctx);
+  ~ReplicaIdle() = default;
+
+  // note the execution of check_for_updates() when transitioning to
+  // ReplicaActiveOp/ReplicaWaitUpdates. That would trigger a ReplicaPushesUpd
+  // event, which will be handled by ReplicaWaitUpdates.
+  using reactions = mpl::list<sc::transition<
+      StartReplica,
+      ReplicaWaitUpdates,
+      ReplicaActive,
+      &ReplicaActive::check_for_updates>>;
+};
+
+
 /**
  * ReplicaActiveOp
 *
- * Lifetime matches handling for a single map request op
+ * Lifetime matches handling for a single map request op.
 */
 struct ReplicaActiveOp
-    : sc::state<ReplicaActiveOp, ReservedReplica, ReplicaWaitUpdates>,
-      NamedSimply {
+    : sc::state<ReplicaActiveOp, ReplicaActive, ReplicaWaitUpdates>,
+      NamedSimply {
   explicit ReplicaActiveOp(my_context ctx);
   ~ReplicaActiveOp();
 };
@@ -670,8 +722,8 @@ struct ReplicaWaitUpdates : sc::state<ReplicaWaitUpdates, ReplicaActiveOp>,
 };
 
-struct ReplicaBuildingMap : sc::state<ReplicaBuildingMap, ReplicaActiveOp>
-			  , NamedSimply {
+struct ReplicaBuildingMap : sc::state<ReplicaBuildingMap, ReplicaActiveOp>,
+			    NamedSimply {
   explicit ReplicaBuildingMap(my_context ctx);
 
   using reactions = mpl::list<sc::custom_reaction<SchedReplica>>;
diff --git a/src/osd/scrubber/scrub_machine_lstnr.h b/src/osd/scrubber/scrub_machine_lstnr.h
index 4206c789f91..890a70a8a12 100644
--- a/src/osd/scrubber/scrub_machine_lstnr.h
+++ b/src/osd/scrubber/scrub_machine_lstnr.h
@@ -196,12 +196,6 @@ struct ScrubMachineListener {
   virtual void set_queued_or_active() = 0;
   virtual void clear_queued_or_active() = 0;
 
-  /// Release remote scrub reservation
-  virtual void dec_scrubs_remote() = 0;
-
-  /// Advance replica token
-  virtual void advance_token() = 0;
-
   /**
    * Our scrubbing is blocked, waiting for an excessive length of time for
    * our target chunk to be unlocked. We will set the corresponding flags,
diff --git a/src/osd/scrubber_common.h b/src/osd/scrubber_common.h
index 16810bba15c..d24bb79b801 100644
--- a/src/osd/scrubber_common.h
+++ b/src/osd/scrubber_common.h
@@ -18,12 +18,14 @@ struct PGPool;
 
 namespace Scrub {
   class ReplicaReservations;
+  struct ReplicaActive;
 }
 
 /// Facilitating scrub-related object access to private PG data
 class ScrubberPasskey {
  private:
   friend class Scrub::ReplicaReservations;
+  friend struct Scrub::ReplicaActive;
   friend class PrimaryLogScrub;
   friend class PgScrubber;
   friend class ScrubBackend;
@@ -310,6 +312,9 @@ struct ScrubPgIF {
   /// the OSD scrub queue
   virtual void on_new_interval() = 0;
 
+  /// we are peered as a replica
+  virtual void on_replica_activate() = 0;
+
   virtual void scrub_clear_state() = 0;
 
   virtual void handle_query_state(ceph::Formatter* f) = 0;
-- 
2.39.5