From: Ronen Friedman Date: Sat, 14 Oct 2023 12:36:06 +0000 (-0500) Subject: osd/scrub: handle reservation completion within the Scrubber FSM X-Git-Tag: v19.0.0~197^2~3 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=13b84c856e797c872eab3231a9e352ea3ad99564;p=ceph.git osd/scrub: handle reservation completion within the Scrubber FSM with special handling for the 0-replica case. Signed-off-by: Ronen Friedman --- diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 88a0bc037564..fa938c082781 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -1799,12 +1799,6 @@ void OSDService::queue_for_rep_scrub_resched(PG* pg, act_token); } -void OSDService::queue_for_scrub_granted(PG* pg, Scrub::scrub_prio_t with_priority) -{ - // Resulting scrub event: 'RemotesReserved' - queue_scrub_event_msg(pg, with_priority); -} - void OSDService::queue_for_scrub_resched(PG* pg, Scrub::scrub_prio_t with_priority) { // Resulting scrub event: 'InternalSchedScrub' diff --git a/src/osd/OSD.h b/src/osd/OSD.h index d86443351cf2..38f9a6ca8475 100644 --- a/src/osd/OSD.h +++ b/src/osd/OSD.h @@ -504,9 +504,6 @@ public: void queue_scrub_after_repair(PG* pg, Scrub::scrub_prio_t with_priority); - /// queue the message (-> event) that all replicas have reserved scrub resources for us - void queue_for_scrub_granted(PG* pg, Scrub::scrub_prio_t with_priority); - /// Signals either (a) the end of a sleep period, or (b) a recheck of the availability /// of the primary map being created by the backend. void queue_for_scrub_resched(PG* pg, Scrub::scrub_prio_t with_priority); diff --git a/src/osd/PG.h b/src/osd/PG.h index fe335b85e000..70c1d12b2105 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -449,11 +449,6 @@ public: "SchedReplica"); } - void scrub_send_resources_granted(epoch_t queued, ThreadPool::TPHandle& handle) - { - forward_scrub_event(&ScrubPgIF::send_remotes_reserved, queued, "RemotesReserved"); - } - void scrub_send_scrub_resched(epoch_t queued, ThreadPool::TPHandle& handle) { forward_scrub_event(&ScrubPgIF::send_scrub_resched, queued, "InternalSchedScrub"); diff --git a/src/osd/scheduler/OpSchedulerItem.cc b/src/osd/scheduler/OpSchedulerItem.cc index 0641aafdc1c9..750fc2a4f58e 100644 --- a/src/osd/scheduler/OpSchedulerItem.cc +++ b/src/osd/scheduler/OpSchedulerItem.cc @@ -77,15 +77,6 @@ void PGScrubResched::run(OSD* osd, pg->unlock(); } -void PGScrubResourcesOK::run(OSD* osd, - OSDShard* sdata, - PGRef& pg, - ThreadPool::TPHandle& handle) -{ - pg->scrub_send_resources_granted(epoch_queued, handle); - pg->unlock(); -} - void PGScrubPushesUpdate::run(OSD* osd, OSDShard* sdata, PGRef& pg, diff --git a/src/osd/scheduler/OpSchedulerItem.h b/src/osd/scheduler/OpSchedulerItem.h index 2803169a9bf9..7fb7125a1416 100644 --- a/src/osd/scheduler/OpSchedulerItem.h +++ b/src/osd/scheduler/OpSchedulerItem.h @@ -373,17 +373,6 @@ class PGScrubResched : public PGScrubItem { void run(OSD* osd, OSDShard* sdata, PGRef& pg, ThreadPool::TPHandle& handle) final; }; -/** - * all replicas have granted our scrub resources request - */ -class PGScrubResourcesOK : public PGScrubItem { - public: - PGScrubResourcesOK(spg_t pg, epoch_t epoch_queued) - : PGScrubItem{pg, epoch_queued, "PGScrubResourcesOK"} - {} - void run(OSD* osd, OSDShard* sdata, PGRef& pg, ThreadPool::TPHandle& handle) final; -}; - /** * called when a repair process completes, to initiate scrubbing. No local/remote * resources are allocated. diff --git a/src/osd/scrubber/pg_scrubber.cc b/src/osd/scrubber/pg_scrubber.cc index 98290a06ac45..b090ec113ee3 100644 --- a/src/osd/scrubber/pg_scrubber.cc +++ b/src/osd/scrubber/pg_scrubber.cc @@ -359,17 +359,6 @@ void PgScrubber::send_replica_pushes_upd(epoch_t epoch_queued) dout(10) << "scrubber event --<< " << __func__ << dendl; } -void PgScrubber::send_remotes_reserved(epoch_t epoch_queued) -{ - dout(10) << "scrubber event -->> " << __func__ << " epoch: " << epoch_queued - << dendl; - // note: scrub is not active yet - if (check_interval(epoch_queued)) { - m_fsm->process_event(RemotesReserved{}); - } - dout(10) << "scrubber event --<< " << __func__ << dendl; -} - void PgScrubber::send_chunk_free(epoch_t epoch_queued) { dout(10) << "scrubber event -->> " << __func__ << " epoch: " << epoch_queued diff --git a/src/osd/scrubber/pg_scrubber.h b/src/osd/scrubber/pg_scrubber.h index 0c8fa8c34fb7..97bf7da8f22a 100644 --- a/src/osd/scrubber/pg_scrubber.h +++ b/src/osd/scrubber/pg_scrubber.h @@ -445,8 +445,6 @@ class PgScrubber : public ScrubPgIF, void send_preempted_replica() final; - void send_remotes_reserved(epoch_t epoch_queued) final; - /** * does the PG have newer updates than what we (the scrubber) know? */ diff --git a/src/osd/scrubber/scrub_machine.cc b/src/osd/scrubber/scrub_machine.cc index cc257a47f0e3..40b43b6e0770 100644 --- a/src/osd/scrubber/scrub_machine.cc +++ b/src/osd/scrubber/scrub_machine.cc @@ -163,13 +163,22 @@ ReservingReplicas::ReservingReplicas(my_context ctx) // initiate the reservation process context().m_reservations.emplace(*scrbr); - auto timeout = scrbr->get_pg_cct()->_conf.get_val( - "osd_scrub_reservation_timeout"); - if (timeout.count() > 0) { - // Start a timer to handle case where the replicas take a long time to - // ack the reservation. See ReservationTimeout handler below. - m_timeout_token = machine.schedule_timer_event_after( - timeout); + if (context().m_reservations->get_last_sent()) { + // the 1'st reservation request was sent + + auto timeout = scrbr->get_pg_cct()->_conf.get_val( + "osd_scrub_reservation_timeout"); + if (timeout.count() > 0) { + // Start a timer to handle case where the replicas take a long time to + // ack the reservation. See ReservationTimeout handler below. + m_timeout_token = + machine.schedule_timer_event_after(timeout); + } + } else { + // no replicas to reserve + dout(10) << "no replicas to reserve" << dendl; + // can't transit directly from here + post_event(RemotesReserved{}); } } @@ -186,7 +195,11 @@ sc::result ReservingReplicas::react(const ReplicaGrant& ev) DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases dout(10) << "ReservingReplicas::react(const ReplicaGrant&)" << dendl; - context().m_reservations->handle_reserve_grant(ev.m_op, ev.m_from); + if (context().m_reservations->handle_reserve_grant( + ev.m_op, ev.m_from)) { + // we are done with the reservation process + return transit(); + } return discard_event(); } diff --git a/src/osd/scrubber/scrub_reservations.cc b/src/osd/scrubber/scrub_reservations.cc index 011ace2a2836..4d187a2a644f 100644 --- a/src/osd/scrubber/scrub_reservations.cc +++ b/src/osd/scrubber/scrub_reservations.cc @@ -84,7 +84,7 @@ ReplicaReservations::~ReplicaReservations() release_all(); } -void ReplicaReservations::handle_reserve_grant(OpRequestRef op, pg_shard_t from) +bool ReplicaReservations::handle_reserve_grant(OpRequestRef op, pg_shard_t from) { // verify that the grant is from the peer we expected. If not? // for now - abort the OSD. \todo reconsider the reaction. @@ -94,7 +94,7 @@ void ReplicaReservations::handle_reserve_grant(OpRequestRef op, pg_shard_t from) get_last_sent().value_or(pg_shard_t{})) << dendl; ceph_assert(from == get_last_sent()); - return; + return false; } auto elapsed = clock::now() - m_last_request_sent_at; @@ -115,31 +115,31 @@ void ReplicaReservations::handle_reserve_grant(OpRequestRef op, pg_shard_t from) active_requests_cnt(), m_sorted_secondaries.size(), duration_cast(elapsed).count()) << dendl; - send_next_reservation_or_complete(); + return send_next_reservation_or_complete(); } -void ReplicaReservations::send_next_reservation_or_complete() +bool ReplicaReservations::send_next_reservation_or_complete() { if (m_next_to_request == m_sorted_secondaries.cend()) { // granted by all replicas dout(10) << "remote reservation complete" << dendl; - m_osds->queue_for_scrub_granted(m_pg, scrub_prio_t::low_priority); - - } else { - // send the next reservation request - const auto peer = *m_next_to_request; - const auto epoch = m_pg->get_osdmap_epoch(); - auto m = make_message( - spg_t{m_pgid, peer.shard}, epoch, MOSDScrubReserve::REQUEST, - m_pg->pg_whoami); - m_pg->send_cluster_message(peer.osd, m, epoch, false); - m_last_request_sent_at = clock::now(); - dout(10) << fmt::format( - "reserving {} (the {} of {} replicas)", *m_next_to_request, - active_requests_cnt()+1, m_sorted_secondaries.size()) - << dendl; - m_next_to_request++; + return true; // done } + + // send the next reservation request + const auto peer = *m_next_to_request; + const auto epoch = m_pg->get_osdmap_epoch(); + auto m = make_message( + spg_t{m_pgid, peer.shard}, epoch, MOSDScrubReserve::REQUEST, + m_pg->pg_whoami); + m_pg->send_cluster_message(peer.osd, m, epoch, false); + m_last_request_sent_at = clock::now(); + dout(10) << fmt::format( + "reserving {} (the {} of {} replicas)", *m_next_to_request, + active_requests_cnt() + 1, m_sorted_secondaries.size()) + << dendl; + m_next_to_request++; + return false; } void ReplicaReservations::verify_rejections_source( diff --git a/src/osd/scrubber/scrub_reservations.h b/src/osd/scrubber/scrub_reservations.h index 634e7e580027..a603c7073563 100644 --- a/src/osd/scrubber/scrub_reservations.h +++ b/src/osd/scrubber/scrub_reservations.h @@ -79,8 +79,11 @@ class ReplicaReservations { * the replica we are expecting a reply from) is noted, and triggers * one of two: either sending a reservation request to the next replica, * or notifying the scrubber that we have reserved them all. + * + * \returns true if there are no more replicas to send reservation requests + * (i.e., the scrubber should proceed to the next phase), false otherwise. */ - void handle_reserve_grant(OpRequestRef op, pg_shard_t from); + bool handle_reserve_grant(OpRequestRef op, pg_shard_t from); /** * Verify that the sender of the received rejection is the replica we @@ -105,6 +108,9 @@ class ReplicaReservations { */ void discard_remote_reservations(); + /// the only replica we are expecting a reply from + std::optional get_last_sent() const; + // note: 'public', as accessed via the 'standard' dout_prefix() macro std::ostream& gen_prefix(std::ostream& out, std::string fn) const; @@ -112,17 +118,14 @@ class ReplicaReservations { /// send 'release' messages to all replicas we have managed to reserve void release_all(); - /// the only replica we are expecting a reply from - std::optional get_last_sent() const; - /// The number of requests that have been sent (and not rejected) so far. size_t active_requests_cnt() const; /** - * Either send a reservation request to the next replica, or notify the - * scrubber that we have reserved all the replicas. + * Send a reservation request to the next replica. + * - if there are no more replicas to send requests to, return true */ - void send_next_reservation_or_complete(); + bool send_next_reservation_or_complete(); }; } // namespace Scrub diff --git a/src/osd/scrubber_common.h b/src/osd/scrubber_common.h index 61be0bd5a62c..745ea2388b67 100644 --- a/src/osd/scrubber_common.h +++ b/src/osd/scrubber_common.h @@ -356,12 +356,6 @@ struct ScrubPgIF { */ virtual void clear_pgscrub_state() = 0; - /** - * triggers the 'RemotesReserved' (all replicas granted scrub resources) - * state-machine event - */ - virtual void send_remotes_reserved(epoch_t epoch_queued) = 0; - virtual void cleanup_store(ObjectStore::Transaction* t) = 0; virtual bool get_store_errors(const scrub_ls_arg_t& arg,