From 74a029150d16a3d1fe9e03e40801aba28bcda0ce Mon Sep 17 00:00:00 2001 From: Ronen Friedman Date: Fri, 22 Mar 2024 08:05:34 -0500 Subject: [PATCH] osd/scrub: remove detection & handling of reservation timeouts as no timeout can be set for reserver-based (queued) reservation requests. Fixes: https://tracker.ceph.com/issues/65044 Signed-off-by: Ronen Friedman (cherry picked from commit e8db183ce4f1c6db34129cc1c8c57912643d1ace) Signed-off-by: Ronen Friedman --- src/osd/scrubber/scrub_machine.cc | 31 +------------------------------ src/osd/scrubber/scrub_machine.h | 17 +++++------------ 2 files changed, 6 insertions(+), 42 deletions(-) diff --git a/src/osd/scrubber/scrub_machine.cc b/src/osd/scrubber/scrub_machine.cc index ce4196e1ebbb1..541cb5df94e1b 100644 --- a/src/osd/scrubber/scrub_machine.cc +++ b/src/osd/scrubber/scrub_machine.cc @@ -234,18 +234,7 @@ ReservingReplicas::ReservingReplicas(my_context ctx) *scrbr, context().last_request_sent_nonce, *session.m_perf_set); - if (session.m_reservations->get_last_sent()) { - // the 1'st reservation request was sent - - auto timeout = scrbr->get_pg_cct()->_conf.get_val( - "osd_scrub_reservation_timeout"); - if (timeout.count() > 0) { - // Start a timer to handle case where the replicas take a long time to - // ack the reservation. See ReservationTimeout handler below. - m_timeout_token = - machine.schedule_timer_event_after(timeout); - } - } else { + if (!session.m_reservations->get_last_sent()) { // no replicas to reserve dout(10) << "no replicas to reserve" << dendl; // can't transit directly from here @@ -301,24 +290,6 @@ sc::result ReservingReplicas::react(const ReplicaReject& ev) return transit(); } -sc::result ReservingReplicas::react(const ReservationTimeout&) -{ - DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases - auto& session = context(); - dout(10) << "ReservingReplicas::react(const ReservationTimeout&)" << dendl; - session.m_reservations->log_failure_and_duration(scrbcnt_resrv_timed_out); - - const auto msg = fmt::format( - "osd.{} PgScrubber: {} timeout on reserving replicas (since {})", - scrbr->get_whoami(), scrbr->get_spgid(), entered_at); - dout(1) << msg << dendl; - scrbr->get_clog()->warn() << msg; - - // cause the scrubber to stop the scrub session, marking 'reservation - // failure' as the cause (affecting future scheduling) - scrbr->flag_reservations_failure(); - return transit(); -} // ----------------------- ActiveScrubbing ----------------------------------- diff --git a/src/osd/scrubber/scrub_machine.h b/src/osd/scrubber/scrub_machine.h index 254e7861ed956..a9902c8df8143 100644 --- a/src/osd/scrubber/scrub_machine.h +++ b/src/osd/scrubber/scrub_machine.h @@ -156,9 +156,6 @@ VALUE_EVENT(ReserverGranted, AsyncScrubResData); /// all replicas have granted our reserve request MEV(RemotesReserved) -/// reservations have timed out -MEV(ReservationTimeout) - /// initiate a new scrubbing session (relevant if we are a Primary) MEV(StartScrub) @@ -565,25 +562,21 @@ struct Session : sc::state, ScrubTimePoint m_session_started_at{ScrubClock::now()}; }; -struct ReservingReplicas : sc::state, - NamedSimply { +struct ReservingReplicas : sc::state, NamedSimply { explicit ReservingReplicas(my_context ctx); ~ReservingReplicas(); - using reactions = mpl::list, - sc::custom_reaction, - sc::transition, - sc::custom_reaction>; + using reactions = mpl::list< + sc::custom_reaction, + sc::custom_reaction, + sc::transition>; ScrubTimePoint entered_at = ScrubClock::now(); - ScrubMachine::timer_event_token_t m_timeout_token; /// a "raw" event carrying a peer's grant response sc::result react(const ReplicaGrant&); /// a "raw" event carrying a peer's denial response sc::result react(const ReplicaReject&); - - sc::result react(const ReservationTimeout&); }; -- 2.39.5