From d3194eaf2572a41b0dfa064e5b984137eb5209fb Mon Sep 17 00:00:00 2001 From: Ronen Friedman Date: Thu, 19 Dec 2024 10:02:08 -0600 Subject: [PATCH] osd/scrub: abort reserving scrub if an operator-initiated scrub is requested Handle the case of receiving an operator scrub command while the PG is scrubbing, but is still waiting for replicas' reservations: Now that the reservations are queued, the wait may be a very prolonged one. Usually, a scrub command issued directly by an operator has a priority high enough that it does not need to wait for reservations. But in the current implementation, the operator-requested scrub waits until the running scrub session terminates, and only then is it rerun at that high priority. This is not the intended behavior. The solution is to abort the existing scrub session and start the new one. Signed-off-by: Ronen Friedman --- src/osd/scrubber/pg_scrubber.cc | 4 ++++ src/osd/scrubber/scrub_machine.h | 6 ++++++ 2 files changed, 10 insertions(+) diff --git a/src/osd/scrubber/pg_scrubber.cc b/src/osd/scrubber/pg_scrubber.cc index 8dd666fa2d5..ae8d6c94f24 100644 --- a/src/osd/scrubber/pg_scrubber.cc +++ b/src/osd/scrubber/pg_scrubber.cc @@ -588,6 +588,10 @@ scrub_level_t PgScrubber::scrub_requested( return scrub_level_t::shallow; } + // abort an ongoing scrub, if it's of the lowest priority + // and stuck in replica reservations. + m_fsm->process_event(AbortIfReserving{}); + // update the relevant SchedTarget (either shallow or deep). Set its urgency // to either operator_requested or must_repair. 
Push it into the queue auto& trgt = m_scrub_job->get_target(scrub_level); diff --git a/src/osd/scrubber/scrub_machine.h b/src/osd/scrubber/scrub_machine.h index 7daeb15a437..f7f739692bf 100644 --- a/src/osd/scrubber/scrub_machine.h +++ b/src/osd/scrubber/scrub_machine.h @@ -161,6 +161,11 @@ VALUE_EVENT(ReserverGranted, AsyncScrubResData); /// all replicas have granted our reserve request MEV(RemotesReserved) +/// abort the scrub session, if in ReservingReplicas state +/// (used when the operator issues a scrub request, and we no longer +/// need the reservations) +MEV(AbortIfReserving) + /// initiate a new scrubbing session (relevant if we are a Primary) MEV(StartScrub) @@ -570,6 +575,7 @@ struct ReservingReplicas : sc::state, NamedSimply { using reactions = mpl::list< sc::custom_reaction, sc::custom_reaction, + sc::transition, sc::transition>; ScrubTimePoint entered_at = ScrubClock::now(); -- 2.39.5