From: Ronen Friedman Date: Thu, 19 Dec 2024 16:02:08 +0000 (-0600) Subject: osd/scrub: abort reserving scrub if an operator-initiated scrub is X-Git-Tag: v20.0.0~456^2~4 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=d3194eaf2572a41b0dfa064e5b984137eb5209fb;p=ceph.git osd/scrub: abort reserving scrub if an operator-initiated scrub is requested Handle the case of an operator scrub command arriving while the PG has already started a scrub session but is still waiting for its replicas' reservations. Now that the reservations are queued, that wait may be a very prolonged one. Normally, an operator-initiated scrub has a priority high enough that it does not need to wait for reservations. In the current implementation, however, the operator request waits until the running scrub session terminates, and only then is the scrub rerun at the higher priority. This is not the intended behavior. The solution is to abort the existing scrub session (if it is still reserving replicas) and start the new one. Signed-off-by: Ronen Friedman --- diff --git a/src/osd/scrubber/pg_scrubber.cc b/src/osd/scrubber/pg_scrubber.cc index 8dd666fa2d58c..ae8d6c94f243c 100644 --- a/src/osd/scrubber/pg_scrubber.cc +++ b/src/osd/scrubber/pg_scrubber.cc @@ -588,6 +588,10 @@ scrub_level_t PgScrubber::scrub_requested( return scrub_level_t::shallow; } + // abort an ongoing scrub, if it's of the lowest priority + // and stuck in replica reservations. + m_fsm->process_event(AbortIfReserving{}); + // update the relevant SchedTarget (either shallow or deep). Set its urgency // to either operator_requested or must_repair. 
Push it into the queue auto& trgt = m_scrub_job->get_target(scrub_level); diff --git a/src/osd/scrubber/scrub_machine.h b/src/osd/scrubber/scrub_machine.h index 7daeb15a43764..f7f739692bf9e 100644 --- a/src/osd/scrubber/scrub_machine.h +++ b/src/osd/scrubber/scrub_machine.h @@ -161,6 +161,11 @@ VALUE_EVENT(ReserverGranted, AsyncScrubResData); /// all replicas have granted our reserve request MEV(RemotesReserved) +/// abort the scrub session, if in ReservingReplicas state +/// (used when the operator issues a scrub request, and we no longer +/// need the reservations) +MEV(AbortIfReserving) + /// initiate a new scrubbing session (relevant if we are a Primary) MEV(StartScrub) @@ -570,6 +575,7 @@ struct ReservingReplicas : sc::state, NamedSimply { using reactions = mpl::list< sc::custom_reaction, sc::custom_reaction, + sc::transition, sc::transition>; ScrubTimePoint entered_at = ScrubClock::now();