]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd/scrub: abort reserving scrub if an operator-initiated scrub is
authorRonen Friedman <rfriedma@redhat.com>
Thu, 19 Dec 2024 16:02:08 +0000 (10:02 -0600)
committerRonen Friedman <rfriedma@redhat.com>
Sun, 29 Dec 2024 11:31:57 +0000 (05:31 -0600)
requested

Handling the case of an operator command arriving while the PG is
scrubbing, but is still waiting for its replicas' reservations:

Now that the reservations are queued, the wait may be a very prolonged
one.
Usually, an operator-initiated scrub command has a priority high enough
not to require waiting for reservations. But in the current
implementation, the operator-requested scrub would wait until the
running scrub session terminates, and only then would the PG be
re-scrubbed at that high priority. This is not the intended behavior.

The solution is to abort the existing scrub session, and start the new
one.

Signed-off-by: Ronen Friedman <rfriedma@redhat.com>
src/osd/scrubber/pg_scrubber.cc
src/osd/scrubber/scrub_machine.h

index 8dd666fa2d58c650ca5e3795e19ff1ec52e523f3..ae8d6c94f243ce6c3d90c6a5fdf2671ce9dd0b76 100644 (file)
@@ -588,6 +588,10 @@ scrub_level_t PgScrubber::scrub_requested(
     return scrub_level_t::shallow;
   }
 
+  // abort an ongoing scrub, if it's of the lowest priority
+  // and stuck in replica reservations.
+  m_fsm->process_event(AbortIfReserving{});
+
   // update the relevant SchedTarget (either shallow or deep). Set its urgency
   // to either operator_requested or must_repair. Push it into the queue
   auto& trgt = m_scrub_job->get_target(scrub_level);
index 7daeb15a4376490748c7697048854ca8994d7d1f..f7f739692bf9ee73ab055217d8f83c10b55ebb40 100644 (file)
@@ -161,6 +161,11 @@ VALUE_EVENT(ReserverGranted, AsyncScrubResData);
 /// all replicas have granted our reserve request
 MEV(RemotesReserved)
 
+/// abort the scrub session, if in ReservingReplicas state
+/// (used when the operator issues a scrub request, and we no longer
+/// need the reservations)
+MEV(AbortIfReserving)
+
 /// initiate a new scrubbing session (relevant if we are a Primary)
 MEV(StartScrub)
 
@@ -570,6 +575,7 @@ struct ReservingReplicas : sc::state<ReservingReplicas, Session>, NamedSimply {
   using reactions = mpl::list<
       sc::custom_reaction<ReplicaGrant>,
       sc::custom_reaction<ReplicaReject>,
+      sc::transition<AbortIfReserving, PrimaryIdle>,
       sc::transition<RemotesReserved, ActiveScrubbing>>;
 
   ScrubTimePoint entered_at = ScrubClock::now();