git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd/scrub: check all(*) conditions in restrictions_on_scrubbing() 63093/head
author Ronen Friedman <rfriedma@redhat.com>
Fri, 2 May 2025 08:03:15 +0000 (03:03 -0500)
committer Ronen Friedman <rfriedma@redhat.com>
Sat, 3 May 2025 10:59:48 +0000 (05:59 -0500)
Modified OsdScrub::restrictions_on_scrubbing() to check all(*)
conditions, instead of stopping at the first one that is true.
The "new" (since Tentacle) scrub-type-to-conditions mapping is no
longer a simple one (it is not "monotonic", in the sense of restrictions
always being removed as the scrub type becomes more important),
and the caller may want to know all of them.

(*) The somewhat costly check for the random backoff is still only
    performed if the OSD is not already running too many scrubs.

Fixes: https://tracker.ceph.com/issues/71169
Signed-off-by: Ronen Friedman <rfriedma@redhat.com>
src/osd/scrubber/osd_scrub.cc
src/osd/scrubber/osd_scrub.h

index cfbc2b538fccb474ab4b776082e5e0124fd07bef..d119ad754deb9b5540e20ddca6d0accf31f18a9f 100644 (file)
@@ -189,7 +189,8 @@ Scrub::OSDRestrictions OsdScrub::restrictions_on_scrubbing(
 {
   Scrub::OSDRestrictions env_conditions;
 
-  // some environmental conditions prevent all but high priority scrubs
+  // some "environmental conditions" prevent all but specific types
+  // (urgency levels) of scrubs
 
   if (!m_resource_bookkeeper.can_inc_scrubs()) {
     // our local OSD is already running too many scrubs
@@ -198,21 +199,20 @@ Scrub::OSDRestrictions OsdScrub::restrictions_on_scrubbing(
 
   } else if (scrub_random_backoff()) {
     // dice-roll says we should not scrub now
-    dout(15) << "Lost in dice. Only high priority scrubs allowed." << dendl;
+    dout(15) << "Lost on the dice. Regular scheduled scrubs are not permitted."
+            << dendl;
     env_conditions.random_backoff_active = true;
+  }
 
-  } else if (is_recovery_active && !conf->osd_scrub_during_recovery) {
+  if (is_recovery_active && !conf->osd_scrub_during_recovery) {
     dout(15) << "recovery in progress. Operator-initiated scrubs only."
             << dendl;
     env_conditions.recovery_in_progress = true;
-  } else {
-
-    // regular, i.e. non-high-priority scrubs are allowed
-    env_conditions.restricted_time = !scrub_time_permit(scrub_clock_now);
-    env_conditions.cpu_overloaded =
-       !m_load_tracker.scrub_load_below_threshold();
   }
 
+  env_conditions.restricted_time = !scrub_time_permit(scrub_clock_now);
+  env_conditions.cpu_overloaded = !m_load_tracker.scrub_load_below_threshold();
+
   return env_conditions;
 }
 
index a63f4ac505a40517b0203decb2790f7e0b41a5e4..b06798c4bef200ec85a479c69949edfacf689353 100644 (file)
@@ -140,14 +140,17 @@ class OsdScrub {
   /**
    * check the OSD-wide environment conditions (scrub resources, time, etc.).
    * These may restrict the type of scrubs we are allowed to start, maybe
-   * down to allowing only high-priority scrubs
+   * down to allowing only high-priority scrubs. See comments in scrub_job.h
+   * detailing which conditions may prevent what types of scrubs.
    *
-   * Specifically:
-   * 'only high priority' flag is set for either of
-   * the following reasons: no local resources (too many scrubs on this OSD);
-   * a dice roll says we will not scrub in this tick;
-   * a recovery is in progress, and we are not allowed to scrub while recovery;
-   * a PG is trying to acquire replica resources.
+   * The following possible limiting conditions are checked:
+   * - high local OSD concurrency (i.e. too many scrubs on this OSD);
+   * - a "dice roll" says we will not scrub in this tick (note: this
+   *   specific condition is only checked if the "high concurrency" condition
+   *   above is not detected);
+   * - the CPU load is high (i.e. above osd_scrub_cpu_load_threshold);
+   * - the OSD is performing a recovery & osd_scrub_during_recovery is 'false';
+   * - the current time is outside of the allowed scrubbing hours/days
    */
   Scrub::OSDRestrictions restrictions_on_scrubbing(
       bool is_recovery_active,