From 5a3a4f73476580b44442a30c8404a62b5055c96d Mon Sep 17 00:00:00 2001 From: Ronen Friedman Date: Fri, 2 May 2025 03:03:15 -0500 Subject: [PATCH] osd/scrub: check all(*) conditions in restrictions_on_scrubbing() Modified OsdScrub::restrictions_on_scrubbing() to check all(*) conditions, instead of stopping at the first one that is true. The "new" (since Tentacle) scrub-type-to-conditions mapping is no longer a simple one (is not "monotonic" in the sense of restrictions always being removed as the scrub type is more important), and the caller may want to know them all. (*) The somewhat costly check for the random backoff is still only performed if the OSD is not already running too many scrubs. Fixes: https://tracker.ceph.com/issues/71169 Signed-off-by: Ronen Friedman --- src/osd/scrubber/osd_scrub.cc | 18 +++++++++--------- src/osd/scrubber/osd_scrub.h | 17 ++++++++++------- 2 files changed, 19 insertions(+), 16 deletions(-) diff --git a/src/osd/scrubber/osd_scrub.cc b/src/osd/scrubber/osd_scrub.cc index cfbc2b538fccb..d119ad754deb9 100644 --- a/src/osd/scrubber/osd_scrub.cc +++ b/src/osd/scrubber/osd_scrub.cc @@ -189,7 +189,8 @@ Scrub::OSDRestrictions OsdScrub::restrictions_on_scrubbing( { Scrub::OSDRestrictions env_conditions; - // some environmental conditions prevent all but high priority scrubs + // some "environmental conditions" prevent all but specific types + // (urgency levels) of scrubs if (!m_resource_bookkeeper.can_inc_scrubs()) { // our local OSD is already running too many scrubs @@ -198,21 +199,20 @@ Scrub::OSDRestrictions OsdScrub::restrictions_on_scrubbing( } else if (scrub_random_backoff()) { // dice-roll says we should not scrub now - dout(15) << "Lost in dice. Only high priority scrubs allowed." << dendl; + dout(15) << "Lost on the dice. Regular scheduled scrubs are not permitted." 
+ << dendl; env_conditions.random_backoff_active = true; + } - } else if (is_recovery_active && !conf->osd_scrub_during_recovery) { + if (is_recovery_active && !conf->osd_scrub_during_recovery) { dout(15) << "recovery in progress. Operator-initiated scrubs only." << dendl; env_conditions.recovery_in_progress = true; - } else { - - // regular, i.e. non-high-priority scrubs are allowed - env_conditions.restricted_time = !scrub_time_permit(scrub_clock_now); - env_conditions.cpu_overloaded = - !m_load_tracker.scrub_load_below_threshold(); } + env_conditions.restricted_time = !scrub_time_permit(scrub_clock_now); + env_conditions.cpu_overloaded = !m_load_tracker.scrub_load_below_threshold(); + return env_conditions; } diff --git a/src/osd/scrubber/osd_scrub.h b/src/osd/scrubber/osd_scrub.h index a63f4ac505a40..b06798c4bef20 100644 --- a/src/osd/scrubber/osd_scrub.h +++ b/src/osd/scrubber/osd_scrub.h @@ -140,14 +140,17 @@ class OsdScrub { /** * check the OSD-wide environment conditions (scrub resources, time, etc.). * These may restrict the type of scrubs we are allowed to start, maybe - * down to allowing only high-priority scrubs + * down to allowing only high-priority scrubs. See comments in scrub_job.h + * detailing which conditions may prevent what types of scrubs. * - * Specifically: - * 'only high priority' flag is set for either of - * the following reasons: no local resources (too many scrubs on this OSD); - * a dice roll says we will not scrub in this tick; - * a recovery is in progress, and we are not allowed to scrub while recovery; - * a PG is trying to acquire replica resources. + * The following possible limiting conditions are checked: + * - high local OSD concurrency (i.e. too many scrubs on this OSD); + * - a "dice roll" says we will not scrub in this tick (note: this + * specific condition is only checked if the "high concurrency" condition + * above is not detected); + * - the CPU load is high (i.e. 
above osd_scrub_cpu_load_threshold); + * - the OSD is performing a recovery & osd_scrub_during_recovery is 'false'; + * - the current time is outside of the allowed scrubbing hours/days */ Scrub::OSDRestrictions restrictions_on_scrubbing( bool is_recovery_active, -- 2.39.5