From 3ce41e3bc1a96cd804b118e1538db90c9927fcce Mon Sep 17 00:00:00 2001 From: Ronen Friedman Date: Mon, 20 Nov 2023 07:31:31 -0600 Subject: [PATCH] osd/scrub: separate counters for primary vs. replica scrubs The OSD limits the number of concurrent scrubs performed on its PGs. This limit is now enforced separately for primary and replica scrubs. Signed-off-by: Ronen Friedman --- src/osd/scrubber/scrub_resources.cc | 51 +++++++++++++++++++---------- src/osd/scrubber/scrub_resources.h | 4 +++ 2 files changed, 37 insertions(+), 18 deletions(-) diff --git a/src/osd/scrubber/scrub_resources.cc b/src/osd/scrubber/scrub_resources.cc index 25dcec2399f..6203a2912aa 100644 --- a/src/osd/scrubber/scrub_resources.cc +++ b/src/osd/scrubber/scrub_resources.cc @@ -21,28 +21,38 @@ ScrubResources::ScrubResources( , conf{config} {} +// ------------------------- scrubbing as primary on this OSD ----------------- + +// can we increase the number of concurrent scrubs performed by Primaries +// on this OSD? note that this is counted separately from the number of scrubs +// performed by replicas. bool ScrubResources::can_inc_scrubs() const { std::lock_guard lck{resource_lock}; - if (scrubs_local + granted_reservations.size() < conf->osd_max_scrubs) { - return true; - } - log_upwards(fmt::format( - "{}== false. 
{} (local) + {} (remote) >= max ({})", __func__, - scrubs_local, granted_reservations.size(), conf->osd_max_scrubs)); - return false; + return can_inc_local_scrubs_unlocked(); } bool ScrubResources::inc_scrubs_local() { std::lock_guard lck{resource_lock}; - if (scrubs_local + granted_reservations.size() < conf->osd_max_scrubs) { + if (can_inc_local_scrubs_unlocked()) { ++scrubs_local; + log_upwards(fmt::format( + "{}: {} -> {} (max {}, remote {})", __func__, (scrubs_local - 1), + scrubs_local, conf->osd_max_scrubs, granted_reservations.size())); + return true; + } + return false; +} + +bool ScrubResources::can_inc_local_scrubs_unlocked() const +{ + if (scrubs_local < conf->osd_max_scrubs) { return true; } log_upwards(fmt::format( - "{}: {} (local) + {} (remote) >= max ({})", __func__, scrubs_local, - granted_reservations.size(), conf->osd_max_scrubs)); + "{}: Cannot add local scrubs. Current counter ({}) >= max ({})", __func__, + scrubs_local, conf->osd_max_scrubs)); return false; } @@ -50,12 +60,15 @@ void ScrubResources::dec_scrubs_local() { std::lock_guard lck{resource_lock}; log_upwards(fmt::format( - "{}: {} -> {} (max {}, remote {})", __func__, scrubs_local, - (scrubs_local - 1), conf->osd_max_scrubs, granted_reservations.size())); + "{}: {} -> {} (max {}, remote {})", + __func__, scrubs_local, (scrubs_local - 1), conf->osd_max_scrubs, + granted_reservations.size())); --scrubs_local; ceph_assert(scrubs_local >= 0); } +// ------------------------- scrubbing on this OSD as replicas ---------------- + bool ScrubResources::inc_scrubs_remote(pg_t pgid) { std::lock_guard lck{resource_lock}; @@ -67,18 +80,20 @@ bool ScrubResources::inc_scrubs_remote(pg_t pgid) return true; } - auto prev = granted_reservations.size(); - if (scrubs_local + prev < conf->osd_max_scrubs) { + auto pre_op_cnt = granted_reservations.size(); + if (pre_op_cnt < conf->osd_max_scrubs) { granted_reservations.insert(pgid); log_upwards(fmt::format( - "{}: pg[{}] {} -> {} (max {}, local {})", 
__func__, pgid, prev, - granted_reservations.size(), conf->osd_max_scrubs, scrubs_local)); + "{}: pg[{}] reserved. Remote scrubs count changed from {} -> {} (max " + "{}, local {})", + __func__, pgid, pre_op_cnt, granted_reservations.size(), + conf->osd_max_scrubs, scrubs_local)); return true; } log_upwards(fmt::format( - "{}: pg[{}] {} (local) + {} (remote) >= max ({})", __func__, pgid, - scrubs_local, granted_reservations.size(), conf->osd_max_scrubs)); + "{}: pg[{}] failed. Too many concurrent replica scrubs ({} >= max ({}))", + __func__, pgid, pre_op_cnt, conf->osd_max_scrubs)); return false; } diff --git a/src/osd/scrubber/scrub_resources.h b/src/osd/scrubber/scrub_resources.h index 724e206ee27..75807a10f82 100644 --- a/src/osd/scrubber/scrub_resources.h +++ b/src/osd/scrubber/scrub_resources.h @@ -40,6 +40,10 @@ class ScrubResources { const ceph::common::ConfigProxy& conf; + /// a helper that checks whether another local scrub may be started. + /// Must be called with the resource lock held. + bool can_inc_local_scrubs_unlocked() const; + public: explicit ScrubResources( log_upwards_t log_access, -- 2.39.5