From 116c72bf1b5afd4eb5e54ce4dd0a8690fdbb03b7 Mon Sep 17 00:00:00 2001 From: Ronen Friedman Date: Sun, 12 Nov 2023 09:25:22 -0600 Subject: [PATCH] osd/scrub: maintain a set of remote reservations ... instead of a simple counter. This - as a preparation for the next commit, which will decouple the "being reserved" state from the handling of scrub requests. The planned changes to the scrub state machine will make it harder to know when to clear the "being reserved" state. The changes here would allow us to err on the side of caution, i.e. trying to "un-count" a remote reservation even if it was not actually reserved or was already deleted. Signed-off-by: Ronen Friedman --- qa/standalone/scrub/osd-scrub-dump.sh | 2 +- src/osd/scrubber/osd_scrub.cc | 8 ++-- src/osd/scrubber/osd_scrub.h | 4 +- src/osd/scrubber/pg_scrubber.cc | 4 +- src/osd/scrubber/scrub_resources.cc | 54 +++++++++++++++++---------- src/osd/scrubber/scrub_resources.h | 10 +++-- 6 files changed, 50 insertions(+), 32 deletions(-) diff --git a/qa/standalone/scrub/osd-scrub-dump.sh b/qa/standalone/scrub/osd-scrub-dump.sh index 0129114625a..644f82d8071 100755 --- a/qa/standalone/scrub/osd-scrub-dump.sh +++ b/qa/standalone/scrub/osd-scrub-dump.sh @@ -123,7 +123,7 @@ function TEST_recover_unexpected() { for o in $(seq 0 $(expr $OSDS - 1)) do CEPH_ARGS='' ceph daemon $(get_asok_path osd.$o) dump_scrub_reservations - scrubs=$(CEPH_ARGS='' ceph daemon $(get_asok_path osd.$o) dump_scrub_reservations | jq '.scrubs_local + .scrubs_remote') + scrubs=$(CEPH_ARGS='' ceph daemon $(get_asok_path osd.$o) dump_scrub_reservations | jq '.scrubs_local + .granted_reservations') if [ $scrubs -gt $MAX_SCRUBS ]; then echo "ERROR: More than $MAX_SCRUBS currently reserved" return 1 diff --git a/src/osd/scrubber/osd_scrub.cc b/src/osd/scrubber/osd_scrub.cc index e3a71e26234..99367170dba 100644 --- a/src/osd/scrubber/osd_scrub.cc +++ b/src/osd/scrubber/osd_scrub.cc @@ -441,14 +441,14 @@ void OsdScrub::dec_scrubs_local() m_resource_bookkeeper.dec_scrubs_local(); } -bool OsdScrub::inc_scrubs_remote() +bool OsdScrub::inc_scrubs_remote(pg_t pgid) { - return m_resource_bookkeeper.inc_scrubs_remote(); + return m_resource_bookkeeper.inc_scrubs_remote(pgid); } -void OsdScrub::dec_scrubs_remote() +void OsdScrub::dec_scrubs_remote(pg_t pgid) { - m_resource_bookkeeper.dec_scrubs_remote(); + m_resource_bookkeeper.dec_scrubs_remote(pgid); } void OsdScrub::mark_pg_scrub_blocked(spg_t blocked_pg) diff --git a/src/osd/scrubber/osd_scrub.h b/src/osd/scrubber/osd_scrub.h index 570430660ed..56167df2ee6 100644 --- a/src/osd/scrubber/osd_scrub.h +++ b/src/osd/scrubber/osd_scrub.h @@ -67,8 +67,8 @@ class OsdScrub { // updating the resource counters bool inc_scrubs_local(); void dec_scrubs_local(); - bool inc_scrubs_remote(); - void dec_scrubs_remote(); + bool inc_scrubs_remote(pg_t pgid); + void dec_scrubs_remote(pg_t pgid); // counting the number of PGs stuck while scrubbing, waiting for objects void mark_pg_scrub_blocked(spg_t blocked_pg); diff --git a/src/osd/scrubber/pg_scrubber.cc b/src/osd/scrubber/pg_scrubber.cc index 4cd861b89c8..70d314f0d2f 100644 --- a/src/osd/scrubber/pg_scrubber.cc +++ b/src/osd/scrubber/pg_scrubber.cc @@ -218,7 +218,7 @@ void PgScrubber::initiate_regular_scrub(epoch_t epoch_queued) void PgScrubber::dec_scrubs_remote() { - m_osds->get_scrub_services().dec_scrubs_remote(); + m_osds->get_scrub_services().dec_scrubs_remote(m_pg_id.pgid); } void PgScrubber::advance_token() @@ -1708,7 +1708,7 @@ void PgScrubber::handle_scrub_reserve_request(OpRequestRef op) if (m_pg->cct->_conf->osd_scrub_during_recovery || !m_osds->is_recovery_active()) { - granted = m_osds->get_scrub_services().inc_scrubs_remote(); + granted = m_osds->get_scrub_services().inc_scrubs_remote(m_pg_id.pgid); if (granted) { m_fsm->process_event(ReplicaGrantReservation{}); } else { diff --git a/src/osd/scrubber/scrub_resources.cc b/src/osd/scrubber/scrub_resources.cc index 179bd5e7e0e..25dcec2399f 100644 --- a/src/osd/scrubber/scrub_resources.cc +++ b/src/osd/scrubber/scrub_resources.cc @@ -4,10 +4,12 @@ #include "./scrub_resources.h" #include +#include #include "common/debug.h" #include "include/ceph_assert.h" +#include "osd/osd_types_fmt.h" using ScrubResources = Scrub::ScrubResources; @@ -22,25 +24,25 @@ ScrubResources::ScrubResources( bool ScrubResources::can_inc_scrubs() const { std::lock_guard lck{resource_lock}; - if (scrubs_local + scrubs_remote < conf->osd_max_scrubs) { + if (scrubs_local + granted_reservations.size() < conf->osd_max_scrubs) { return true; } log_upwards(fmt::format( "{}== false. {} (local) + {} (remote) >= max ({})", __func__, - scrubs_local, scrubs_remote, conf->osd_max_scrubs)); + scrubs_local, granted_reservations.size(), conf->osd_max_scrubs)); return false; } bool ScrubResources::inc_scrubs_local() { std::lock_guard lck{resource_lock}; - if (scrubs_local + scrubs_remote < conf->osd_max_scrubs) { + if (scrubs_local + granted_reservations.size() < conf->osd_max_scrubs) { ++scrubs_local; return true; } log_upwards(fmt::format( "{}: {} (local) + {} (remote) >= max ({})", __func__, scrubs_local, - scrubs_remote, conf->osd_max_scrubs)); + granted_reservations.size(), conf->osd_max_scrubs)); return false; } @@ -49,42 +51,56 @@ void ScrubResources::dec_scrubs_local() std::lock_guard lck{resource_lock}; log_upwards(fmt::format( "{}: {} -> {} (max {}, remote {})", __func__, scrubs_local, - (scrubs_local - 1), conf->osd_max_scrubs, scrubs_remote)); + (scrubs_local - 1), conf->osd_max_scrubs, granted_reservations.size())); --scrubs_local; ceph_assert(scrubs_local >= 0); } -bool ScrubResources::inc_scrubs_remote() +bool ScrubResources::inc_scrubs_remote(pg_t pgid) { std::lock_guard lck{resource_lock}; - if (scrubs_local + scrubs_remote < conf->osd_max_scrubs) { + + // if this PG is already reserved - it's probably a benign bug. + // report it, but do not fail the reservation. + if (granted_reservations.contains(pgid)) { + log_upwards(fmt::format("{}: pg[{}] already reserved", __func__, pgid)); + return true; + } + + auto prev = granted_reservations.size(); + if (scrubs_local + prev < conf->osd_max_scrubs) { + granted_reservations.insert(pgid); log_upwards(fmt::format( - "{}: {} -> {} (max {}, local {})", __func__, scrubs_remote, - (scrubs_remote + 1), conf->osd_max_scrubs, scrubs_local)); - ++scrubs_remote; + "{}: pg[{}] {} -> {} (max {}, local {})", __func__, pgid, prev, + granted_reservations.size(), conf->osd_max_scrubs, scrubs_local)); return true; } log_upwards(fmt::format( - "{}: {} (local) + {} (remote) >= max ({})", __func__, scrubs_local, - scrubs_remote, conf->osd_max_scrubs)); + "{}: pg[{}] {} (local) + {} (remote) >= max ({})", __func__, pgid, + scrubs_local, granted_reservations.size(), conf->osd_max_scrubs)); return false; } -void ScrubResources::dec_scrubs_remote() +void ScrubResources::dec_scrubs_remote(pg_t pgid) { std::lock_guard lck{resource_lock}; - log_upwards(fmt::format( - "{}: {} -> {} (max {}, local {})", __func__, scrubs_remote, - (scrubs_remote - 1), conf->osd_max_scrubs, scrubs_local)); - --scrubs_remote; - ceph_assert(scrubs_remote >= 0); + // we might not have this PG in the set (e.g. if we are concluding a + // high priority scrub, one that does not require reservations) + auto cnt = granted_reservations.erase(pgid); + if (cnt) { + log_upwards(fmt::format( + "{}: remote reservation for {} removed -> {} (max {}, local {})", + __func__, pgid, granted_reservations.size(), conf->osd_max_scrubs, + scrubs_local)); + } } void ScrubResources::dump_scrub_reservations(ceph::Formatter* f) const { std::lock_guard lck{resource_lock}; f->dump_int("scrubs_local", scrubs_local); - f->dump_int("scrubs_remote", scrubs_remote); + f->dump_int("granted_reservations", granted_reservations.size()); + f->dump_string("PGs being served", fmt::format("{}", granted_reservations)); f->dump_int("osd_max_scrubs", conf->osd_max_scrubs); } diff --git a/src/osd/scrubber/scrub_resources.h b/src/osd/scrubber/scrub_resources.h index 890ee5d0e2f..724e206ee27 100644 --- a/src/osd/scrubber/scrub_resources.h +++ b/src/osd/scrubber/scrub_resources.h @@ -8,6 +8,7 @@ #include "common/ceph_mutex.h" #include "common/config_proxy.h" #include "common/Formatter.h" +#include "osd/osd_types.h" namespace Scrub { @@ -28,8 +29,9 @@ class ScrubResources { /// the number of concurrent scrubs performed by Primaries on this OSD int scrubs_local{0}; - /// the number of active scrub reservations granted by replicas - int scrubs_remote{0}; + /// the set of PGs that have active scrub reservations as replicas + /// \todo come C++23 - consider std::flat_set + std::set granted_reservations; mutable ceph::mutex resource_lock = ceph::make_mutex("ScrubQueue::resource_lock"); @@ -56,10 +58,10 @@ class ScrubResources { void dec_scrubs_local(); /// increments the number of scrubs acting as a Replica - bool inc_scrubs_remote(); + bool inc_scrubs_remote(pg_t pgid); /// decrements the number of scrubs acting as a Replica - void dec_scrubs_remote(); + void dec_scrubs_remote(pg_t pgid); void dump_scrub_reservations(ceph::Formatter* f) const; }; -- 2.39.5