From: Ronen Friedman Date: Thu, 7 Dec 2023 16:11:34 +0000 (-0600) Subject: osd/scrub: don't block high-priority scrubs on a too-high scrubs count X-Git-Tag: v19.3.0~331^2~1 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=e1d91b24e4e63b08928b03f61e9bc68f6147507f;p=ceph.git osd/scrub: don't block high-priority scrubs on a too-high scrubs count A scrub can only be initiated if the number of active scrubs performed on the OSD when acting as a primary is below a certain configured limit. After this change - high priority scrubs are not blocked by this limit (although they are still counted towards it). Signed-off-by: Ronen Friedman --- diff --git a/src/osd/PG.h b/src/osd/PG.h index d9acfafd0328..6e81af436e7d 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -67,7 +67,6 @@ class ScrubBackend; namespace Scrub { class Store; class ReplicaReservations; - class LocalReservation; class ReservedByRemotePrimary; enum class schedule_result_t; } diff --git a/src/osd/scrubber/osd_scrub.cc b/src/osd/scrubber/osd_scrub.cc index fb21e0e1c5ee..f11ddd2737ca 100644 --- a/src/osd/scrubber/osd_scrub.cc +++ b/src/osd/scrubber/osd_scrub.cc @@ -407,9 +407,10 @@ void OsdScrub::remove_from_osd_queue(Scrub::ScrubJobRef sjob) m_queue.remove_from_osd_queue(sjob); } -bool OsdScrub::inc_scrubs_local() +std::unique_ptr OsdScrub::inc_scrubs_local( + bool is_high_priority) { - return m_resource_bookkeeper.inc_scrubs_local(); + return m_resource_bookkeeper.inc_scrubs_local(is_high_priority); } void OsdScrub::dec_scrubs_local() diff --git a/src/osd/scrubber/osd_scrub.h b/src/osd/scrubber/osd_scrub.h index 774076711e2d..dae11f860011 100644 --- a/src/osd/scrubber/osd_scrub.h +++ b/src/osd/scrubber/osd_scrub.h @@ -65,7 +65,8 @@ class OsdScrub { // --------------------------------------------------------------- // updating the resource counters - bool inc_scrubs_local(); + std::unique_ptr inc_scrubs_local( + bool is_high_priority); void dec_scrubs_local(); bool inc_scrubs_remote(pg_t pgid); void dec_scrubs_remote(pg_t pgid); diff --git a/src/osd/scrubber/pg_scrubber.cc b/src/osd/scrubber/pg_scrubber.cc index edc6c2a2dcd4..11112bc48d96 100644 --- a/src/osd/scrubber/pg_scrubber.cc +++ b/src/osd/scrubber/pg_scrubber.cc @@ -587,16 +587,17 @@ void PgScrubber::request_rescrubbing(requested_scrub_t& request_flags) bool PgScrubber::reserve_local() { // try to create the reservation object (which translates into asking the - // OSD for the local scrub resource). If failing - undo it immediately - - m_local_osd_resource.emplace(m_osds); - if (m_local_osd_resource->is_reserved()) { + // OSD for a local scrub resource). The object returned is a + // a wrapper around the actual reservation, and that object releases + // the local resource automatically when reset. + m_local_osd_resource = m_osds->get_scrub_services().inc_scrubs_local( + m_scrub_job->is_high_priority()); + if (m_local_osd_resource) { dout(15) << __func__ << ": local resources reserved" << dendl; return true; } - dout(10) << __func__ << ": failed to reserve local scrub resources" << dendl; - m_local_osd_resource.reset(); + dout(15) << __func__ << ": failed to reserve local scrub resources" << dendl; return false; } @@ -2445,28 +2446,8 @@ void PgScrubber::preemption_data_t::reset() m_size_divisor = 1; } - -// ///////////////////// LocalReservation ////////////////////////////////// - namespace Scrub { -// note: no dout()s in LocalReservation functions. Client logs interactions. -LocalReservation::LocalReservation(OSDService* osds) : m_osds{osds} -{ - if (m_osds->get_scrub_services().inc_scrubs_local()) { - // a failure is signalled by not having m_holding_local_reservation set - m_holding_local_reservation = true; - } -} - -LocalReservation::~LocalReservation() -{ - if (m_holding_local_reservation) { - m_holding_local_reservation = false; - m_osds->get_scrub_services().dec_scrubs_local(); - } -} - // ///////////////////// MapsCollectionStatus //////////////////////////////// auto MapsCollectionStatus::mark_arriving_map(pg_shard_t from) diff --git a/src/osd/scrubber/pg_scrubber.h b/src/osd/scrubber/pg_scrubber.h index 3ac4d6dfdb74..9f71f2d83230 100644 --- a/src/osd/scrubber/pg_scrubber.h +++ b/src/osd/scrubber/pg_scrubber.h @@ -87,21 +87,9 @@ Main Scrubber interfaces: namespace Scrub { class ScrubMachine; struct BuildMap; +class LocalResourceWrapper; -/** - * wraps the local OSD scrub resource reservation in an RAII wrapper - */ -class LocalReservation { - OSDService* m_osds; - bool m_holding_local_reservation{false}; - - public: - explicit LocalReservation(OSDService* osds); - ~LocalReservation(); - bool is_reserved() const { return m_holding_local_reservation; } -}; - /** * Once all replicas' scrub maps are received, we go on to compare the maps. * That is - unless we we have not yet completed building our own scrub map. @@ -628,9 +616,11 @@ class PgScrubber : public ScrubPgIF, epoch_t m_last_aborted{}; // last time we've noticed a request to abort - // 'optional', as 'LocalReservation' is - // 'RAII-designed' to guarantee un-reserving when deleted. - std::optional m_local_osd_resource; + /** + * once we acquire the local OSD resource, this is set to a wrapper that + * guarantees that the resource will be released when the scrub is done + */ + std::unique_ptr m_local_osd_resource; void cleanup_on_finish(); // scrub_clear_state() as called for a Primary when // Active->NotActive diff --git a/src/osd/scrubber/scrub_resources.cc b/src/osd/scrubber/scrub_resources.cc index dd9d31a15732..a69c6f06aca9 100644 --- a/src/osd/scrubber/scrub_resources.cc +++ b/src/osd/scrubber/scrub_resources.cc @@ -13,6 +13,7 @@ using ScrubResources = Scrub::ScrubResources; +using LocalResourceWrapper = Scrub::LocalResourceWrapper; ScrubResources::ScrubResources( log_upwards_t log_access, @@ -32,17 +33,18 @@ bool ScrubResources::can_inc_scrubs() const return can_inc_local_scrubs_unlocked(); } -bool ScrubResources::inc_scrubs_local() +std::unique_ptr ScrubResources::inc_scrubs_local( + bool is_high_priority) { std::lock_guard lck{resource_lock}; - if (can_inc_local_scrubs_unlocked()) { + if (is_high_priority || can_inc_local_scrubs_unlocked()) { ++scrubs_local; log_upwards(fmt::format( "{}: {} -> {} (max {}, remote {})", __func__, (scrubs_local - 1), scrubs_local, conf->osd_max_scrubs, granted_reservations.size())); - return true; + return std::make_unique(*this); } - return false; + return nullptr; } bool ScrubResources::can_inc_local_scrubs_unlocked() const @@ -119,3 +121,16 @@ void ScrubResources::dump_scrub_reservations(ceph::Formatter* f) const f->dump_string("PGs being served", fmt::format("{}", granted_reservations)); f->dump_int("osd_max_scrubs", conf->osd_max_scrubs); } + +// --------------- LocalResourceWrapper + +Scrub::LocalResourceWrapper::LocalResourceWrapper( + ScrubResources& resource_bookkeeper) + : m_resource_bookkeeper{resource_bookkeeper} +{} + +Scrub::LocalResourceWrapper::~LocalResourceWrapper() +{ + m_resource_bookkeeper.dec_scrubs_local(); +} + diff --git a/src/osd/scrubber/scrub_resources.h b/src/osd/scrubber/scrub_resources.h index 75807a10f825..ff9587bb60b0 100644 --- a/src/osd/scrubber/scrub_resources.h +++ b/src/osd/scrubber/scrub_resources.h @@ -18,6 +18,7 @@ namespace Scrub { * (prefix func, OSD id, etc.) */ using log_upwards_t = std::function; +class LocalResourceWrapper; /** * The number of concurrent scrub operations performed on an OSD is limited @@ -26,7 +27,15 @@ using log_upwards_t = std::function; * acting as primary and acting as a replica, and for enforcing the limit. */ class ScrubResources { - /// the number of concurrent scrubs performed by Primaries on this OSD + friend class LocalResourceWrapper; + + /** + * the number of concurrent scrubs performed by Primaries on this OSD. + * + * Note that, as high priority scrubs are always allowed to proceed, this + * counter may exceed the configured limit. When in this state - no new + * regular scrubs will be allowed to start. + */ int scrubs_local{0}; /// the set of PGs that have active scrub reservations as replicas @@ -56,7 +65,7 @@ class ScrubResources { bool can_inc_scrubs() const; /// increments the number of scrubs acting as a Primary - bool inc_scrubs_local(); + std::unique_ptr inc_scrubs_local(bool is_high_priority); /// decrements the number of scrubs acting as a Primary void dec_scrubs_local(); @@ -69,4 +78,21 @@ class ScrubResources { void dump_scrub_reservations(ceph::Formatter* f) const; }; + + +/** + * a wrapper around a "local scrub resource". The resources bookkeeper + * is handing these out to the PGs that acquired the local OSD's scrub + * resources. The PGs use these to release the resources when they are + * done scrubbing. + */ +class LocalResourceWrapper { + ScrubResources& m_resource_bookkeeper; + + public: + LocalResourceWrapper( + ScrubResources& resource_bookkeeper); + ~LocalResourceWrapper(); +}; + } // namespace Scrub