From: Ronen Friedman Date: Thu, 14 Sep 2023 14:09:43 +0000 (-0500) Subject: osd/scrub: declare OsdScrub, an OSD subobject X-Git-Tag: v19.0.0~438^2~11 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=e0551a743c4a8e58309d4cc2ae69b370d6e9d0ec;p=ceph.git osd/scrub: declare OsdScrub, an OSD subobject for all OSD scrub things. For now: OsdScrub is mostly a forwarder to the ScrubQueue object (which it now owns). The resource counters moved into a separate object within OsdScrub. Signed-off-by: Ronen Friedman --- diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 29e9e47c605fa..13076fb9aff71 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -246,7 +246,7 @@ OSDService::OSDService(OSD *osd, ceph::async::io_context_pool& poolctx) : osd_skip_data_digest(cct->_conf, "osd_skip_data_digest"), publish_lock{ceph::make_mutex("OSDService::publish_lock")}, pre_publish_lock{ceph::make_mutex("OSDService::pre_publish_lock")}, - m_scrub_queue{cct, *this}, + m_osd_scrub{cct, *this, cct->_conf}, agent_valid_iterator(false), agent_ops(0), flush_mode_high_count(0), @@ -2853,7 +2853,7 @@ will start to track new ops received afterwards."; f->close_section(); } else if (prefix == "dump_scrub_reservations") { f->open_object_section("scrub_reservations"); - service.get_scrub_services().dump_scrub_reservations(f); + service.get_scrub_services().resource_bookkeeper().dump_scrub_reservations(f); f->close_section(); } else if (prefix == "get_latest_osdmap") { get_latest_osdmap(); diff --git a/src/osd/OSD.h b/src/osd/OSD.h index 8ed960d96055a..40368387159d5 100644 --- a/src/osd/OSD.h +++ b/src/osd/OSD.h @@ -53,7 +53,7 @@ #include "common/EventTrace.h" #include "osd/osd_perf_counters.h" #include "common/Finisher.h" -#include "scrubber/osd_scrub_sched.h" +#include "scrubber/osd_scrub.h" #define CEPH_OSD_PROTOCOL 10 /* cluster internal */ @@ -239,14 +239,11 @@ public: void handle_misdirected_op(PG *pg, OpRequestRef op); private: - /** - * The entity that maintains the set of PGs we may scrub (i.e. - those that we - * are their primary), and schedules their scrubbing. - */ - ScrubQueue m_scrub_queue; + /// the entity that offloads all scrubbing-related operations + OsdScrub m_osd_scrub; public: - ScrubQueue& get_scrub_services() { return m_scrub_queue; } + OsdScrub& get_scrub_services() { return m_osd_scrub; } /** * A callback used by the ScrubQueue object to initiate a scrub on a specific PG. diff --git a/src/osd/scrubber/osd_scrub.cc b/src/osd/scrubber/osd_scrub.cc index 2cf6df8ea9315..8c4740cc71c6c 100644 --- a/src/osd/scrubber/osd_scrub.cc +++ b/src/osd/scrubber/osd_scrub.cc @@ -1,10 +1,10 @@ // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- // vim: ts=8 sw=2 smarttab -#include "./osd_scrub_sched.h" +#include "./osd_scrub.h" -#include "osdc/Objecter.h" #include "osd/OSD.h" +#include "osdc/Objecter.h" #include "pg_scrubber.h" @@ -13,19 +13,55 @@ using namespace ::std::chrono_literals; using namespace ::std::literals; -// ////////////////////////////////////////////////////////////////////////// // -// scrub initiation - OSD code temporarily moved here from OSD.cc - #define dout_subsys ceph_subsys_osd #undef dout_context #define dout_context (cct) -#define dout_subsys ceph_subsys_osd #undef dout_prefix -#define dout_prefix _prefix(_dout, whoami, get_osdmap_epoch()) +#define dout_prefix _prefix_fn(_dout, this, __func__) + +template +static std::ostream& _prefix_fn(std::ostream* _dout, T* t, std::string fn = "") +{ + return t->gen_prefix(*_dout, fn); +} + +OsdScrub::OsdScrub( + CephContext* cct, + Scrub::ScrubSchedListener& osd_svc, + const ceph::common::ConfigProxy& config) + : cct{cct} + , m_osd_svc{osd_svc} + , conf{config} + , m_resource_bookkeeper{[this](std::string msg) { log_fwd(msg); }, conf} + , m_queue{cct, m_osd_svc} + , m_log_prefix{fmt::format("osd.{}: osd-scrub:", m_osd_svc.get_nodeid())} +{} + +std::ostream& OsdScrub::gen_prefix(std::ostream& out, std::string_view fn) const +{ + return out << m_log_prefix << fn << ": "; +} + +void OsdScrub::dump_scrubs(ceph::Formatter* f) const +{ + m_queue.dump_scrubs(f); +} + +void OsdScrub::log_fwd(std::string_view text) +{ + dout(20) << text << dendl; +} + + +// ////////////////////////////////////////////////////////////////////////// // +// scrub initiation - OSD code temporarily moved here from OSD.cc +// temporary dout() support for OSD members: static ostream& _prefix(std::ostream* _dout, int whoami, epoch_t epoch) { return *_dout << "osd." << whoami << " " << epoch << " "; } +#undef dout_prefix +#define dout_prefix _prefix(_dout, whoami, get_osdmap_epoch()) void OSD::sched_scrub() { @@ -83,6 +119,7 @@ void OSD::sched_scrub() << ")" << dendl; } + Scrub::schedule_result_t OSDService::initiate_a_scrub(spg_t pgid, bool allow_requested_repair_only) { @@ -118,6 +155,7 @@ Scrub::schedule_result_t OSDService::initiate_a_scrub(spg_t pgid, return scrub_attempt; } + void OSD::resched_all_scrubs() { dout(10) << __func__ << ": start" << dendl; @@ -140,15 +178,9 @@ void OSD::resched_all_scrubs() } - - -#undef dout_context -#define dout_context (cct) +// restoring local dout() settings (to be removed in a followup commit) #undef dout_prefix -#define dout_prefix \ - *_dout << "osd." << osd_service.get_nodeid() << " scrub-queue::" << __func__ \ - << " " - +#define dout_prefix _prefix_fn(_dout, this, __func__) void ScrubQueue::dump_scrubs(ceph::Formatter* f) const { @@ -303,21 +335,20 @@ Scrub::schedule_result_t ScrubQueue::select_from_group( // ////////////////////////////////////////////////////////////////////////// // // CPU load tracking and related + +///\todo replace with Knuth's algo (to reduce the numerical error) std::optional ScrubQueue::update_load_average() { int hb_interval = conf()->osd_heartbeat_interval; int n_samples = std::chrono::duration_cast(24h).count(); if (hb_interval > 1) { - n_samples /= hb_interval; - if (n_samples < 1) - n_samples = 1; + n_samples = std::max(n_samples / hb_interval, 1); } // get CPU load avg double loadavg; if (getloadavg(&loadavg, 1) == 1) { daily_loadavg = (daily_loadavg * (n_samples - 1) + loadavg) / n_samples; - dout(17) << "heartbeat: daily_loadavg " << daily_loadavg << dendl; return 100 * loadavg; } @@ -328,7 +359,7 @@ bool ScrubQueue::scrub_load_below_threshold() const { double loadavgs[3]; if (getloadavg(loadavgs, 3) != 3) { - dout(10) << __func__ << " couldn't read loadavgs\n" << dendl; + dout(10) << fmt::format("{}: couldn't read loadavgs", __func__) << dendl; return false; } @@ -336,26 +367,42 @@ bool ScrubQueue::scrub_load_below_threshold() const long cpus = sysconf(_SC_NPROCESSORS_ONLN); double loadavg_per_cpu = cpus > 0 ? loadavgs[0] / cpus : loadavgs[0]; if (loadavg_per_cpu < conf()->osd_scrub_load_threshold) { - dout(20) << "loadavg per cpu " << loadavg_per_cpu << " < max " - << conf()->osd_scrub_load_threshold << " = yes" << dendl; + dout(20) << fmt::format( + "loadavg per cpu {:.3f} < max {:.3f} = yes", + loadavg_per_cpu, conf()->osd_scrub_load_threshold) + << dendl; return true; } // allow scrub if below daily avg and currently decreasing if (loadavgs[0] < daily_loadavg && loadavgs[0] < loadavgs[2]) { - dout(20) << "loadavg " << loadavgs[0] << " < daily_loadavg " - << daily_loadavg << " and < 15m avg " << loadavgs[2] << " = yes" + dout(20) << fmt::format( + "loadavg {:.3f} < daily_loadavg {:.3f} and < 15m avg " + "{:.3f} = yes", + loadavgs[0], daily_loadavg, loadavgs[2]) << dendl; return true; } - dout(20) << "loadavg " << loadavgs[0] << " >= max " - << conf()->osd_scrub_load_threshold << " and ( >= daily_loadavg " - << daily_loadavg << " or >= 15m avg " << loadavgs[2] << ") = no" + dout(10) << fmt::format( + "loadavg {:.3f} >= max {:.3f} and ( >= daily_loadavg {:.3f} " + "or >= 15m avg {:.3f} ) = no", + loadavgs[0], conf()->osd_scrub_load_threshold, daily_loadavg, + loadavgs[2]) << dendl; return false; } + +std::optional OsdScrub::update_load_average() +{ + return m_queue.update_load_average(); +} + + + +// ////////////////////////////////////////////////////////////////////////// // + // checks for half-closed ranges. Modify the (p update_load_average(); + + private: + CephContext* cct; + Scrub::ScrubSchedListener& m_osd_svc; + const ceph::common::ConfigProxy& conf; + + /// resource reservation management + Scrub::ScrubResources m_resource_bookkeeper; + + /// the queue of PGs waiting to be scrubbed + ScrubQueue m_queue; + + public: + // for this transitory commit only - to be removed + bool can_inc_scrubs() { return m_resource_bookkeeper.can_inc_scrubs(); } + + // for this transitory commit only - to be removed + Scrub::schedule_result_t select_pg_and_scrub( + Scrub::OSDRestrictions& preconds); + + // for this transitory commit only - to be moved elsewhere + /** + * @return the list (not std::set!) of all scrub jobs registered + * (apart from PGs in the process of being removed) + */ + Scrub::ScrubQContainer list_registered_jobs() const; + + /// one of this OSD's PGs is trying to acquire replica resources + bool is_reserving_now() const; + + private: + const std::string m_log_prefix{}; + + /// number of PGs stuck while scrubbing, waiting for objects + int get_blocked_pgs_count() const; +}; diff --git a/src/osd/scrubber/osd_scrub_sched.h b/src/osd/scrubber/osd_scrub_sched.h index b5f4c5eca6889..94cca0f38471b 100644 --- a/src/osd/scrubber/osd_scrub_sched.h +++ b/src/osd/scrubber/osd_scrub_sched.h @@ -170,6 +170,7 @@ class ScrubQueue { friend class TestOSDScrub; friend class ScrubSchedTestWrapper; ///< unit-tests structure + friend class OsdScrub; ///< transitory - fixed in followup commits using sched_params_t = Scrub::sched_params_t; /** @@ -360,14 +361,6 @@ class ScrubQueue { Scrub::ScrubQContainer collect_ripe_jobs(Scrub::ScrubQContainer& group, utime_t time_now); - /// scrub resources management lock (guarding scrubs_local & scrubs_remote) - mutable ceph::mutex resource_lock = - ceph::make_mutex("ScrubQueue::resource_lock"); - - /// the counters used to manage scrub activity parallelism: - int scrubs_local{0}; - int scrubs_remote{0}; - /** * The scrubbing of PGs might be delayed if the scrubbed chunk of objects is * locked by some other operation. A bug might cause this to be an infinite diff --git a/src/osd/scrubber/scrub_resources.cc b/src/osd/scrubber/scrub_resources.cc index a324234721be1..179bd5e7e0e61 100644 --- a/src/osd/scrubber/scrub_resources.cc +++ b/src/osd/scrubber/scrub_resources.cc @@ -1,91 +1,90 @@ // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- // vim: ts=8 sw=2 smarttab -#include "./osd_scrub_sched.h" +#include "./scrub_resources.h" +#include -// ////////////////////////////////////////////////////////////////////////// // -// ScrubQueue - scrub resource management +#include "common/debug.h" -#define dout_subsys ceph_subsys_osd -#undef dout_context -#define dout_context (cct) -#undef dout_prefix -#define dout_prefix \ - *_dout << "osd." << osd_service.get_nodeid() << " scrub-queue::" << __func__ \ - << " " +#include "include/ceph_assert.h" -bool ScrubQueue::can_inc_scrubs() const + +using ScrubResources = Scrub::ScrubResources; + +ScrubResources::ScrubResources( + log_upwards_t log_access, + const ceph::common::ConfigProxy& config) + : log_upwards{log_access} + , conf{config} +{} + +bool ScrubResources::can_inc_scrubs() const { - // consider removing the lock here. Caller already handles delayed - // inc_scrubs_local() failures std::lock_guard lck{resource_lock}; - - if (scrubs_local + scrubs_remote < conf()->osd_max_scrubs) { + if (scrubs_local + scrubs_remote < conf->osd_max_scrubs) { return true; } - - dout(20) << " == false. " << scrubs_local << " local + " << scrubs_remote - << " remote >= max " << conf()->osd_max_scrubs << dendl; + log_upwards(fmt::format( + "{}== false. {} (local) + {} (remote) >= max ({})", __func__, + scrubs_local, scrubs_remote, conf->osd_max_scrubs)); return false; } -bool ScrubQueue::inc_scrubs_local() +bool ScrubResources::inc_scrubs_local() { std::lock_guard lck{resource_lock}; - - if (scrubs_local + scrubs_remote < conf()->osd_max_scrubs) { + if (scrubs_local + scrubs_remote < conf->osd_max_scrubs) { ++scrubs_local; return true; } - - dout(20) << ": " << scrubs_local << " local + " << scrubs_remote - << " remote >= max " << conf()->osd_max_scrubs << dendl; + log_upwards(fmt::format( + "{}: {} (local) + {} (remote) >= max ({})", __func__, scrubs_local, + scrubs_remote, conf->osd_max_scrubs)); return false; } -void ScrubQueue::dec_scrubs_local() +void ScrubResources::dec_scrubs_local() { std::lock_guard lck{resource_lock}; - dout(20) << ": " << scrubs_local << " -> " << (scrubs_local - 1) << " (max " - << conf()->osd_max_scrubs << ", remote " << scrubs_remote << ")" - << dendl; - + log_upwards(fmt::format( + "{}: {} -> {} (max {}, remote {})", __func__, scrubs_local, + (scrubs_local - 1), conf->osd_max_scrubs, scrubs_remote)); --scrubs_local; ceph_assert(scrubs_local >= 0); } -bool ScrubQueue::inc_scrubs_remote() +bool ScrubResources::inc_scrubs_remote() { std::lock_guard lck{resource_lock}; - - if (scrubs_local + scrubs_remote < conf()->osd_max_scrubs) { - dout(20) << ": " << scrubs_remote << " -> " << (scrubs_remote + 1) - << " (max " << conf()->osd_max_scrubs << ", local " - << scrubs_local << ")" << dendl; + if (scrubs_local + scrubs_remote < conf->osd_max_scrubs) { + log_upwards(fmt::format( + "{}: {} -> {} (max {}, local {})", __func__, scrubs_remote, + (scrubs_remote + 1), conf->osd_max_scrubs, scrubs_local)); ++scrubs_remote; return true; } - dout(20) << ": " << scrubs_local << " local + " << scrubs_remote - << " remote >= max " << conf()->osd_max_scrubs << dendl; + log_upwards(fmt::format( + "{}: {} (local) + {} (remote) >= max ({})", __func__, scrubs_local, + scrubs_remote, conf->osd_max_scrubs)); return false; } -void ScrubQueue::dec_scrubs_remote() +void ScrubResources::dec_scrubs_remote() { std::lock_guard lck{resource_lock}; - dout(20) << ": " << scrubs_remote << " -> " << (scrubs_remote - 1) << " (max " - << conf()->osd_max_scrubs << ", local " << scrubs_local << ")" - << dendl; + log_upwards(fmt::format( + "{}: {} -> {} (max {}, local {})", __func__, scrubs_remote, + (scrubs_remote - 1), conf->osd_max_scrubs, scrubs_local)); --scrubs_remote; ceph_assert(scrubs_remote >= 0); } -void ScrubQueue::dump_scrub_reservations(ceph::Formatter* f) const +void ScrubResources::dump_scrub_reservations(ceph::Formatter* f) const { std::lock_guard lck{resource_lock}; f->dump_int("scrubs_local", scrubs_local); f->dump_int("scrubs_remote", scrubs_remote); - f->dump_int("osd_max_scrubs", conf()->osd_max_scrubs); + f->dump_int("osd_max_scrubs", conf->osd_max_scrubs); } diff --git a/src/osd/scrubber/scrub_resources.h b/src/osd/scrubber/scrub_resources.h new file mode 100644 index 0000000000000..890ee5d0e2fae --- /dev/null +++ b/src/osd/scrubber/scrub_resources.h @@ -0,0 +1,66 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +#pragma once + +#include +#include + +#include "common/ceph_mutex.h" +#include "common/config_proxy.h" +#include "common/Formatter.h" + +namespace Scrub { + +/** + * an interface allowing the ScrubResources to log directly into its + * owner's log. This way, we do not need the full dout() mechanism + * (prefix func, OSD id, etc.) + */ +using log_upwards_t = std::function; + +/** + * The number of concurrent scrub operations performed on an OSD is limited + * by a configuration parameter. The 'ScrubResources' class is responsible for + * maintaining a count of the number of scrubs currently performed, both + * acting as primary and acting as a replica, and for enforcing the limit. + */ +class ScrubResources { + /// the number of concurrent scrubs performed by Primaries on this OSD + int scrubs_local{0}; + + /// the number of active scrub reservations granted by replicas + int scrubs_remote{0}; + + mutable ceph::mutex resource_lock = + ceph::make_mutex("ScrubQueue::resource_lock"); + + log_upwards_t log_upwards; ///< access into the owner's dout() + + const ceph::common::ConfigProxy& conf; + + public: + explicit ScrubResources( + log_upwards_t log_access, + const ceph::common::ConfigProxy& config); + + /** + * \returns true if the number of concurrent scrubs is + * below osd_max_scrubs + */ + bool can_inc_scrubs() const; + + /// increments the number of scrubs acting as a Primary + bool inc_scrubs_local(); + + /// decrements the number of scrubs acting as a Primary + void dec_scrubs_local(); + + /// increments the number of scrubs acting as a Replica + bool inc_scrubs_remote(); + + /// decrements the number of scrubs acting as a Replica + void dec_scrubs_remote(); + + void dump_scrub_reservations(ceph::Formatter* f) const; +}; +} // namespace Scrub