From 162baba744c26cc0f0a0497769f22f7dcb89e4d2 Mon Sep 17 00:00:00 2001 From: Ronen Friedman Date: Tue, 20 May 2025 00:21:37 -0500 Subject: [PATCH] osd/scrub: remove OsdScrub::LoadTracker As we no longer maintain a 'daily average', and as the interaction between the load tracker and the scrub scheduler is now much simplified, we can remove the load tracker entirely. Signed-off-by: Ronen Friedman --- src/osd/scrubber/osd_scrub.cc | 85 ++++++++++------------------------- src/osd/scrubber/osd_scrub.h | 39 +++++++--------- 2 files changed, 38 insertions(+), 86 deletions(-) diff --git a/src/osd/scrubber/osd_scrub.cc b/src/osd/scrubber/osd_scrub.cc index f730198fe52..305d9e76836 100644 --- a/src/osd/scrubber/osd_scrub.cc +++ b/src/osd/scrubber/osd_scrub.cc @@ -38,7 +38,6 @@ OsdScrub::OsdScrub( , m_resource_bookkeeper{[this](std::string msg) { log_fwd(msg); }, conf} , m_queue{cct, m_osd_svc} , m_log_prefix{fmt::format("osd.{} osd-scrub:", m_osd_svc.get_nodeid())} - , m_load_tracker{cct, conf, m_osd_svc.get_nodeid()} { create_scrub_perf_counters(); } @@ -211,7 +210,7 @@ Scrub::OSDRestrictions OsdScrub::restrictions_on_scrubbing( } env_conditions.restricted_time = !scrub_time_permit(scrub_clock_now); - env_conditions.cpu_overloaded = !m_load_tracker.scrub_load_below_threshold(); + env_conditions.cpu_overloaded = !scrub_load_below_threshold(); return env_conditions; } @@ -263,89 +262,51 @@ void OsdScrub::on_config_change() } } + // ////////////////////////////////////////////////////////////////////////// // // CPU load tracking and related -OsdScrub::LoadTracker::LoadTracker( - CephContext* cct, - const ceph::common::ConfigProxy& config, - int node_id) - : cct{cct} - , conf{config} - , log_prefix{fmt::format("osd.{} scrub-queue::load-tracker::", node_id)} -{ - // initialize the daily loadavg with current 15min loadavg - if (double loadavgs[3]; getloadavg(loadavgs, 3) == 3) { - daily_loadavg = loadavgs[2]; - } else { - derr << "OSD::init() : couldn't read loadavgs\n" << 
dendl; - daily_loadavg = 1.0; - } -} - -///\todo replace with Knuth's algo (to reduce the numerical error) -std::optional<double> OsdScrub::LoadTracker::update_load_average() +std::optional<double> OsdScrub::update_load_average() { - auto hb_interval = conf->osd_heartbeat_interval; - int n_samples = std::chrono::duration_cast<seconds>(24h).count(); - if (hb_interval > 1) { - n_samples = std::max(n_samples / hb_interval, 1L); - } + // cache the number of CPUs + loadavg_cpu_count = std::max(sysconf(_SC_NPROCESSORS_ONLN), 1L); double loadavg; - if (getloadavg(&loadavg, 1) == 1) { - loadavg_1min = loadavg; - daily_loadavg = (daily_loadavg * (n_samples - 1) + loadavg) / n_samples; - return 100 * loadavg; + if (getloadavg(&loadavg, 1) != 1) { + return std::nullopt; } - - // getloadavg() failed - loadavg_1min = 0; - return std::nullopt; + return 100 * loadavg; } -bool OsdScrub::LoadTracker::scrub_load_below_threshold() const + +bool OsdScrub::scrub_load_below_threshold() const { - // if the 1-min load average - even before dividing by the number of CPUs - - // is below the configured threshold, scrubs are allowed. No need to call - // sysconf(). - if (loadavg_1min < conf->osd_scrub_load_threshold) { - dout(20) << fmt::format( - "loadavg {:.3f} < max {:.3f} = yes", - loadavg_1min, conf->osd_scrub_load_threshold) - << dendl; - return true; + // fetch an up-to-date load average. + // For the number of CPUs - rely on the last known value, fetched in the + // 'heartbeat' thread. + double loadavg; + if (getloadavg(&loadavg, 1) != 1) { + loadavg = 0; } - // check the load per CPU - const long cpus = sysconf(_SC_NPROCESSORS_ONLN); - const double loadavg_per_cpu = cpus > 0 ? 
loadavg_1min / cpus : loadavg_1min; + const double loadavg_per_cpu = loadavg / loadavg_cpu_count; if (loadavg_per_cpu < conf->osd_scrub_load_threshold) { dout(20) << fmt::format( - "loadavg per cpu {:.3f} < max {:.3f} (#CPUs: {}) = yes", - loadavg_per_cpu, conf->osd_scrub_load_threshold, cpus) + "loadavg per cpu {:.3f} < max {:.3f} (#CPUs:{}) = yes", + loadavg_per_cpu, conf->osd_scrub_load_threshold, + loadavg_cpu_count) << dendl; return true; } dout(10) << fmt::format( - "loadavg {:.3f} >= max {:.3f} (#CPUs: {}) = no", loadavg_1min, - conf->osd_scrub_load_threshold, cpus) + "loadavg {:.3f} >= max {:.3f} (#CPUs:{}) = no", + loadavg_per_cpu, conf->osd_scrub_load_threshold, + loadavg_cpu_count) << dendl; return false; } -std::ostream& OsdScrub::LoadTracker::gen_prefix( - std::ostream& out, - std::string_view fn) const -{ - return out << log_prefix << fn << ": "; -} - -std::optional OsdScrub::update_load_average() -{ - return m_load_tracker.update_load_average(); -} // ////////////////////////////////////////////////////////////////////////// // diff --git a/src/osd/scrubber/osd_scrub.h b/src/osd/scrubber/osd_scrub.h index a280679f16b..932860c92ff 100644 --- a/src/osd/scrubber/osd_scrub.h +++ b/src/osd/scrubber/osd_scrub.h @@ -195,31 +195,22 @@ class OsdScrub { */ bool scrub_random_backoff() const; - /** - * tracking the average load on the CPU. Used both by the - * OSD logger, and by the scrub queue (as no scrubbing is allowed if - * the load is too high). + // tracking the CPU load + // --------------------------------------------------------------- + + /* + * tracking the average load on the CPU. Used both by the OSD performance + * counters logger, and by the scrub queue (as no periodic scrubbing is + * allowed if the load is too high). 
*/ - class LoadTracker { - CephContext* cct; - const ceph::common::ConfigProxy& conf; - const std::string log_prefix; - double daily_loadavg{0.0}; - double loadavg_1min{0.0}; - - public: - explicit LoadTracker( - CephContext* cct, - const ceph::common::ConfigProxy& config, - int node_id); - - std::optional<double> update_load_average(); - - [[nodiscard]] bool scrub_load_below_threshold() const; - - std::ostream& gen_prefix(std::ostream& out, std::string_view fn) const; - }; - LoadTracker m_load_tracker; + + /// the number of CPUs + long loadavg_cpu_count{1}; + + /// true if the load average (the 1-minute system average divided by + /// the number of CPUs) is below the configured threshold + bool scrub_load_below_threshold() const; + // the scrub performance counters collections // --------------------------------------------------------------- -- 2.39.5