git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd/scrub: remove OsdScrub::LoadTracker
author Ronen Friedman <rfriedma@redhat.com>
Tue, 20 May 2025 05:21:37 +0000 (00:21 -0500)
committer Ronen Friedman <rfriedma@redhat.com>
Tue, 20 May 2025 10:45:59 +0000 (05:45 -0500)
As we no longer maintain a 'daily average', and as the interaction
between the load tracker and the scrub scheduler is now much simplified,
we can remove the load tracker entirely.

Signed-off-by: Ronen Friedman <rfriedma@redhat.com>
src/osd/scrubber/osd_scrub.cc
src/osd/scrubber/osd_scrub.h

index f730198fe524a570e6f3eb2ef3728b8a4a68d491..305d9e768366873ac1e738d14ccb396ae615b556 100644 (file)
@@ -38,7 +38,6 @@ OsdScrub::OsdScrub(
     , m_resource_bookkeeper{[this](std::string msg) { log_fwd(msg); }, conf}
     , m_queue{cct, m_osd_svc}
     , m_log_prefix{fmt::format("osd.{} osd-scrub:", m_osd_svc.get_nodeid())}
-    , m_load_tracker{cct, conf, m_osd_svc.get_nodeid()}
 {
   create_scrub_perf_counters();
 }
@@ -211,7 +210,7 @@ Scrub::OSDRestrictions OsdScrub::restrictions_on_scrubbing(
   }
 
   env_conditions.restricted_time = !scrub_time_permit(scrub_clock_now);
-  env_conditions.cpu_overloaded = !m_load_tracker.scrub_load_below_threshold();
+  env_conditions.cpu_overloaded = !scrub_load_below_threshold();
 
   return env_conditions;
 }
@@ -263,89 +262,51 @@ void OsdScrub::on_config_change()
   }
 }
 
+
 // ////////////////////////////////////////////////////////////////////////// //
 // CPU load tracking and related
 
-OsdScrub::LoadTracker::LoadTracker(
-    CephContext* cct,
-    const ceph::common::ConfigProxy& config,
-    int node_id)
-    : cct{cct}
-    , conf{config}
-    , log_prefix{fmt::format("osd.{} scrub-queue::load-tracker::", node_id)}
-{
-  // initialize the daily loadavg with current 15min loadavg
-  if (double loadavgs[3]; getloadavg(loadavgs, 3) == 3) {
-    daily_loadavg = loadavgs[2];
-  } else {
-    derr << "OSD::init() : couldn't read loadavgs\n" << dendl;
-    daily_loadavg = 1.0;
-  }
-}
-
-///\todo replace with Knuth's algo (to reduce the numerical error)
-std::optional<double> OsdScrub::LoadTracker::update_load_average()
+std::optional<double> OsdScrub::update_load_average()
 {
-  auto hb_interval = conf->osd_heartbeat_interval;
-  int n_samples = std::chrono::duration_cast<seconds>(24h).count();
-  if (hb_interval > 1) {
-    n_samples = std::max(n_samples / hb_interval, 1L);
-  }
+  // cache the number of CPUs
+  loadavg_cpu_count = std::max(sysconf(_SC_NPROCESSORS_ONLN), 1L);
 
   double loadavg;
-  if (getloadavg(&loadavg, 1) == 1) {
-    loadavg_1min = loadavg;
-    daily_loadavg = (daily_loadavg * (n_samples - 1) + loadavg) / n_samples;
-    return 100 * loadavg;
+  if (getloadavg(&loadavg, 1) != 1) {
+    return std::nullopt;
   }
-
-  // getloadavg() failed
-  loadavg_1min = 0;
-  return std::nullopt;
+  return 100 * loadavg;
 }
 
-bool OsdScrub::LoadTracker::scrub_load_below_threshold() const
+
+bool OsdScrub::scrub_load_below_threshold() const
 {
-  // if the 1-min load average - even before dividing by the number of CPUs -
-  // is below the configured threshold, scrubs are allowed. No need to call
-  // sysconf().
-  if (loadavg_1min < conf->osd_scrub_load_threshold) {
-    dout(20) << fmt::format(
-                   "loadavg {:.3f} < max {:.3f} = yes",
-                   loadavg_1min, conf->osd_scrub_load_threshold)
-            << dendl;
-    return true;
+  // fetch an up-to-date load average.
+  // For the number of CPUs - rely on the last known value, fetched in the
+  // 'heartbeat' thread.
+  double loadavg;
+  if (getloadavg(&loadavg, 1) != 1) {
+    loadavg = 0;
   }
 
-  // check the load per CPU
-  const long cpus = sysconf(_SC_NPROCESSORS_ONLN);
-  const double loadavg_per_cpu = cpus > 0 ? loadavg_1min / cpus : loadavg_1min;
+  const double loadavg_per_cpu = loadavg / loadavg_cpu_count;
   if (loadavg_per_cpu < conf->osd_scrub_load_threshold) {
     dout(20) << fmt::format(
-                   "loadavg per cpu {:.3f} < max {:.3f}  (#CPUs: {}) = yes",
-                   loadavg_per_cpu, conf->osd_scrub_load_threshold, cpus)
+                   "loadavg per cpu {:.3f} < max {:.3f} (#CPUs:{}) = yes",
+                   loadavg_per_cpu, conf->osd_scrub_load_threshold,
+                   loadavg_cpu_count)
             << dendl;
     return true;
   }
 
   dout(10) << fmt::format(
-                 "loadavg {:.3f} >= max {:.3f} (#CPUs: {}) = no", loadavg_1min,
-                 conf->osd_scrub_load_threshold, cpus)
+                 "loadavg {:.3f} >= max {:.3f} (#CPUs:{}) = no",
+                 loadavg_per_cpu, conf->osd_scrub_load_threshold,
+                 loadavg_cpu_count)
           << dendl;
   return false;
 }
 
-std::ostream& OsdScrub::LoadTracker::gen_prefix(
-    std::ostream& out,
-    std::string_view fn) const
-{
-  return out << log_prefix << fn << ": ";
-}
-
-std::optional<double> OsdScrub::update_load_average()
-{
-  return m_load_tracker.update_load_average();
-}
 
 // ////////////////////////////////////////////////////////////////////////// //
 
index a280679f16b02459ba22b088ebd1ba1c813775c5..932860c92ff062d10b8b668f72ab8057a743e952 100644 (file)
@@ -195,31 +195,22 @@ class OsdScrub {
    */
   bool scrub_random_backoff() const;
 
-  /**
-   * tracking the average load on the CPU. Used both by the
-   * OSD logger, and by the scrub queue (as no scrubbing is allowed if
-   * the load is too high).
+  // tracking the CPU load
+  // ---------------------------------------------------------------
+
+  /*
+   * tracking the average load on the CPU. Used both by the OSD performance
+   * counters logger, and by the scrub queue (as no periodic scrubbing is
+   * allowed if the load is too high).
    */
-  class LoadTracker {
-    CephContext* cct;
-    const ceph::common::ConfigProxy& conf;
-    const std::string log_prefix;
-    double daily_loadavg{0.0};
-    double loadavg_1min{0.0};
-
-   public:
-    explicit LoadTracker(
-       CephContext* cct,
-       const ceph::common::ConfigProxy& config,
-       int node_id);
-
-    std::optional<double> update_load_average();
-
-    [[nodiscard]] bool scrub_load_below_threshold() const;
-
-    std::ostream& gen_prefix(std::ostream& out, std::string_view fn) const;
-  };
-  LoadTracker m_load_tracker;
+
+  /// the number of CPUs
+  long loadavg_cpu_count{1};
+
+  /// true if the load average (the 1-minute system average divided by
+  /// the number of CPUs) is below the configured threshold
+  bool scrub_load_below_threshold() const;
+
 
   // the scrub performance counters collections
   // ---------------------------------------------------------------