osd/scrub: select_pg_and_scrub() moved into osd_scrub.cc

author Ronen Friedman <rfriedma@redhat.com>

Sun, 10 Sep 2023 18:50:35 +0000 (13:50 -0500)

committer Ronen Friedman <rfriedma@redhat.com>

Wed, 20 Sep 2023 06:39:10 +0000 (01:39 -0500)
author Ronen Friedman <rfriedma@redhat.com>
Sun, 10 Sep 2023 18:50:35 +0000 (13:50 -0500)
committer Ronen Friedman <rfriedma@redhat.com>
Wed, 20 Sep 2023 06:39:10 +0000 (01:39 -0500)
diff --git a/src/osd/scrubber/osd_scrub.cc b/src/osd/scrubber/osd_scrub.cc

index fe92fd63c7dbd3c9c88f7739671a05c893c639f4..97f266d3dee47f6ecd4d8fc799a78d17183eee7b 100644 (file)
--- a/src/osd/scrubber/osd_scrub.cc
+++ b/src/osd/scrubber/osd_scrub.cc
@@ -12,39 +12,13 @@ using namespace ::std::chrono;
  using namespace ::std::chrono_literals;
  using namespace ::std::literals;
  
-#define dout_subsys ceph_subsys_osd
-#undef dout_context
-#define dout_context (cct)
-#undef dout_prefix
-#define dout_prefix                                                            \
-  *_dout << "osd." << osd_service.get_nodeid() << " scrub-queue::" << __func__ \
-        << " "
-
-
-
-void ScrubQueue::dump_scrubs(ceph::Formatter* f) const
-{
-  ceph_assert(f != nullptr);
-  std::lock_guard lck(jobs_lock);
-
-  f->open_array_section("scrubs");
-
-  std::for_each(to_scrub.cbegin(), to_scrub.cend(), [&f](const ScrubJobRef& j) {
-    j->dump(f);
-  });
-
-  std::for_each(penalized.cbegin(),
-               penalized.cend(),
-               [&f](const ScrubJobRef& j) { j->dump(f); });
-
-  f->close_section();
-}
  
  // ////////////////////////////////////////////////////////////////////////// //
  // scrub initiation - OSD code temporarily moved here from OSD.cc
  
+#define dout_subsys ceph_subsys_osd
  #undef dout_context
-#define dout_context cct
+#define dout_context (cct)
  #define dout_subsys ceph_subsys_osd
  #undef dout_prefix
  #define dout_prefix _prefix(_dout, whoami, get_osdmap_epoch())
@@ -176,6 +150,155 @@ void OSD::resched_all_scrubs()
          << " "
  
  
+void ScrubQueue::dump_scrubs(ceph::Formatter* f) const
+{
+  ceph_assert(f != nullptr);
+  std::lock_guard lck(jobs_lock);
+
+  f->open_array_section("scrubs");
+
+  std::for_each(to_scrub.cbegin(), to_scrub.cend(), [&f](const ScrubJobRef& j) {
+    j->dump(f);
+  });
+
+  std::for_each(penalized.cbegin(),
+               penalized.cend(),
+               [&f](const ScrubJobRef& j) { j->dump(f); });
+
+  f->close_section();
+}
+
+/**
+ *  a note regarding 'to_scrub_copy':
+ *  'to_scrub_copy' is a sorted set of all the ripe jobs from to_copy.
+ *  As we usually expect to refer to only the first job in this set, we could
+ *  consider an alternative implementation:
+ *  - have collect_ripe_jobs() return the copied set without sorting it;
+ *  - loop, performing:
+ *    - use std::min_element() to find a candidate;
+ *    - try that one. If not suitable, discard from 'to_scrub_copy'
+ */
+Scrub::schedule_result_t ScrubQueue::select_pg_and_scrub(
+  Scrub::OSDRestrictions& preconds)
+{
+  dout(10) << " reg./pen. sizes: " << to_scrub.size() << " / "
+          << penalized.size() << dendl;
+
+  utime_t now_is = time_now();
+
+  preconds.time_permit = scrub_time_permit(now_is);
+  preconds.load_is_low = scrub_load_below_threshold();
+  preconds.only_deadlined = !preconds.time_permit || !preconds.load_is_low;
+
+  //  create a list of candidates (copying, as otherwise creating a deadlock):
+  //  - possibly restore penalized
+  //  - (if we didn't handle directly) remove invalid jobs
+  //  - create a copy of the to_scrub (possibly up to first not-ripe)
+  //  - same for the penalized (although that usually be a waste)
+  //  unlock, then try the lists
+
+  std::unique_lock lck{jobs_lock};
+
+  // pardon all penalized jobs that have deadlined (or were updated)
+  scan_penalized(restore_penalized, now_is);
+  restore_penalized = false;
+
+  // remove the 'updated' flag from all entries
+  std::for_each(to_scrub.begin(),
+               to_scrub.end(),
+               [](const auto& jobref) -> void { jobref->updated = false; });
+
+  // add failed scrub attempts to the penalized list
+  move_failed_pgs(now_is);
+
+  //  collect all valid & ripe jobs from the two lists. Note that we must copy,
+  //  as when we use the lists we will not be holding jobs_lock (see
+  //  explanation above)
+
+  auto to_scrub_copy = collect_ripe_jobs(to_scrub, now_is);
+  auto penalized_copy = collect_ripe_jobs(penalized, now_is);
+  lck.unlock();
+
+  // try the regular queue first
+  auto res = select_from_group(to_scrub_copy, preconds, now_is);
+
+  // in the sole scenario in which we've gone over all ripe jobs without success
+  // - we will try the penalized
+  if (res == Scrub::schedule_result_t::none_ready && !penalized_copy.empty()) {
+    res = select_from_group(penalized_copy, preconds, now_is);
+    dout(10) << "tried the penalized. Res: "
+            << ScrubQueue::attempt_res_text(res) << dendl;
+    restore_penalized = true;
+  }
+
+  dout(15) << dendl;
+  return res;
+}
+
+
+// not holding jobs_lock. 'group' is a copy of the actual list.
+Scrub::schedule_result_t ScrubQueue::select_from_group(
+    ScrubQContainer& group,
+    const Scrub::OSDRestrictions& preconds,
+    utime_t now_is)
+{
+  dout(15) << "jobs #: " << group.size() << dendl;
+
+  for (auto& candidate : group) {
+
+    // we expect the first job in the list to be a good candidate (if any)
+
+    dout(20) << "try initiating scrub for " << candidate->pgid << dendl;
+
+    if (preconds.only_deadlined && (candidate->schedule.deadline.is_zero() ||
+                                   candidate->schedule.deadline >= now_is)) {
+      dout(15) << " not scheduling scrub for " << candidate->pgid << " due to "
+              << (preconds.time_permit ? "high load" : "time not permitting")
+              << dendl;
+      continue;
+    }
+
+    // we have a candidate to scrub. We turn to the OSD to verify that the PG
+    // configuration allows the specified type of scrub, and to initiate the
+    // scrub.
+    switch (osd_service.initiate_a_scrub(
+       candidate->pgid, preconds.allow_requested_repair_only)) {
+
+      case Scrub::schedule_result_t::scrub_initiated:
+       // the happy path. We are done
+       dout(20) << " initiated for " << candidate->pgid << dendl;
+       return Scrub::schedule_result_t::scrub_initiated;
+
+      case Scrub::schedule_result_t::already_started:
+      case Scrub::schedule_result_t::preconditions:
+      case Scrub::schedule_result_t::bad_pg_state:
+       // continue with the next job
+       dout(20) << "failed (state/cond/started) " << candidate->pgid << dendl;
+       break;
+
+      case Scrub::schedule_result_t::no_such_pg:
+       // The pg is no longer there
+       dout(20) << "failed (no pg) " << candidate->pgid << dendl;
+       break;
+
+      case Scrub::schedule_result_t::no_local_resources:
+       // failure to secure local resources. No point in trying the other
+       // PGs at this time. Note that this is not the same as replica resources
+       // failure!
+       dout(20) << "failed (local) " << candidate->pgid << dendl;
+       return Scrub::schedule_result_t::no_local_resources;
+
+      case Scrub::schedule_result_t::none_ready:
+       // can't happen. Just for the compiler.
+       dout(5) << "failed !!! " << candidate->pgid << dendl;
+       return Scrub::schedule_result_t::none_ready;
+    }
+  }
+
+  dout(20) << " returning 'none ready'" << dendl;
+  return Scrub::schedule_result_t::none_ready;
+}
+
  
  // ////////////////////////////////////////////////////////////////////////// //
  // CPU load tracking and related
diff --git a/src/osd/scrubber/osd_scrub_sched.cc b/src/osd/scrubber/osd_scrub_sched.cc

index b07b4385a58bcf1c04781daa710a1a58273307ea..9170e369ac47f0a729659f457b7f3c80f890e606 100644 (file)
--- a/src/osd/scrubber/osd_scrub_sched.cc
+++ b/src/osd/scrubber/osd_scrub_sched.cc
@@ -310,72 +310,6 @@ std::string_view ScrubQueue::qu_state_text(qu_state_t st)
  }
  // clang-format on
  
-/**
- *  a note regarding 'to_scrub_copy':
- *  'to_scrub_copy' is a sorted set of all the ripe jobs from to_copy.
- *  As we usually expect to refer to only the first job in this set, we could
- *  consider an alternative implementation:
- *  - have collect_ripe_jobs() return the copied set without sorting it;
- *  - loop, performing:
- *    - use std::min_element() to find a candidate;
- *    - try that one. If not suitable, discard from 'to_scrub_copy'
- */
-Scrub::schedule_result_t ScrubQueue::select_pg_and_scrub(
-  Scrub::OSDRestrictions& preconds)
-{
-  dout(10) << " reg./pen. sizes: " << to_scrub.size() << " / "
-          << penalized.size() << dendl;
-
-  utime_t now_is = time_now();
-
-  preconds.time_permit = scrub_time_permit(now_is);
-  preconds.load_is_low = scrub_load_below_threshold();
-  preconds.only_deadlined = !preconds.time_permit || !preconds.load_is_low;
-
-  //  create a list of candidates (copying, as otherwise creating a deadlock):
-  //  - possibly restore penalized
-  //  - (if we didn't handle directly) remove invalid jobs
-  //  - create a copy of the to_scrub (possibly up to first not-ripe)
-  //  - same for the penalized (although that usually be a waste)
-  //  unlock, then try the lists
-
-  std::unique_lock lck{jobs_lock};
-
-  // pardon all penalized jobs that have deadlined (or were updated)
-  scan_penalized(restore_penalized, now_is);
-  restore_penalized = false;
-
-  // remove the 'updated' flag from all entries
-  std::for_each(to_scrub.begin(),
-               to_scrub.end(),
-               [](const auto& jobref) -> void { jobref->updated = false; });
-
-  // add failed scrub attempts to the penalized list
-  move_failed_pgs(now_is);
-
-  //  collect all valid & ripe jobs from the two lists. Note that we must copy,
-  //  as when we use the lists we will not be holding jobs_lock (see
-  //  explanation above)
-
-  auto to_scrub_copy = collect_ripe_jobs(to_scrub, now_is);
-  auto penalized_copy = collect_ripe_jobs(penalized, now_is);
-  lck.unlock();
-
-  // try the regular queue first
-  auto res = select_from_group(to_scrub_copy, preconds, now_is);
-
-  // in the sole scenario in which we've gone over all ripe jobs without success
-  // - we will try the penalized
-  if (res == Scrub::schedule_result_t::none_ready && !penalized_copy.empty()) {
-    res = select_from_group(penalized_copy, preconds, now_is);
-    dout(10) << "tried the penalized. Res: "
-            << ScrubQueue::attempt_res_text(res) << dendl;
-    restore_penalized = true;
-  }
-
-  dout(15) << dendl;
-  return res;
-}
  
  // must be called under lock
  void ScrubQueue::rm_unregistered_jobs(ScrubQContainer& group)
@@ -434,69 +368,6 @@ ScrubQueue::ScrubQContainer ScrubQueue::collect_ripe_jobs(
    return ripes;
  }
  
-// not holding jobs_lock. 'group' is a copy of the actual list.
-Scrub::schedule_result_t ScrubQueue::select_from_group(
-  ScrubQContainer& group,
-  const Scrub::OSDRestrictions& preconds,
-  utime_t now_is)
-{
-  dout(15) << "jobs #: " << group.size() << dendl;
-
-  for (auto& candidate : group) {
-
-    // we expect the first job in the list to be a good candidate (if any)
-
-    dout(20) << "try initiating scrub for " << candidate->pgid << dendl;
-
-    if (preconds.only_deadlined && (candidate->schedule.deadline.is_zero() ||
-                                   candidate->schedule.deadline >= now_is)) {
-      dout(15) << " not scheduling scrub for " << candidate->pgid << " due to "
-              << (preconds.time_permit ? "high load" : "time not permitting")
-              << dendl;
-      continue;
-    }
-
-    // we have a candidate to scrub. We turn to the OSD to verify that the PG
-    // configuration allows the specified type of scrub, and to initiate the
-    // scrub.
-    switch (
-      osd_service.initiate_a_scrub(candidate->pgid,
-                                  preconds.allow_requested_repair_only)) {
-
-      case Scrub::schedule_result_t::scrub_initiated:
-       // the happy path. We are done
-       dout(20) << " initiated for " << candidate->pgid << dendl;
-       return Scrub::schedule_result_t::scrub_initiated;
-
-      case Scrub::schedule_result_t::already_started:
-      case Scrub::schedule_result_t::preconditions:
-      case Scrub::schedule_result_t::bad_pg_state:
-       // continue with the next job
-       dout(20) << "failed (state/cond/started) " << candidate->pgid << dendl;
-       break;
-
-      case Scrub::schedule_result_t::no_such_pg:
-       // The pg is no longer there
-       dout(20) << "failed (no pg) " << candidate->pgid << dendl;
-       break;
-
-      case Scrub::schedule_result_t::no_local_resources:
-       // failure to secure local resources. No point in trying the other
-       // PGs at this time. Note that this is not the same as replica resources
-       // failure!
-       dout(20) << "failed (local) " << candidate->pgid << dendl;
-       return Scrub::schedule_result_t::no_local_resources;
-
-      case Scrub::schedule_result_t::none_ready:
-       // can't happen. Just for the compiler.
-       dout(5) << "failed !!! " << candidate->pgid << dendl;
-       return Scrub::schedule_result_t::none_ready;
-    }
-  }
-
-  dout(20) << " returning 'none ready'" << dendl;
-  return Scrub::schedule_result_t::none_ready;
-}
  
  ScrubQueue::scrub_schedule_t ScrubQueue::adjust_target_time(
    const sched_params_t& times) const
author	Ronen Friedman <rfriedma@redhat.com>
	Sun, 10 Sep 2023 18:50:35 +0000 (13:50 -0500)
committer	Ronen Friedman <rfriedma@redhat.com>
	Wed, 20 Sep 2023 06:39:10 +0000 (01:39 -0500)
src/osd/scrubber/osd_scrub.cc		patch \| blob \| history
src/osd/scrubber/osd_scrub_sched.cc		patch \| blob \| history