osd/scrub: fix scrub eligibility tests

author Ronen Friedman <rfriedma@redhat.com>

Thu, 7 Dec 2023 14:55:30 +0000 (08:55 -0600)

committer Ronen Friedman <rfriedma@redhat.com>

Mon, 11 Dec 2023 07:26:02 +0000 (01:26 -0600)
author Ronen Friedman <rfriedma@redhat.com>
Thu, 7 Dec 2023 14:55:30 +0000 (08:55 -0600)
committer Ronen Friedman <rfriedma@redhat.com>
Mon, 11 Dec 2023 07:26:02 +0000 (01:26 -0600)
diff --git a/src/osd/PG.cc b/src/osd/PG.cc

index ddef326e2a8acd5d88169702a3a147de3182cb76..5ff1246427b7daf3010f408fa00d84034cf10ca5 100644 (file)
--- a/src/osd/PG.cc
+++ b/src/osd/PG.cc
@@ -1331,30 +1331,44 @@ unsigned int PG::scrub_requeue_priority(Scrub::scrub_prio_t with_priority, unsig
  // ==========================================================================================
  // SCRUB
  
+
  /*
   *  implementation note:
- *  PG::sched_scrub() is called only once per a specific scrub session.
+ *  PG::start_scrubbing() is called only once per a specific scrub session.
   *  That call commits us to the whatever choices are made (deep/shallow, etc').
   *  Unless failing to start scrubbing, the 'planned scrub' flag-set is 'frozen' into
   *  PgScrubber's m_flags, then cleared.
   */
-Scrub::schedule_result_t PG::sched_scrub()
+Scrub::schedule_result_t PG::start_scrubbing(
+    Scrub::OSDRestrictions osd_restrictions)
  {
    using Scrub::schedule_result_t;
-  dout(15) << __func__ << " pg(" << info.pgid
-         << (is_active() ? ") <active>" : ") <not-active>")
-         << (is_clean() ? " <clean>" : " <not-clean>") << dendl;
+  dout(10) << fmt::format(
+                 "{}: {}+{} (env restrictions:{})", __func__,
+                 (is_active() ? "<active>" : "<not-active>"),
+                 (is_clean() ? "<clean>" : "<not-clean>"), osd_restrictions)
+          << dendl;
    ceph_assert(ceph_mutex_is_locked(_lock));
-  ceph_assert(m_scrubber);
-
-  if (is_scrub_queued_or_active()) {
-     dout(10) << __func__ << ": already scrubbing" << dendl;
-     return schedule_result_t::target_specific_failure;
-  }
  
    if (!is_primary() || !is_active() || !is_clean()) {
      dout(10) << __func__ << ": cannot scrub (not a clean and active primary)"
-      << dendl;
+            << dendl;
+    return schedule_result_t::target_specific_failure;
+  }
+
+  ceph_assert(m_scrubber);
+  if (is_scrub_queued_or_active()) {
+    dout(10) << __func__ << ": scrub already in progress" << dendl;
+    return schedule_result_t::target_specific_failure;
+  }
+  // if only explicitly requested repairing is allowed - skip other types
+  // of scrubbing
+  if (osd_restrictions.allow_requested_repair_only &&
+      !get_planned_scrub().must_repair) {
+    dout(10) << __func__
+            << ": skipping this PG as repairing was not explicitly "
+               "requested for it"
+            << dendl;
      return schedule_result_t::target_specific_failure;
    }
  
@@ -1366,9 +1380,9 @@ Scrub::schedule_result_t PG::sched_scrub()
      return schedule_result_t::target_specific_failure;
    }
  
-  // analyse the combination of the requested scrub flags, the osd/pool configuration
-  // and the PG status to determine whether we should scrub now, and what type of scrub
-  // should that be.
+  // analyze the combination of the requested scrub flags, the osd/pool
+  // configuration and the PG status to determine whether we should scrub
+  // now, and what type of scrub should that be.
    auto updated_flags = validate_scrub_mode();
    if (!updated_flags) {
      // the stars do not align for starting a scrub for this PG at this time
@@ -1391,15 +1405,17 @@ Scrub::schedule_result_t PG::sched_scrub()
    // An interrupted recovery repair could leave this set.
    state_clear(PG_STATE_REPAIR);
  
-  // Pass control to the scrubber. It is the scrubber that handles the replicas'
-  // resources reservations.
+  // Pass control to the scrubber. It is the scrubber that handles the
+  // replicas' resources reservations.
    m_scrubber->set_op_parameters(m_planned_scrub);
  
+  // using the OSD queue, as to not execute the scrub code as part of the tick.
    dout(10) << __func__ << ": queueing" << dendl;
    osd->queue_for_scrub(this, Scrub::scrub_prio_t::low_priority);
    return schedule_result_t::scrub_initiated;
  }
  
+
  double PG::next_deepscrub_interval() const
  {
    double deep_scrub_interval =
diff --git a/src/osd/PG.h b/src/osd/PG.h

index 8713b1c8ae887de905b6f168e97529104749e285..d9acfafd03285271fe3b9023aab3915278f446fb 100644 (file)
--- a/src/osd/PG.h
+++ b/src/osd/PG.h
@@ -709,11 +709,16 @@ public:
    virtual void on_shutdown() = 0;
  
    bool get_must_scrub() const;
-  Scrub::schedule_result_t sched_scrub();
  
-  unsigned int scrub_requeue_priority(Scrub::scrub_prio_t with_priority, unsigned int suggested_priority) const;
+  Scrub::schedule_result_t start_scrubbing(
+      Scrub::OSDRestrictions osd_restrictions);
+
+  unsigned int scrub_requeue_priority(
+      Scrub::scrub_prio_t with_priority,
+      unsigned int suggested_priority) const;
    /// the version that refers to flags_.priority
    unsigned int scrub_requeue_priority(Scrub::scrub_prio_t with_priority) const;
+
  private:
    // auxiliaries used by sched_scrub():
    double next_deepscrub_interval() const;
diff --git a/src/osd/scrubber/osd_scrub.cc b/src/osd/scrubber/osd_scrub.cc

index 536c4479b1d30ebd3aca8707257e034f9b9c519f..fb21e0e1c5ee904bb8288ca6d2f61cf773c78b7c 100644 (file)
--- a/src/osd/scrubber/osd_scrub.cc
+++ b/src/osd/scrubber/osd_scrub.cc
@@ -71,7 +71,7 @@ void OsdScrub::initiate_scrub(bool is_recovery_active)
  {
    const utime_t scrub_time = ceph_clock_now();
    dout(10) << fmt::format(
-                 "time now:{}, recover is active?:{}", scrub_time,
+                 "time now:{:s}, recovery is active?:{}", scrub_time,
                   is_recovery_active)
            << dendl;
  
@@ -116,8 +116,7 @@ void OsdScrub::initiate_scrub(bool is_recovery_active)
      // we have a candidate to scrub. But we may fail when trying to initiate that
      // scrub. For some failures - we can continue with the next candidate. For
      // others - we should stop trying to scrub at this tick.
-    auto res = initiate_a_scrub(
-       candidate, env_restrictions.allow_requested_repair_only);
+    auto res = initiate_a_scrub(candidate, env_restrictions);
  
      if (res == schedule_result_t::target_specific_failure) {
        // continue with the next job.
@@ -190,7 +189,7 @@ Scrub::OSDRestrictions OsdScrub::restrictions_on_scrubbing(
  
  Scrub::schedule_result_t OsdScrub::initiate_a_scrub(
      spg_t pgid,
-    bool allow_requested_repair_only)
+    Scrub::OSDRestrictions restrictions)
  {
    dout(20) << fmt::format("trying pg[{}]", pgid) << dendl;
  
@@ -205,23 +204,8 @@ Scrub::schedule_result_t OsdScrub::initiate_a_scrub(
      return Scrub::schedule_result_t::target_specific_failure;
    }
  
-  // This one is already scrubbing, so go on to the next scrub job
-  if (locked_pg->pg()->is_scrub_queued_or_active()) {
-    dout(10) << fmt::format("pg[{}]: scrub already in progress", pgid) << dendl;
-    return Scrub::schedule_result_t::target_specific_failure;
-  }
-  // Skip other kinds of scrubbing if only explicitly requested repairing is allowed
-  if (allow_requested_repair_only &&
-      !locked_pg->pg()->get_planned_scrub().must_repair) {
-    dout(10) << fmt::format(
-                   "skipping pg[{}] as repairing was not explicitly "
-                   "requested for that pg",
-                   pgid)
-            << dendl;
-    return Scrub::schedule_result_t::target_specific_failure;
-  }
-
-  return locked_pg->pg()->sched_scrub();
+  // later on, here is where the scrub target would be dequeued
+  return locked_pg->pg()->start_scrubbing(restrictions);
  }
  
  void OsdScrub::on_config_change()
diff --git a/src/osd/scrubber/osd_scrub.h b/src/osd/scrubber/osd_scrub.h

index fcc4fd3fe9c52ed6d36d85e3000b33fe8a9a1b0d..774076711e2d0cb9484009fc343be033bb1a9f0f 100644 (file)
--- a/src/osd/scrubber/osd_scrub.h
+++ b/src/osd/scrubber/osd_scrub.h
@@ -193,7 +193,7 @@ class OsdScrub {
     */
    Scrub::schedule_result_t initiate_a_scrub(
        spg_t pgid,
-      bool allow_requested_repair_only);
+      Scrub::OSDRestrictions restrictions);
  
    /// resource reservation management
    Scrub::ScrubResources m_resource_bookkeeper;
author	Ronen Friedman <rfriedma@redhat.com>
	Thu, 7 Dec 2023 14:55:30 +0000 (08:55 -0600)
committer	Ronen Friedman <rfriedma@redhat.com>
	Mon, 11 Dec 2023 07:26:02 +0000 (01:26 -0600)
src/osd/PG.cc		patch \| blob \| history
src/osd/PG.h		patch \| blob \| history
src/osd/scrubber/osd_scrub.cc		patch \| blob \| history
src/osd/scrubber/osd_scrub.h		patch \| blob \| history