osd/scrub: fix the conditions for auto-repair scrubs

author Ronen Friedman <rfriedma@redhat.com>

Thu, 15 Aug 2024 12:51:15 +0000 (07:51 -0500)

committer Ronen Friedman <rfriedma@redhat.com>

Sun, 25 Aug 2024 13:01:00 +0000 (08:01 -0500)
author Ronen Friedman <rfriedma@redhat.com>
Thu, 15 Aug 2024 12:51:15 +0000 (07:51 -0500)
committer Ronen Friedman <rfriedma@redhat.com>
Sun, 25 Aug 2024 13:01:00 +0000 (08:01 -0500)
diff --git a/qa/standalone/scrub/osd-recovery-scrub.sh b/qa/standalone/scrub/osd-recovery-scrub.sh

index 3d3121fe8d80cb177a19f30128f7eb0a924daa07..1f4319e3ad5d169eaaec0ce76b342fcea3a570d1 100755 (executable)
--- a/qa/standalone/scrub/osd-recovery-scrub.sh
+++ b/qa/standalone/scrub/osd-recovery-scrub.sh
@@ -242,7 +242,7 @@ function TEST_recovery_scrub_2() {
      setup $dir || return 1
      run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1
      run_mgr $dir x || return 1
-    local ceph_osd_args="--osd-scrub-interval-randomize-ratio=0 "
+    local ceph_osd_args="--osd-scrub-interval-randomize-ratio=0.1 "
      ceph_osd_args+="--osd_scrub_backoff_ratio=0 "
      ceph_osd_args+="--osd_stats_update_period_not_scrubbing=3 "
      ceph_osd_args+="--osd_stats_update_period_scrubbing=2"
diff --git a/src/osd/scrubber/osd_scrub.cc b/src/osd/scrubber/osd_scrub.cc

index dfba0ee56f0b5dd6860a35d14340f46b20de90d5..1708fc26b442214c4391886ba42a48881a52ea3f 100644 (file)
--- a/src/osd/scrubber/osd_scrub.cc
+++ b/src/osd/scrubber/osd_scrub.cc
@@ -115,13 +115,15 @@ void OsdScrub::initiate_scrub(bool is_recovery_active)
    const auto env_restrictions =
        restrictions_on_scrubbing(is_recovery_active, scrub_time);
  
-  dout(10) << fmt::format("scrub scheduling (@tick) starts. "
-                          "time now:{:s}, recovery is active?:{} restrictions:{}",
-                          scrub_time, is_recovery_active, env_restrictions)
+  dout(10) << fmt::format(
+                 "scrub scheduling (@tick) starts. "
+                 "time now:{:s}, recovery is active?:{} restrictions:{}",
+                 scrub_time, is_recovery_active, env_restrictions)
            << dendl;
  
    if (g_conf()->subsys.should_gather<ceph_subsys_osd, 20>() &&
-      !env_restrictions.high_priority_only) {
+      !env_restrictions.max_concurrency_reached &&
+      !env_restrictions.random_backoff_active) {
      debug_log_all_jobs();
    }
  
@@ -165,7 +167,8 @@ bool OsdScrub::is_sched_target_eligible(
        ScrubJob::observes_max_concurrency(e.urgency)) {
      return false;
    }
-  if (!r.load_is_low && ScrubJob::observes_random_backoff(e.urgency)) {
+  if (r.random_backoff_active &&
+      ScrubJob::observes_random_backoff(e.urgency)) {
      return false;
    }
    if (!r.time_permit && ScrubJob::observes_allowed_hours(e.urgency)) {
@@ -174,7 +177,9 @@ bool OsdScrub::is_sched_target_eligible(
    if (!r.load_is_low && ScrubJob::observes_load_limit(e.urgency)) {
      return false;
    }
-  // recovery?
+  if (r.recovery_in_progress && ScrubJob::observes_recovery(e.urgency)) {
+    return false;
+  }
    return true;
  }
  
@@ -190,13 +195,12 @@ Scrub::OSDRestrictions OsdScrub::restrictions_on_scrubbing(
    if (!m_resource_bookkeeper.can_inc_scrubs()) {
      // our local OSD is already running too many scrubs
      dout(15) << "OSD cannot inc scrubs" << dendl;
-    env_conditions.high_priority_only = true;
+    env_conditions.max_concurrency_reached = true;
  
    } else if (scrub_random_backoff()) {
      // dice-roll says we should not scrub now
-      dout(15) << "Lost in dice. Only high priority scrubs allowed."
-              << dendl;
-      env_conditions.high_priority_only = true;
+    dout(15) << "Lost in dice. Only high priority scrubs allowed." << dendl;
+    env_conditions.random_backoff_active = true;
  
    } else if (is_recovery_active && !conf->osd_scrub_during_recovery) {
      if (conf->osd_repair_during_recovery) {
@@ -207,9 +211,9 @@ Scrub::OSDRestrictions OsdScrub::restrictions_on_scrubbing(
        env_conditions.allow_requested_repair_only = true;
  
      } else {
-      dout(15) << "recovery in progress. Only high priority scrubs allowed."
+      dout(15) << "recovery in progress. Operator-initiated scrubs only."
                << dendl;
-      env_conditions.high_priority_only = true;
+      env_conditions.recovery_in_progress = true;
      }
    } else {
  
diff --git a/src/osd/scrubber/pg_scrubber.cc b/src/osd/scrubber/pg_scrubber.cc

index 7b58c2cfdfce5a2238e5f2957fc340ab22f88392..1d2f2dc09a1c041fce2d887a81dba2515b301c5c 100644 (file)
--- a/src/osd/scrubber/pg_scrubber.cc
+++ b/src/osd/scrubber/pg_scrubber.cc
@@ -710,12 +710,21 @@ bool PgScrubber::is_after_repair_required() const
  }
  
  
+/**
+ * mark for a deep-scrub after the current scrub ended with errors.
+ * Note that no need to requeue the target, as it will be requeued
+ * when the scrub ends.
+ */
  void PgScrubber::request_rescrubbing(requested_scrub_t& request_flags)
  {
    dout(10) << __func__ << " flags: " << request_flags << dendl;
  
    request_flags.need_auto = true;
-  update_scrub_job(delay_ready_t::no_delay);
+  auto& trgt = m_scrub_job->get_target(scrub_level_t::deep);
+  trgt.up_urgency_to(urgency_t::must_repair);
+  trgt.sched_info.schedule.scheduled_at = {0, 0};
+  trgt.sched_info.schedule.not_before = ceph_clock_now();
+  // no need to requeue, as scrub_finish() will do that.
  }
  
  
@@ -1709,7 +1718,7 @@ void PgScrubber::set_op_parameters(
    m_epoch_start = m_pg->get_osdmap_epoch();
  
    m_flags.check_repair = m_active_target->urgency() == urgency_t::after_repair;
-  m_flags.auto_repair = request.auto_repair || request.need_auto;
+  m_flags.auto_repair = false;
    m_flags.required = request.req_scrub || request.must_scrub;
  
    m_flags.priority = (request.must_scrub || request.need_auto)
@@ -1733,6 +1742,9 @@ void PgScrubber::set_op_parameters(
    m_is_deep = m_active_target->sched_info.level == scrub_level_t::deep;
    if (m_is_deep) {
      state_set(PG_STATE_DEEP_SCRUB);
+    if (pg_cond.can_autorepair || request.auto_repair) {
+      m_flags.auto_repair = true;
+    }
    } else {
      ceph_assert(!request.must_deep_scrub);
      ceph_assert(!request.need_auto);
@@ -2105,9 +2117,6 @@ void PgScrubber::scrub_finish()
      ceph_assert(tr == 0);
    }
  
-  // determine the next scrub time
-  update_scrub_job(delay_ready_t::delay_ready);
-
    if (has_error) {
      m_pg->queue_peering_event(PGPeeringEventRef(
        std::make_shared<PGPeeringEvent>(get_osdmap_epoch(),
@@ -2123,6 +2132,8 @@ void PgScrubber::scrub_finish()
    if (do_auto_scrub) {
      request_rescrubbing(m_planned_scrub);
    }
+  // determine the next scrub time
+  update_scrub_job(delay_ready_t::delay_ready);
  
    if (m_pg->is_active() && m_pg->is_primary()) {
      m_pg->recovery_state.share_pg_info();
diff --git a/src/osd/scrubber/scrub_job.cc b/src/osd/scrubber/scrub_job.cc

index 45a300aaafe6864b064bbff6f758231575770aa9..ee33ee06706f7aa94474f56fb65919bfd1f06176 100644 (file)
--- a/src/osd/scrubber/scrub_job.cc
+++ b/src/osd/scrubber/scrub_job.cc
@@ -397,3 +397,8 @@ bool ScrubJob::observes_random_backoff(urgency_t urgency)
  {
    return urgency < urgency_t::after_repair;
  }
+
+bool ScrubJob::observes_recovery(urgency_t urgency)
+{
+  return urgency < urgency_t::operator_requested;
+}
diff --git a/src/osd/scrubber/scrub_job.h b/src/osd/scrubber/scrub_job.h

index 256e0d2caa178b6403a32da71f7a0c7dacc29b3e..98a3e101f9bfa030f3d4873a58f8549bfd9087ba 100644 (file)
--- a/src/osd/scrubber/scrub_job.h
+++ b/src/osd/scrubber/scrub_job.h
@@ -344,6 +344,7 @@ class ScrubJob {
   *  | noscrub    |    yes     |      no?     |     no   |      no     |
   *  | max-scrubs |    yes     |      yes?    |     no   |      no     |
   *  | backoff    |    yes     |      no      |     no   |      no     |
+ *  | recovery   |    yes     |      no      |     no   |      no     |
   *  +------------+------------+--------------+----------+-------------+
   */
  
@@ -362,7 +363,10 @@ class ScrubJob {
  
    static bool observes_max_concurrency(urgency_t urgency);
  
-  static bool observes_random_backoff(urgency_t urgency);};
+  static bool observes_random_backoff(urgency_t urgency);
+
+  static bool observes_recovery(urgency_t urgency);
+};
  }  // namespace Scrub
  
  namespace std {
diff --git a/src/osd/scrubber_common.h b/src/osd/scrubber_common.h

index c14834db8aae61fd9248da74fbc7b052ba76d2e9..5d6fe2902146a8e07c8367064f48f34530401bdb 100644 (file)
--- a/src/osd/scrubber_common.h
+++ b/src/osd/scrubber_common.h
@@ -84,12 +84,21 @@ using act_token_t = uint32_t;
  /// (note: struct size should be kept small, as it is copied around)
  struct OSDRestrictions {
    /// high local OSD concurrency. Thus - only high priority scrubs are allowed
-  bool high_priority_only{false};
-  bool allow_requested_repair_only{false};
-  bool only_deadlined{false};
+  bool max_concurrency_reached{false};
+
+  /// rolled a dice, and decided not to scrub in this tick
+  bool random_backoff_active{false};
+
+  /// the OSD is performing recovery & osd_repair_during_recovery is 'true'
+  bool allow_requested_repair_only:1{false};
+
+  /// the load is high, or the time is not right. For periodic scrubs,
+  /// only the overdue ones are allowed.
+  bool only_deadlined:1{false};
    bool load_is_low:1{true};
    bool time_permit:1{true};
-  bool max_concurrency_reached:1{false};
+  /// the OSD is performing a recovery, osd_scrub_during_recovery is 'false',
+  /// and so is osd_repair_during_recovery
    bool recovery_in_progress:1{false};
  };
  static_assert(sizeof(Scrub::OSDRestrictions) <= sizeof(uint32_t));
@@ -185,13 +194,13 @@ struct formatter<Scrub::OSDRestrictions> {
    auto format(const Scrub::OSDRestrictions& conds, FormatContext& ctx) const
    {
      return fmt::format_to(
-      ctx.out(),
-      "priority-only:{},overdue-only:{},load:{},time:{},repair-only:{}",
-        conds.high_priority_only,
-        conds.only_deadlined,
-        conds.load_is_low ? "ok" : "high",
-        conds.time_permit ? "ok" : "no",
-        conds.allow_requested_repair_only);
+       ctx.out(), "<{}.{}.{}.{}.{}.{}>",
+       conds.max_concurrency_reached ? "max-scrubs" : "",
+       conds.random_backoff_active ? "backoff" : "",
+       conds.load_is_low ? "" : "high-load",
+       conds.time_permit ? "" : "time-restrict",
+       conds.recovery_in_progress ? "recovery" : "",
+       conds.allow_requested_repair_only ? "repair-only" : "");
    }
  };
author	Ronen Friedman <rfriedma@redhat.com>
	Thu, 15 Aug 2024 12:51:15 +0000 (07:51 -0500)
committer	Ronen Friedman <rfriedma@redhat.com>
	Sun, 25 Aug 2024 13:01:00 +0000 (08:01 -0500)
qa/standalone/scrub/osd-recovery-scrub.sh		patch \| blob \| history
src/osd/scrubber/osd_scrub.cc		patch \| blob \| history
src/osd/scrubber/pg_scrubber.cc		patch \| blob \| history
src/osd/scrubber/scrub_job.cc		patch \| blob \| history
src/osd/scrubber/scrub_job.h		patch \| blob \| history
src/osd/scrubber_common.h		patch \| blob \| history