]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd/scrub: introduce a 'not before' attribute for scrub jobs
authorRonen Friedman <rfriedma@redhat.com>
Sun, 31 Dec 2023 16:18:09 +0000 (10:18 -0600)
committerRonen Friedman <rfriedma@redhat.com>
Mon, 22 Jan 2024 13:25:25 +0000 (07:25 -0600)
The NB enables the OSD to delay the next attempt to schedule a specific
scrub job.  This is useful for jobs that have failed for whatever
reason, especially if the primary has failed to acquire the replicas.

Signed-off-by: Ronen Friedman <rfriedma@redhat.com>
src/osd/scrubber/osd_scrub.cc
src/osd/scrubber/osd_scrub.h
src/osd/scrubber/osd_scrub_sched.cc
src/osd/scrubber/osd_scrub_sched.h
src/osd/scrubber/pg_scrubber.cc
src/osd/scrubber/scrub_job.cc
src/osd/scrubber/scrub_job.h

index e5abe03feaad6f2b0e6fe8814d5bff4317e6b7c7..a721dc1cb9c040e7a9b400c412e1fcff0c03dc6e 100644 (file)
@@ -429,9 +429,10 @@ Scrub::sched_params_t OsdScrub::determine_scrub_time(
 
 void OsdScrub::update_job(
     Scrub::ScrubJobRef sjob,
-    const Scrub::sched_params_t& suggested)
+    const Scrub::sched_params_t& suggested,
+    bool reset_notbefore)
 {
-  m_queue.update_job(sjob, suggested);
+  m_queue.update_job(sjob, suggested, reset_notbefore);
 }
 
 void OsdScrub::register_with_osd(
index ce7b8524d69cb3b1d64e147ea0e98068872b6f64..407eae059079f60d773536b1072c661180168a56 100644 (file)
@@ -90,21 +90,23 @@ class OsdScrub {
    *   the registration will be with "beginning of time" target, making the
    *   scrub-job eligible to immediate scrub (given that external conditions
    *   do not prevent scrubbing)
-   *
    * - 'must' is asserted, and the suggested time is 'now':
    *   This happens if our stats are unknown. The results are similar to the
    *   previous scenario.
-   *
    * - not a 'must': we take the suggested time as a basis, and add to it some
    *   configuration / random delays.
-   *
    *  ('must' is Scrub::sched_params_t.is_must)
    *
+   *  'reset_notbefore' is used to reset the 'not_before' time to the updated
+   *  'scheduled_at' time. This is used whenever the scrub-job schedule is
+   *  updated not as a result of a scrub attempt failure.
+   *
    *  locking: not using the jobs_lock
    */
   void update_job(
       Scrub::ScrubJobRef sjob,
-      const Scrub::sched_params_t& suggested);
+      const Scrub::sched_params_t& suggested,
+      bool reset_notbefore);
 
   /**
    * Add the scrub job to the list of jobs (i.e. list of PGs) to be periodically
index bb72ae4cb3fb99664acac75b51ab02ec186b75fc..836bf740ec08769641e0bebc0d2071fe4ebe8453 100644 (file)
@@ -94,7 +94,7 @@ void ScrubQueue::register_with_osd(
   switch (state_at_entry) {
     case qu_state_t::registered:
       // just updating the schedule?
-      update_job(scrub_job, suggested);
+      update_job(scrub_job, suggested, false /* keep n.b. delay */);
       break;
 
     case qu_state_t::not_registered:
@@ -110,7 +110,7 @@ void ScrubQueue::register_with_osd(
          break;
        }
 
-       update_job(scrub_job, suggested);
+       update_job(scrub_job, suggested, true /* resets not_before */);
        to_scrub.push_back(scrub_job);
        scrub_job->in_queues = true;
        scrub_job->state = qu_state_t::registered;
@@ -124,7 +124,7 @@ void ScrubQueue::register_with_osd(
        // at any minute
        std::lock_guard lck{jobs_lock};
 
-       update_job(scrub_job, suggested);
+       update_job(scrub_job, suggested, true /* resets not_before */);
        if (scrub_job->state == qu_state_t::not_registered) {
          dout(5) << " scrub job state changed to 'not registered'" << dendl;
          to_scrub.push_back(scrub_job);
@@ -138,18 +138,19 @@ void ScrubQueue::register_with_osd(
   dout(10) << fmt::format(
                "pg[{}] sched-state changed from <{:.14}> to <{:.14}> (@{:s})",
                scrub_job->pgid, state_at_entry, scrub_job->state.load(),
-               scrub_job->schedule.scheduled_at)
+               scrub_job->schedule.not_before)
           << dendl;
 }
 
-// look mommy - no locks!
+
 void ScrubQueue::update_job(Scrub::ScrubJobRef scrub_job,
-                           const sched_params_t& suggested)
+                           const sched_params_t& suggested,
+                            bool reset_nb)
 {
   // adjust the suggested scrub time according to OSD-wide status
   auto adjusted = adjust_target_time(suggested);
-  scrub_job->update_schedule(adjusted);
   scrub_job->high_priority = suggested.is_must == must_scrub_t::mandatory;
+  scrub_job->update_schedule(adjusted, reset_nb);
 }
 
 sched_params_t ScrubQueue::determine_scrub_time(
@@ -262,7 +263,7 @@ ScrubQContainer ScrubQueue::collect_ripe_jobs(
     utime_t time_now)
 {
   auto filtr = [time_now, rst = restrictions](const auto& jobref) -> bool {
-    return jobref->schedule.scheduled_at <= time_now &&
+    return jobref->schedule.not_before <= time_now &&
           (!rst.high_priority_only || jobref->high_priority) &&
           (!rst.only_deadlined || (!jobref->schedule.deadline.is_zero() &&
                                    jobref->schedule.deadline <= time_now));
@@ -280,7 +281,8 @@ ScrubQContainer ScrubQueue::collect_ripe_jobs(
     for (const auto& jobref : group) {
       if (!filtr(jobref)) {
        dout(20) << fmt::format(
-                       " not ripe: {} @ {:s}", jobref->pgid,
+                       " not ripe: {} @ {:s} ({:s})", jobref->pgid,
+                        jobref->schedule.not_before,
                        jobref->schedule.scheduled_at)
                 << dendl;
       }
@@ -295,7 +297,7 @@ Scrub::scrub_schedule_t ScrubQueue::adjust_target_time(
   const sched_params_t& times) const
 {
   Scrub::scrub_schedule_t sched_n_dead{
-    times.proposed_time, times.proposed_time};
+    times.proposed_time, times.proposed_time, times.proposed_time};
 
   if (times.is_must == Scrub::must_scrub_t::not_mandatory) {
     // unless explicitly requested, postpone the scrub with a random delay
index 83578dff7d935a5e7e7daf291c0e379a38a99e84..d94ca8cf7579962d7fe241ceae5ed01b090ca366 100644 (file)
@@ -202,19 +202,23 @@ class ScrubQueue {
    *   the registration will be with "beginning of time" target, making the
    *   scrub-job eligible to immediate scrub (given that external conditions
    *   do not prevent scrubbing)
-   *
    * - 'must' is asserted, and the suggested time is 'now':
    *   This happens if our stats are unknown. The results are similar to the
    *   previous scenario.
-   *
    * - not a 'must': we take the suggested time as a basis, and add to it some
    *   configuration / random delays.
-   *
    *  ('must' is sched_params_t.is_must)
    *
+   *  'reset_notbefore' is used to reset the 'not_before' time to the updated
+   *  'scheduled_at' time. This is used whenever the scrub-job schedule is
+   *  updated not as a result of a scrub attempt failure.
+   *
    *  locking: not using the jobs_lock
    */
-  void update_job(Scrub::ScrubJobRef sjob, const sched_params_t& suggested);
+  void update_job(
+      Scrub::ScrubJobRef sjob,
+      const sched_params_t& suggested,
+      bool reset_notbefore);
 
   sched_params_t determine_scrub_time(const requested_scrub_t& request_flags,
                                      const pg_info_t& pg_info,
index 2dae53273a7059a5b4c338ce0a69c0b7791e550b..9cd082a87947063a7d1ec420e3522da0fedffbe3 100644 (file)
@@ -547,7 +547,7 @@ void PgScrubber::update_scrub_job(const requested_scrub_t& request_flags)
     ceph_assert(m_pg->is_locked());
     auto suggested = m_osds->get_scrub_services().determine_scrub_time(
        request_flags, m_pg->info, m_pg->get_pgpool().info.opts);
-    m_osds->get_scrub_services().update_job(m_scrub_job, suggested);
+    m_osds->get_scrub_services().update_job(m_scrub_job, suggested, true);
     m_pg->publish_stats_to_osd();
   }
 
@@ -2126,7 +2126,7 @@ pg_scrubbing_status_t PgScrubber::get_schedule() const
                  !m_planned_scrub.must_deep_scrub;
 
   // are we ripe for scrubbing?
-  if (now_is > m_scrub_job->schedule.scheduled_at) {
+  if (now_is > m_scrub_job->schedule.not_before) {
     // we are waiting for our turn at the OSD.
     return pg_scrubbing_status_t{m_scrub_job->schedule.scheduled_at,
                                 0,
@@ -2136,7 +2136,7 @@ pg_scrubbing_status_t PgScrubber::get_schedule() const
                                 periodic};
   }
 
-  return pg_scrubbing_status_t{m_scrub_job->schedule.scheduled_at,
+  return pg_scrubbing_status_t{m_scrub_job->schedule.not_before,
                               0,
                               pg_scrub_sched_status_t::scheduled,
                               false,
index 35071af5fd510c9bc3079bb3c69fe9e1109f7f32..1875641e754a3d9df4cf4d3764d41b4df3f29f12 100644 (file)
@@ -43,18 +43,30 @@ ostream& operator<<(ostream& out, const ScrubJob& sjob)
 }
 }  // namespace std
 
-void ScrubJob::update_schedule(const Scrub::scrub_schedule_t& adjusted)
+void ScrubJob::update_schedule(
+    const Scrub::scrub_schedule_t& adjusted,
+    bool reset_nb)
 {
-  schedule = adjusted;
-  penalty_timeout = utime_t(0, 0);  // helps with debugging
+  dout(15)
+      << fmt::format(
+            "was: nb:{:s}({:s}). Called with: rest?{} nb:{:s} ({:s}) ({})",
+            schedule.not_before, schedule.scheduled_at, reset_nb,
+            adjusted.not_before, adjusted.scheduled_at, registration_state())
+      << dendl;
+  schedule.scheduled_at = adjusted.scheduled_at;
+  schedule.deadline = adjusted.deadline;
+
+  if (reset_nb || schedule.not_before < schedule.scheduled_at) {
+    schedule.not_before = schedule.scheduled_at;
+  }
 
   // 'updated' is changed here while not holding jobs_lock. That's OK, as
   // the (atomic) flag will only be cleared by select_pg_and_scrub() after
   // scan_penalized() is called and the job was moved to the to_scrub queue.
   updated = true;
   dout(10) << fmt::format(
-                 "adjusted: {:s} ({})", schedule.scheduled_at,
-                 registration_state())
+                 "adjusted: nb:{:s} ({:s}) ({})", schedule.not_before,
+                 schedule.scheduled_at, registration_state())
           << dendl;
 }
 
@@ -67,15 +79,14 @@ std::string ScrubJob::scheduling_state(utime_t now_is, bool is_deep_expected)
   }
 
   // if the time has passed, we are surely in the queue
-  // (note that for now we do not tell client if 'penalized')
-  if (now_is > schedule.scheduled_at) {
+  if (now_is > schedule.not_before) {
     // we are never sure that the next scrub will indeed be shallow:
     return fmt::format("queued for {}scrub", (is_deep_expected ? "deep " : ""));
   }
 
   return fmt::format(
-      "{}scrub scheduled @ {:s}", (is_deep_expected ? "deep " : ""),
-      schedule.scheduled_at);
+      "{}scrub scheduled @ {:s} ({:s})", (is_deep_expected ? "deep " : ""),
+      schedule.not_before, schedule.scheduled_at);
 }
 
 std::ostream& ScrubJob::gen_prefix(std::ostream& out, std::string_view fn) const
@@ -100,7 +111,8 @@ void ScrubJob::dump(ceph::Formatter* f) const
 {
   f->open_object_section("scrub");
   f->dump_stream("pgid") << pgid;
-  f->dump_stream("sched_time") << schedule.scheduled_at;
+  f->dump_stream("sched_time") << schedule.not_before;
+  f->dump_stream("orig_sched_time") << schedule.scheduled_at;
   f->dump_stream("deadline") << schedule.deadline;
   f->dump_bool("forced",
               schedule.scheduled_at == PgScrubber::scrub_must_stamp());
index b75141ea179c33707dfc29ff79b2590e2612b140..e6765c767b9685c12cd6696c4856fcbfe0ff821b 100644 (file)
@@ -38,6 +38,7 @@ enum class qu_state_t {
 struct scrub_schedule_t {
   utime_t scheduled_at{};
   utime_t deadline{0, 0};
+  utime_t not_before{utime_t::max()};
 };
 
 struct sched_params_t {
@@ -66,7 +67,7 @@ class ScrubJob final : public RefCountedObject {
 
   /**
    * the old 'is_registered'. Set whenever the job is registered with the OSD,
-   * i.e. is in either the 'to_scrub' or the 'penalized' vectors.
+   * i.e. is in 'to_scrub'.
    */
   std::atomic_bool in_queues{false};
 
@@ -93,7 +94,7 @@ class ScrubJob final : public RefCountedObject {
 
   ScrubJob(CephContext* cct, const spg_t& pg, int node_id);
 
-  utime_t get_sched_time() const { return schedule.scheduled_at; }
+  utime_t get_sched_time() const { return schedule.not_before; }
 
   static std::string_view qu_state_text(qu_state_t st);
 
@@ -107,7 +108,15 @@ class ScrubJob final : public RefCountedObject {
     return qu_state_text(state.load(std::memory_order_relaxed));
   }
 
-  void update_schedule(const scrub_schedule_t& adjusted);
+  /**
+   * 'reset_failure_penalty' is used to reset the 'not_before' jo attribute to
+   * the updated 'scheduled_at' time. This is used whenever the scrub-job
+   * schedule is updated, and the update is not a result of a scrub attempt
+   * failure.
+   */
+  void update_schedule(
+      const scrub_schedule_t& adjusted,
+      bool reset_failure_penalty);
 
   void dump(ceph::Formatter* f) const;
 
@@ -227,9 +236,9 @@ struct formatter<Scrub::ScrubJob> {
   {
     return fmt::format_to(
        ctx.out(),
-       "pg[{}] @ {:s} (dl:{:s}) - <{}> / failure: {} / queue state: "
+       "pg[{}] @ {:s} ({:s}) (dl:{:s}) - <{}> / failure: {} / queue state: "
        "{:.7}",
-       sjob.pgid, sjob.schedule.scheduled_at,
+       sjob.pgid, sjob.schedule.not_before, sjob.schedule.scheduled_at,
        sjob.schedule.deadline, sjob.registration_state(),
        sjob.resources_failure, sjob.state.load(std::memory_order_relaxed));
   }