git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd/scrub: exempt only operator scrubs from max_scrubs limit 59020/head
author Ronen Friedman <rfriedma@redhat.com>
Mon, 29 Jul 2024 14:11:50 +0000 (09:11 -0500)
committer Ronen Friedman <rfriedma@redhat.com>
Mon, 5 Aug 2024 10:38:19 +0000 (05:38 -0500)
The existing code exempts all 'high priority' scrubs from the max_scrubs
limit, including, for example, 'after_repair' and 'mandatory on invalid
history' scrubs.

PGs that do not have valid last-scrub data (which is the case when a pool
is first created) are set to shallow-scrub immediately. Unfortunately, given
the low granularity implemented in the existing code, this type of scrub is
also 'high priority'. As a result, a newly created pool will have all of its
PGs start scrubbing at once, regardless of concurrency (or any other)
limits.

Fixes: https://tracker.ceph.com/issues/67253
Signed-off-by: Ronen Friedman <rfriedma@redhat.com>
(cherry picked from commit babd65e412266f5c734f7a2b57d87657d3470c47)
conflict resolution:
- eliminated irrelevant 'main' code that was picked into this branch;
- moved the code that sets the scrub_job's flag to osd_scrub_sched.cc,
  where the corresponding function resides.

src/osd/scrubber/osd_scrub_sched.cc
src/osd/scrubber/pg_scrubber.cc
src/osd/scrubber/scrub_job.h
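
For context, the following is a minimal, self-contained sketch of the
behaviour the new flag is intended to produce. The FakeOsdScrubCounters and
FakeScrubJob types are invented for illustration only; they merely mimic the
shape of inc_scrubs_local() and of the new observes_max_concurrency flag,
and are not the actual Ceph classes.

// A standalone illustration (not Ceph code): only scrub jobs flagged as
// exempt from the concurrency limit (i.e. operator-initiated ones) may
// exceed osd_max_scrubs.
#include <iostream>

// Hypothetical stand-in for the OSD-wide scrub resource counters.
struct FakeOsdScrubCounters {
  int scrubs_local{0};
  int osd_max_scrubs{1};  // models the per-OSD concurrency limit

  // Mirrors the idea behind inc_scrubs_local(bool): when 'bypass_limit'
  // is set, the reservation succeeds even if the counter is at the limit.
  bool inc_scrubs_local(bool bypass_limit) {
    if (!bypass_limit && scrubs_local >= osd_max_scrubs) {
      return false;  // ordinary scrubs must respect osd_max_scrubs
    }
    ++scrubs_local;
    return true;
  }
};

// Hypothetical stand-in for the relevant ScrubJob flags.
struct FakeScrubJob {
  bool high_priority{false};
  // defaults to true: even mandatory "no valid last-scrub data" scrubs
  // now count against osd_max_scrubs
  bool observes_max_concurrency{true};
};

int main() {
  FakeOsdScrubCounters osd;

  // the automatic scrub of a PG in a freshly created pool: high priority,
  // but not operator-initiated, so the flag keeps its default of 'true'
  FakeScrubJob first_scrub_after_pool_create;
  first_scrub_after_pool_create.high_priority = true;

  // an operator-initiated scrub: the only kind that clears the flag
  FakeScrubJob operator_scrub;
  operator_scrub.high_priority = true;
  operator_scrub.observes_max_concurrency = false;

  // exhaust the local scrub budget (osd_max_scrubs == 1)
  osd.inc_scrubs_local(false);

  std::cout << std::boolalpha
            // false: now blocked by the limit (the fix described above)
            << osd.inc_scrubs_local(
                   !first_scrub_after_pool_create.observes_max_concurrency)
            << '\n'
            // true: operator scrubs may still exceed the limit
            << osd.inc_scrubs_local(!operator_scrub.observes_max_concurrency)
            << '\n';
}

With this split, the mandatory scrub of a freshly created pool keeps its high
scheduling priority but no longer bypasses osd_max_scrubs, which is the change
the hunks below make.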

index 079e2a7e7aed6b30b7cde8302185135b073ee943..a7531a6cde2f9eaf545aa11041cc9aa620f787f4 100644 (file)
@@ -150,6 +150,7 @@ void ScrubQueue::update_job(Scrub::ScrubJobRef scrub_job,
   // adjust the suggested scrub time according to OSD-wide status
   auto adjusted = adjust_target_time(suggested);
   scrub_job->high_priority = suggested.is_must == must_scrub_t::mandatory;
+  scrub_job->observes_max_concurrency = suggested.observes_max_scrubs;
   scrub_job->update_schedule(adjusted, reset_nb);
 }
 
index b2648bcf87c0e2d897a0bdbff637fe58b5e510a1..e716d5d6e962d5787280735920b84b637bb6259e 100644 (file)
@@ -511,6 +511,7 @@ sched_params_t PgScrubber::determine_scrub_time(
     // Set the smallest time that isn't utime_t()
     res.proposed_time = PgScrubber::scrub_must_stamp();
     res.is_must = Scrub::must_scrub_t::mandatory;
+    res.observes_max_scrubs = false;
     // we do not need the interval data in this case
 
   } else if (
@@ -643,7 +644,7 @@ bool PgScrubber::reserve_local()
   // a wrapper around the actual reservation, and that object releases
   // the local resource automatically when reset.
   m_local_osd_resource = m_osds->get_scrub_services().inc_scrubs_local(
-      m_scrub_job->is_high_priority());
+      !m_scrub_job->observes_max_concurrency);
   if (m_local_osd_resource) {
     dout(15) << __func__ << ": local resources reserved" << dendl;
     return true;
index 98a73477b092772288e71f54a461319d793bd47c..4a802a4f10097394d79745208835ed633fba0b2b 100644 (file)
@@ -46,6 +46,7 @@ struct sched_params_t {
   double min_interval{0.0};
   double max_interval{0.0};
   must_scrub_t is_must{must_scrub_t::not_mandatory};
+  bool observes_max_scrubs{true};
 };
 
 class ScrubJob final : public RefCountedObject {
@@ -92,6 +93,13 @@ class ScrubJob final : public RefCountedObject {
 
   bool high_priority{false};
 
+  /**
+   * If cleared: the scrub can be initiated even if the local OSD has reached
+   * osd_max_scrubs. Only 'false' for those high-priority scrubs that were
+   * operator initiated.
+   */
+  bool observes_max_concurrency{true};
+
   ScrubJob(CephContext* cct, const spg_t& pg, int node_id);
 
   utime_t get_sched_time() const { return schedule.not_before; }