osd/scrub: remove the 'penalized jobs' queue

author Ronen Friedman <rfriedma@redhat.com>

Sat, 30 Dec 2023 12:36:26 +0000 (06:36 -0600)

committer Ronen Friedman <rfriedma@redhat.com>

Mon, 22 Jan 2024 13:25:25 +0000 (07:25 -0600)
author Ronen Friedman <rfriedma@redhat.com>
Sat, 30 Dec 2023 12:36:26 +0000 (06:36 -0600)
committer Ronen Friedman <rfriedma@redhat.com>
Mon, 22 Jan 2024 13:25:25 +0000 (07:25 -0600)
diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h

index ff02ca5730f78c80a9940bf9b90459a1f072595c..66f39a91ac4d16a92e8349eab6eb63c4b682b04d 100644 (file)
--- a/src/osd/osd_types.h
+++ b/src/osd/osd_types.h
@@ -403,7 +403,7 @@ struct pg_t {
    uint32_t m_seed;
  
    pg_t() : m_pool(0), m_seed(0) {}
-  pg_t(ps_t seed, uint64_t pool) :
+  constexpr pg_t(ps_t seed, uint64_t pool) :
      m_pool(pool), m_seed(seed) {}
    // cppcheck-suppress noExplicitConstructor
    pg_t(const ceph_pg& cpg) :
@@ -521,7 +521,7 @@ struct spg_t {
    pg_t pgid;
    shard_id_t shard;
    spg_t() : shard(shard_id_t::NO_SHARD) {}
-  spg_t(pg_t pgid, shard_id_t shard) : pgid(pgid), shard(shard) {}
+  constexpr spg_t(pg_t pgid, shard_id_t shard) : pgid(pgid), shard(shard) {}
    explicit spg_t(pg_t pgid) : pgid(pgid), shard(shard_id_t::NO_SHARD) {}
    auto operator<=>(const spg_t&) const = default;
    unsigned get_split_bits(unsigned pg_num) const {
diff --git a/src/osd/scrubber/osd_scrub_sched.cc b/src/osd/scrubber/osd_scrub_sched.cc

index 1b3506a35e5021653d0ca2660043f05904403c44..bb72ae4cb3fb99664acac75b51ab02ec186b75fc 100644 (file)
--- a/src/osd/scrubber/osd_scrub_sched.cc
+++ b/src/osd/scrubber/osd_scrub_sched.cc
@@ -188,75 +188,27 @@ sched_params_t ScrubQueue::determine_scrub_time(
  }
  
  
-// used under jobs_lock
-void ScrubQueue::move_failed_pgs(utime_t now_is)
-{
-  int punished_cnt{0}; // for log/debug only
-
-  for (auto job = to_scrub.begin(); job != to_scrub.end();) {
-    if ((*job)->resources_failure) {
-      auto sjob = *job;
-
-      // last time it was scheduled for a scrub, this PG failed in securing
-      // remote resources. Move it to the secondary scrub queue.
-
-      dout(15) << "moving " << sjob->pgid
-              << " state: " << ScrubJob::qu_state_text(sjob->state) << dendl;
-
-      // determine the penalty time, after which the job should be reinstated
-      utime_t after = now_is;
-      after += conf()->osd_scrub_sleep * 2 + utime_t{300'000ms};
-
-      // note: currently - not taking 'deadline' into account when determining
-      // 'penalty_timeout'.
-      sjob->penalty_timeout = after;
-      sjob->resources_failure = false;
-      sjob->updated = false;  // as otherwise will be pardoned immediately
-
-      // place in the penalty list, and remove from the to-scrub group
-      penalized.push_back(sjob);
-      job = to_scrub.erase(job);
-      punished_cnt++;
-    } else {
-      job++;
-    }
-  }
-
-  if (punished_cnt) {
-    dout(15) << "# of jobs penalized: " << punished_cnt << dendl;
-  }
-}
-
  std::vector<ScrubTargetId> ScrubQueue::ready_to_scrub(
      OSDRestrictions restrictions,  // note: 4B in size! (copy)
      utime_t scrub_tick)
  {
    dout(10) << fmt::format(
-                 " @{:s}: reg./pen. sizes: {} / {} ({})", scrub_tick,
-                 to_scrub.size(), penalized.size(), restrictions)
+                 " @{:s}: registered: {} ({})", scrub_tick,
+                 to_scrub.size(), restrictions)
            << dendl;
+
    //  create a list of candidates (copying, as otherwise creating a deadlock):
-  //  - possibly restore penalized
    //  - (if we didn't handle directly) remove invalid jobs
    //  - create a copy of the to_scrub (possibly up to first not-ripe)
-  //  - same for the penalized (although that usually be a waste)
    //  unlock, then try the lists
-
    std::unique_lock lck{jobs_lock};
  
-  // pardon all penalized jobs that have deadlined (or were updated)
-  scan_penalized(restore_penalized, scrub_tick);
-  restore_penalized = false;
-
    // remove the 'updated' flag from all entries
    std::for_each(
        to_scrub.begin(), to_scrub.end(),
        [](const auto& jobref) -> void { jobref->updated = false; });
  
-  // add failed scrub attempts to the penalized list
-  move_failed_pgs(scrub_tick);
-
-  // collect all valid & ripe jobs from the two lists. Note that we must copy,
+  // collect all valid & ripe jobs. Note that we must copy,
    // as when we use the lists we will not be holding jobs_lock (see
    // explanation above)
  
@@ -264,7 +216,6 @@ std::vector<ScrubTargetId> ScrubQueue::ready_to_scrub(
    // transformed into a vector of targets (which, in this phase, are
    // the PG id-s).
    auto to_scrub_copy = collect_ripe_jobs(to_scrub, restrictions, scrub_tick);
-  auto penalized_copy = collect_ripe_jobs(penalized, restrictions, scrub_tick);
    lck.unlock();
  
    std::vector<ScrubTargetId> all_ready;
@@ -272,13 +223,6 @@ std::vector<ScrubTargetId> ScrubQueue::ready_to_scrub(
        to_scrub_copy.cbegin(), to_scrub_copy.cend(),
        std::back_inserter(all_ready),
        [](const auto& jobref) -> ScrubTargetId { return jobref->pgid; });
-  // not bothering to handle the "reached the penalized - so all should be
-  // forgiven" case, as the penalty queue is destined to be removed in a
-  // followup PR.
-  std::transform(
-      penalized_copy.cbegin(), penalized_copy.cend(),
-      std::back_inserter(all_ready),
-      [](const auto& jobref) -> ScrubTargetId { return jobref->pgid; });
    return all_ready;
  }
  
@@ -389,71 +333,29 @@ Scrub::scrub_schedule_t ScrubQueue::adjust_target_time(
  }
  
  
-// note: called with jobs_lock held
-void ScrubQueue::scan_penalized(bool forgive_all, utime_t time_now)
-{
-  dout(20) << time_now << (forgive_all ? " all " : " - ") << penalized.size()
-          << dendl;
-
-  // clear dead entries (deleted PGs, or those PGs we are no longer their
-  // primary)
-  rm_unregistered_jobs(penalized);
-
-  if (forgive_all) {
-
-    std::copy(penalized.begin(), penalized.end(), std::back_inserter(to_scrub));
-    penalized.clear();
-
-  } else {
-
-    auto forgiven_last = std::partition(
-      penalized.begin(),
-      penalized.end(),
-      [time_now](const auto& e) {
-       return (*e).updated || ((*e).penalty_timeout <= time_now);
-      });
-
-    std::copy(penalized.begin(), forgiven_last, std::back_inserter(to_scrub));
-    penalized.erase(penalized.begin(), forgiven_last);
-    dout(20) << "penalized after screening: " << penalized.size() << dendl;
-  }
-}
-
  void ScrubQueue::dump_scrubs(ceph::Formatter* f) const
  {
    ceph_assert(f != nullptr);
    std::lock_guard lck(jobs_lock);
  
    f->open_array_section("scrubs");
-
    std::for_each(
        to_scrub.cbegin(), to_scrub.cend(),
        [&f](const Scrub::ScrubJobRef& j) { j->dump(f); });
-
-  std::for_each(
-      penalized.cbegin(), penalized.cend(),
-      [&f](const Scrub::ScrubJobRef& j) { j->dump(f); });
-
    f->close_section();
  }
  
  ScrubQContainer ScrubQueue::list_registered_jobs() const
  {
    ScrubQContainer all_jobs;
-  all_jobs.reserve(to_scrub.size() + penalized.size());
+  all_jobs.reserve(to_scrub.size());
    dout(20) << " size: " << all_jobs.capacity() << dendl;
  
    std::lock_guard lck{jobs_lock};
-
    std::copy_if(to_scrub.begin(),
                to_scrub.end(),
                std::back_inserter(all_jobs),
                registered_job);
-  std::copy_if(penalized.begin(),
-              penalized.end(),
-              std::back_inserter(all_jobs),
-              registered_job);
-
    return all_jobs;
  }
  
diff --git a/src/osd/scrubber/osd_scrub_sched.h b/src/osd/scrubber/osd_scrub_sched.h

index bd6de1c9347894c38f0e32e77c2acab8f43dde09..83578dff7d935a5e7e7daf291c0e379a38a99e84 100644 (file)
--- a/src/osd/scrubber/osd_scrub_sched.h
+++ b/src/osd/scrubber/osd_scrub_sched.h
@@ -30,7 +30,6 @@
  │                                        │
  │                                        │
  │  ScrubQContainer    to_scrub <>────────┼────────┐
-│  ScrubQContainer    penalized          │        │
  │                                        │        │
  │                                        │        │
  │  OSD_wide resource counters            │        │
@@ -146,11 +145,6 @@ class ScrubSchedListener {
   * the queue of PGs waiting to be scrubbed.
   * Main operations are scheduling/unscheduling a PG to be scrubbed at a certain
   * time.
- *
- * A "penalty" queue maintains those PGs that have failed to reserve the
- * resources of their replicas. The PGs in this list will be reinstated into the
- * scrub queue when all eligible PGs were already handled, or after a timeout
- * (or if their deadline has passed [[disabled at this time]]).
   */
  class ScrubQueue {
   public:
@@ -282,8 +276,6 @@ class ScrubQueue {
    mutable ceph::mutex jobs_lock = ceph::make_mutex("ScrubQueue::jobs_lock");
  
    Scrub::ScrubQContainer to_scrub;   ///< scrub jobs (i.e. PGs) to scrub
-  Scrub::ScrubQContainer penalized;  ///< those that failed to reserve remote resources
-  bool restore_penalized{false};
  
    static inline constexpr auto registered_job = [](const auto& jobref) -> bool {
      return jobref->state == Scrub::qu_state_t::registered;
@@ -293,11 +285,6 @@ class ScrubQueue {
      return jobref->state == Scrub::qu_state_t::not_registered;
    };
  
-  /**
-   * Are there scrub jobs that should be reinstated?
-   */
-  void scan_penalized(bool forgive_all, utime_t time_now);
-
    /**
     * clear dead entries (unregistered, or belonging to removed PGs) from a
     * queue. Job state is changed to match new status.
@@ -353,16 +340,6 @@ class ScrubQueue {
    Scrub::scrub_schedule_t adjust_target_time(
      const Scrub::sched_params_t& recomputed_params) const;
  
-  /**
-   * Look for scrub jobs that have their 'resources_failure' set. These jobs
-   * have failed to acquire remote resources last time we've initiated a scrub
-   * session on them. They are now moved from the 'to_scrub' queue to the
-   * 'penalized' set.
-   *
-   * locking: called with job_lock held
-   */
-  void move_failed_pgs(utime_t now_is);
-
  protected: // used by the unit-tests
    /**
     * unit-tests will override this function to return a mock time
diff --git a/src/osd/scrubber/scrub_job.h b/src/osd/scrubber/scrub_job.h

index 11e7388f6362469ee51d3bb167841dd3be4e4422..b75141ea179c33707dfc29ff79b2590e2612b140 100644 (file)
--- a/src/osd/scrubber/scrub_job.h
+++ b/src/osd/scrubber/scrub_job.h
@@ -77,8 +77,6 @@ class ScrubJob final : public RefCountedObject {
     * 'updated' is a temporary flag, used to create a barrier after
     * 'sched_time' and 'deadline' (or any other job entry) were modified by
     * different task.
-   * 'updated' also signals the need to move a job back from the penalized
-   * queue to the regular one.
     */
    std::atomic_bool updated{false};
  
@@ -89,8 +87,6 @@ class ScrubJob final : public RefCountedObject {
    bool blocked{false};
    utime_t blocked_since{};
  
-  utime_t penalty_timeout{0, 0};
-
    CephContext* cct;
  
    bool high_priority{false};
@@ -231,12 +227,11 @@ struct formatter<Scrub::ScrubJob> {
    {
      return fmt::format_to(
         ctx.out(),
-       "pg[{}] @ {:s} (dl:{:s}) - <{}> / failure: {} / pen. t.o.: {:s} / "
-       "queue "
-       "state: {:.7}",
-       sjob.pgid, sjob.schedule.scheduled_at, sjob.schedule.deadline,
-       sjob.registration_state(), sjob.resources_failure, sjob.penalty_timeout,
-       sjob.state.load(std::memory_order_relaxed));
+       "pg[{}] @ {:s} (dl:{:s}) - <{}> / failure: {} / queue state: "
+       "{:.7}",
+       sjob.pgid, sjob.schedule.scheduled_at,
+       sjob.schedule.deadline, sjob.registration_state(),
+       sjob.resources_failure, sjob.state.load(std::memory_order_relaxed));
    }
  };
  
diff --git a/src/test/osd/test_scrub_sched.cc b/src/test/osd/test_scrub_sched.cc

index da8fb3bb5e3ec80a52a3ade906d8f95ac67c5f78..afeb32b21d9d28582155067102b26fac5a158a7a 100644 (file)
--- a/src/test/osd/test_scrub_sched.cc
+++ b/src/test/osd/test_scrub_sched.cc
@@ -57,7 +57,6 @@ class ScrubSchedTestWrapper : public ScrubQueue {
    void rm_unregistered_jobs()
    {
      ScrubQueue::rm_unregistered_jobs(to_scrub);
-    ScrubQueue::rm_unregistered_jobs(penalized);
    }
  
    ScrubQContainer collect_ripe_jobs()
author	Ronen Friedman <rfriedma@redhat.com>
	Sat, 30 Dec 2023 12:36:26 +0000 (06:36 -0600)
committer	Ronen Friedman <rfriedma@redhat.com>
	Mon, 22 Jan 2024 13:25:25 +0000 (07:25 -0600)
src/osd/osd_types.h		patch \| blob \| history
src/osd/scrubber/osd_scrub_sched.cc		patch \| blob \| history
src/osd/scrubber/osd_scrub_sched.h		patch \| blob \| history
src/osd/scrubber/scrub_job.h		patch \| blob \| history
src/test/osd/test_scrub_sched.cc		patch \| blob \| history