osd: Try other PGs when reservation failures occur

author David Zafman <dzafman@redhat.com>

Fri, 15 Jan 2021 04:39:32 +0000 (20:39 -0800)

committer David Zafman <dzafman@redhat.com>

Fri, 5 Mar 2021 19:41:26 +0000 (11:41 -0800)
author David Zafman <dzafman@redhat.com>
Fri, 15 Jan 2021 04:39:32 +0000 (20:39 -0800)
committer David Zafman <dzafman@redhat.com>
Fri, 5 Mar 2021 19:41:26 +0000 (11:41 -0800)
diff --git a/PendingReleaseNotes b/PendingReleaseNotes

index 0b7d0cdba557c267d5130554fd9921b4ea50ab64..a2395da4430765e9960b3411d410913945e9db4a 100644 (file)
--- a/PendingReleaseNotes
+++ b/PendingReleaseNotes
@@ -84,6 +84,9 @@
    is enabled. This helps with the monitor logs on larger clusters, that may get
    many 'osd.X reported immediately failed by osd.Y' messages, and confuse tools.
  
+* Scrubs are now more aggressive in trying to find additional PGs to scrub simultaneously, within the osd_max_scrubs limit.
+  Increasing osd_scrub_sleep may be necessary to maintain client responsiveness.
+
  >=15.0.0
  --------
  
diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc

index fd2d630ad1e630e813101d368a5381bc2101cf8b..8046fd860f42441ab846e38988d566ab5aa3aac6 100644 (file)
--- a/src/osd/OSD.cc
+++ b/src/osd/OSD.cc
@@ -7617,6 +7617,13 @@ void OSD::sched_scrub()
         continue;
        }
  
+      // If this one couldn't reserve, skip for now
+      if (pg->get_reserve_failed()) {
+       pg->unlock();
+       dout(20) << __func__ << " pg  " << scrub_job.pgid << " reserve failed, skipped" << dendl;
+        continue;
+      }
+
        // This has already started, so go on to the next scrub job
        if (pg->is_scrub_active()) {
         pg->unlock();
@@ -7636,7 +7643,7 @@ void OSD::sched_scrub()
        if (pg->m_scrubber->is_reserving()) {
         pg->unlock();
         dout(10) << __func__ << ": reserve in progress pgid " << scrub_job.pgid << dendl;
-       break;
+       goto out;
        }
        dout(15) << "sched_scrub scrubbing " << scrub_job.pgid << " at " << scrub_job.sched_time
                << (pg->get_must_scrub() ? ", explicitly requested" :
@@ -7645,11 +7652,34 @@ void OSD::sched_scrub()
        if (pg->sched_scrub()) {
         pg->unlock();
          dout(10) << __func__ << " scheduled a scrub!" << " (~" << scrub_job.pgid << "~)" << dendl;
-       break;
+       goto out;
        }
+      // If this is set now we must have had a local reserve failure, so can't scrub anything right now
+      if (pg->get_reserve_failed()) {
+       pg->unlock();
+       dout(20) << __func__ << " pg  " << scrub_job.pgid << " local reserve failed, nothing to be done now" << dendl;
+        goto out;
+      }
+
        pg->unlock();
      } while (service.next_scrub_stamp(scrub_job, &scrub_job));
+
+    // Clear reserve_failed from all pending PGs, so we try again
+    if (service.first_scrub_stamp(&scrub_job)) {
+      do {
+        if (scrub_job.sched_time > now)
+         break;
+        PGRef pg = _lookup_lock_pg(scrub_job.pgid);
+       // If we can't lock, it's ok we can get it next time
+        if (!pg)
+         continue;
+        pg->clear_reserve_failed();
+        pg->unlock();
+      } while (service.next_scrub_stamp(scrub_job, &scrub_job));
+    }
    }
+
+out:
    dout(20) << "sched_scrub done" << dendl;
  }
  
diff --git a/src/osd/PG.cc b/src/osd/PG.cc

index 604d6b1e328454e196f863bb858095f49afd702a..02c54f2ad0e5217d4468f1326bea424258f3daa1 100644 (file)
--- a/src/osd/PG.cc
+++ b/src/osd/PG.cc
@@ -1362,6 +1362,7 @@ bool PG::sched_scrub()
    // be retried by the OSD later on.
    if (!m_scrubber->reserve_local()) {
      dout(10) << __func__ << ": failed to reserve locally" << dendl;
+    set_reserve_failed();
      return false;
    }
  
diff --git a/src/osd/PG.h b/src/osd/PG.h

index 37e96b2bf90af82e8c429a99b8245fad38708fd2..c7dc441e3c3d479b6ed5dd9ffff1806beb8b9ff8 100644 (file)
--- a/src/osd/PG.h
+++ b/src/osd/PG.h
@@ -186,6 +186,10 @@ public:
    /// scrubbing state for both Primary & replicas
    bool is_scrub_active() const { return m_scrubber->is_scrub_active(); }
  
+  bool get_reserve_failed() const { return m_scrubber->get_reserve_failed(); }
+  void set_reserve_failed() { m_scrubber->set_reserve_failed(); }
+  void clear_reserve_failed() { m_scrubber->clear_reserve_failed(); }
+
  public:
    // -- members --
    const coll_t coll;
diff --git a/src/osd/pg_scrubber.h b/src/osd/pg_scrubber.h

index bbbac38ca7565f2fe06e0cc76fa9468cbf1f813e..69fa6b0fcebdb842dbfdb0d94afcebe02b152311 100644 (file)
--- a/src/osd/pg_scrubber.h
+++ b/src/osd/pg_scrubber.h
@@ -412,6 +412,10 @@ class PgScrubber : public ScrubPgIF, public ScrubMachineListener {
  
    [[nodiscard]] bool is_scrub_active() const final { return m_active; }
  
+  [[nodiscard]] bool get_reserve_failed() const final { return m_reserve_failed; }
+  void set_reserve_failed() final { m_reserve_failed = true; }
+  void clear_reserve_failed() final { m_reserve_failed = false; }
+
   private:
    void reset_internal_state();
  
@@ -536,6 +540,9 @@ class PgScrubber : public ScrubPgIF, public ScrubMachineListener {
  
    bool m_active{false};
  
+  // This PG could not get all the scrub reservations
+  bool m_reserve_failed{false};
+
    eversion_t m_subset_last_update{};
  
    std::unique_ptr<Scrub::Store> m_store;
diff --git a/src/osd/scrub_machine.cc b/src/osd/scrub_machine.cc

index 2a2ee8732bda928de2464eec32d8e16154020928..64d79e7236732d479fe8a32423b03ce63cc5e85c 100644 (file)
--- a/src/osd/scrub_machine.cc
+++ b/src/osd/scrub_machine.cc
@@ -91,6 +91,8 @@ sc::result ReservingReplicas::react(const ReservationFailure&)
    DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
    dout(10) << "ReservingReplicas::react(const ReservationFailure&)" << dendl;
  
+  // Mark PG so that we will try other PGs, before coming back to this one
+  scrbr->set_reserve_failed();
    // the Scrubber must release all resources and abort the scrubbing
    scrbr->clear_pgscrub_state();
    return transit<NotActive>();
diff --git a/src/osd/scrub_machine_lstnr.h b/src/osd/scrub_machine_lstnr.h

index 2b96161215474759da1b398bae71797d0d6efe27..b2139773b4d59f103e2119ec694592fc4962f715 100644 (file)
--- a/src/osd/scrub_machine_lstnr.h
+++ b/src/osd/scrub_machine_lstnr.h
@@ -114,6 +114,10 @@ struct ScrubMachineListener {
  
    virtual void unreserve_replicas() = 0;
  
+  [[nodiscard]] virtual bool get_reserve_failed() const = 0;
+  virtual void set_reserve_failed() = 0;
+  virtual void clear_reserve_failed() = 0;
+
    /**
     * the FSM interface into the "are we waiting for maps, either our own or from
     * replicas" state.
diff --git a/src/osd/scrubber_common.h b/src/osd/scrubber_common.h

index 15a6cdf4dede485e14f395feff4d6bcfd8280820..3f3a618f8249b692f038d4f9272ca6f5667fc2bb 100644 (file)
--- a/src/osd/scrubber_common.h
+++ b/src/osd/scrubber_common.h
@@ -150,6 +150,10 @@ struct ScrubPgIF {
     */
    [[nodiscard]] virtual bool is_scrub_active() const = 0;
  
+  [[nodiscard]] virtual bool get_reserve_failed() const = 0;
+  virtual void set_reserve_failed() = 0;
+  virtual void clear_reserve_failed() = 0;
+
    /// are we waiting for resource reservation grants form our replicas?
    [[nodiscard]] virtual bool is_reserving() const = 0;
author	David Zafman <dzafman@redhat.com>
	Fri, 15 Jan 2021 04:39:32 +0000 (20:39 -0800)
committer	David Zafman <dzafman@redhat.com>
	Fri, 5 Mar 2021 19:41:26 +0000 (11:41 -0800)
PendingReleaseNotes		patch \| blob \| history
src/osd/OSD.cc		patch \| blob \| history
src/osd/PG.cc		patch \| blob \| history
src/osd/PG.h		patch \| blob \| history
src/osd/pg_scrubber.h		patch \| blob \| history
src/osd/scrub_machine.cc		patch \| blob \| history
src/osd/scrub_machine_lstnr.h		patch \| blob \| history
src/osd/scrubber_common.h		patch \| blob \| history