]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd/scrub: maintain a set of remote reservations
authorRonen Friedman <rfriedma@redhat.com>
Sun, 12 Nov 2023 15:25:22 +0000 (09:25 -0600)
committerRonen Friedman <rfriedma@redhat.com>
Sat, 18 Nov 2023 17:44:32 +0000 (11:44 -0600)
... instead of a simple counter.

This - as a preparation for the next commit, which will decouple
the "being reserved" state from the handling of scrub requests.
The planned changes to the scrub state machine will make
it harder to know when to clear the "being reserved" state.
The changes here would allow us to err on the side of caution,
i.e. trying to "un-count" a remote reservation even if it was not
actually reserved or was already deleted.

Signed-off-by: Ronen Friedman <rfriedma@redhat.com>
qa/standalone/scrub/osd-scrub-dump.sh
src/osd/scrubber/osd_scrub.cc
src/osd/scrubber/osd_scrub.h
src/osd/scrubber/pg_scrubber.cc
src/osd/scrubber/scrub_resources.cc
src/osd/scrubber/scrub_resources.h

index 0129114625aa31efb8311dca2cf5f78e4ffba306..644f82d80716e02364de1f00f2b819af34a6bab7 100755 (executable)
@@ -123,7 +123,7 @@ function TEST_recover_unexpected() {
        for o in $(seq 0 $(expr $OSDS - 1))
        do
                CEPH_ARGS='' ceph daemon $(get_asok_path osd.$o) dump_scrub_reservations
-               scrubs=$(CEPH_ARGS='' ceph daemon $(get_asok_path osd.$o) dump_scrub_reservations | jq '.scrubs_local + .scrubs_remote')
+               scrubs=$(CEPH_ARGS='' ceph daemon $(get_asok_path osd.$o) dump_scrub_reservations | jq '.scrubs_local + .granted_reservations')
                if [ $scrubs -gt $MAX_SCRUBS ]; then
                    echo "ERROR: More than $MAX_SCRUBS currently reserved"
                    return 1
index e3a71e2623475e86696bf41d690b39f1d68e25b1..99367170dbac3e0ccd35f3b8da79a9a85d907a56 100644 (file)
@@ -441,14 +441,14 @@ void OsdScrub::dec_scrubs_local()
   m_resource_bookkeeper.dec_scrubs_local();
 }
 
-bool OsdScrub::inc_scrubs_remote()
+bool OsdScrub::inc_scrubs_remote(pg_t pgid)
 {
-  return m_resource_bookkeeper.inc_scrubs_remote();
+  return m_resource_bookkeeper.inc_scrubs_remote(pgid);
 }
 
-void OsdScrub::dec_scrubs_remote()
+void OsdScrub::dec_scrubs_remote(pg_t pgid)
 {
-  m_resource_bookkeeper.dec_scrubs_remote();
+  m_resource_bookkeeper.dec_scrubs_remote(pgid);
 }
 
 void OsdScrub::mark_pg_scrub_blocked(spg_t blocked_pg)
index 570430660ed08675043b7b147441cef90cea32bc..56167df2ee6e4f0fe2f87efc1103db9708f5951e 100644 (file)
@@ -67,8 +67,8 @@ class OsdScrub {
   // updating the resource counters
   bool inc_scrubs_local();
   void dec_scrubs_local();
-  bool inc_scrubs_remote();
-  void dec_scrubs_remote();
+  bool inc_scrubs_remote(pg_t pgid);
+  void dec_scrubs_remote(pg_t pgid);
 
   // counting the number of PGs stuck while scrubbing, waiting for objects
   void mark_pg_scrub_blocked(spg_t blocked_pg);
index 4cd861b89c8f493c8db525daf36a1f2d35e1044f..70d314f0d2f6e83442ceaa16d4444aa1a045d521 100644 (file)
@@ -218,7 +218,7 @@ void PgScrubber::initiate_regular_scrub(epoch_t epoch_queued)
 
 void PgScrubber::dec_scrubs_remote()
 {
-  m_osds->get_scrub_services().dec_scrubs_remote();
+  m_osds->get_scrub_services().dec_scrubs_remote(m_pg_id.pgid);
 }
 
 void PgScrubber::advance_token()
@@ -1708,7 +1708,7 @@ void PgScrubber::handle_scrub_reserve_request(OpRequestRef op)
   if (m_pg->cct->_conf->osd_scrub_during_recovery ||
       !m_osds->is_recovery_active()) {
 
-    granted = m_osds->get_scrub_services().inc_scrubs_remote();
+    granted = m_osds->get_scrub_services().inc_scrubs_remote(m_pg_id.pgid);
     if (granted) {
       m_fsm->process_event(ReplicaGrantReservation{});
     } else {
index 179bd5e7e0e61767d29333e43f1c57f69f8fa004..25dcec2399f132924cccd419847e67f024d0b83f 100644 (file)
@@ -4,10 +4,12 @@
 #include "./scrub_resources.h"
 
 #include <fmt/format.h>
+#include <fmt/ranges.h>
 
 #include "common/debug.h"
 
 #include "include/ceph_assert.h"
+#include "osd/osd_types_fmt.h"
 
 
 using ScrubResources = Scrub::ScrubResources;
@@ -22,25 +24,25 @@ ScrubResources::ScrubResources(
 bool ScrubResources::can_inc_scrubs() const
 {
   std::lock_guard lck{resource_lock};
-  if (scrubs_local + scrubs_remote < conf->osd_max_scrubs) {
+  if (scrubs_local + granted_reservations.size() < conf->osd_max_scrubs) {
     return true;
   }
   log_upwards(fmt::format(
       "{}== false. {} (local) + {} (remote) >= max ({})", __func__,
-      scrubs_local, scrubs_remote, conf->osd_max_scrubs));
+      scrubs_local, granted_reservations.size(), conf->osd_max_scrubs));
   return false;
 }
 
 bool ScrubResources::inc_scrubs_local()
 {
   std::lock_guard lck{resource_lock};
-  if (scrubs_local + scrubs_remote < conf->osd_max_scrubs) {
+  if (scrubs_local + granted_reservations.size() < conf->osd_max_scrubs) {
     ++scrubs_local;
     return true;
   }
   log_upwards(fmt::format(
       "{}: {} (local) + {} (remote) >= max ({})", __func__, scrubs_local,
-      scrubs_remote, conf->osd_max_scrubs));
+      granted_reservations.size(), conf->osd_max_scrubs));
   return false;
 }
 
@@ -49,42 +51,56 @@ void ScrubResources::dec_scrubs_local()
   std::lock_guard lck{resource_lock};
   log_upwards(fmt::format(
       "{}: {} -> {} (max {}, remote {})", __func__, scrubs_local,
-      (scrubs_local - 1), conf->osd_max_scrubs, scrubs_remote));
+      (scrubs_local - 1), conf->osd_max_scrubs, granted_reservations.size()));
   --scrubs_local;
   ceph_assert(scrubs_local >= 0);
 }
 
-bool ScrubResources::inc_scrubs_remote()
+bool ScrubResources::inc_scrubs_remote(pg_t pgid)
 {
   std::lock_guard lck{resource_lock};
-  if (scrubs_local + scrubs_remote < conf->osd_max_scrubs) {
+
+  // if this PG is already reserved - it's probably a benign bug.
+  // report it, but do not fail the reservation.
+  if (granted_reservations.contains(pgid)) {
+    log_upwards(fmt::format("{}: pg[{}] already reserved", __func__, pgid));
+    return true;
+  }
+
+  auto prev = granted_reservations.size();
+  if (scrubs_local + prev < conf->osd_max_scrubs) {
+    granted_reservations.insert(pgid);
     log_upwards(fmt::format(
-       "{}: {} -> {} (max {}, local {})", __func__, scrubs_remote,
-       (scrubs_remote + 1), conf->osd_max_scrubs, scrubs_local));
-    ++scrubs_remote;
+       "{}: pg[{}] {} -> {} (max {}, local {})", __func__, pgid, prev,
+       granted_reservations.size(), conf->osd_max_scrubs, scrubs_local));
     return true;
   }
 
   log_upwards(fmt::format(
-      "{}: {} (local) + {} (remote) >= max ({})", __func__, scrubs_local,
-      scrubs_remote, conf->osd_max_scrubs));
+      "{}: pg[{}] {} (local) + {} (remote) >= max ({})", __func__, pgid,
+      scrubs_local, granted_reservations.size(), conf->osd_max_scrubs));
   return false;
 }
 
-void ScrubResources::dec_scrubs_remote()
+void ScrubResources::dec_scrubs_remote(pg_t pgid)
 {
   std::lock_guard lck{resource_lock};
-  log_upwards(fmt::format(
-      "{}: {} -> {} (max {}, local {})", __func__, scrubs_remote,
-      (scrubs_remote - 1), conf->osd_max_scrubs, scrubs_local));
-  --scrubs_remote;
-  ceph_assert(scrubs_remote >= 0);
+  // we might not have this PG in the set (e.g. if we are concluding a
+  // high priority scrub, one that does not require reservations)
+  auto cnt = granted_reservations.erase(pgid);
+  if (cnt) {
+    log_upwards(fmt::format(
+       "{}: remote reservation for {} removed -> {} (max {}, local {})",
+       __func__, pgid, granted_reservations.size(), conf->osd_max_scrubs,
+       scrubs_local));
+  }
 }
 
 void ScrubResources::dump_scrub_reservations(ceph::Formatter* f) const
 {
   std::lock_guard lck{resource_lock};
   f->dump_int("scrubs_local", scrubs_local);
-  f->dump_int("scrubs_remote", scrubs_remote);
+  f->dump_int("granted_reservations", granted_reservations.size());
+  f->dump_string("PGs being served", fmt::format("{}", granted_reservations));
   f->dump_int("osd_max_scrubs", conf->osd_max_scrubs);
 }
index 890ee5d0e2fae37d707d3c52da564ff5ae47b6cd..724e206ee27e97821d0c0074a95f0c265a525215 100644 (file)
@@ -8,6 +8,7 @@
 #include "common/ceph_mutex.h"
 #include "common/config_proxy.h"
 #include "common/Formatter.h"
+#include "osd/osd_types.h"
 
 namespace Scrub {
 
@@ -28,8 +29,9 @@ class ScrubResources {
   /// the number of concurrent scrubs performed by Primaries on this OSD
   int scrubs_local{0};
 
-  /// the number of active scrub reservations granted by replicas
-  int scrubs_remote{0};
+  /// the set of PGs that have active scrub reservations as replicas
+  /// \todo come C++23 - consider std::flat_set<pg_t>
+  std::set<pg_t> granted_reservations;
 
   mutable ceph::mutex resource_lock =
       ceph::make_mutex("ScrubQueue::resource_lock");
@@ -56,10 +58,10 @@ class ScrubResources {
   void dec_scrubs_local();
 
   /// increments the number of scrubs acting as a Replica
-  bool inc_scrubs_remote();
+  bool inc_scrubs_remote(pg_t pgid);
 
   /// decrements the number of scrubs acting as a Replica
-  void dec_scrubs_remote();
+  void dec_scrubs_remote(pg_t pgid);
 
   void dump_scrub_reservations(ceph::Formatter* f) const;
 };