]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: extracting and wrapping scrub resource management objects
authorRonen Friedman <rfriedma@redhat.com>
Thu, 12 Nov 2020 17:38:44 +0000 (19:38 +0200)
committerRonen Friedman <rfriedma@redhat.com>
Thu, 10 Dec 2020 13:21:53 +0000 (15:21 +0200)
Auxiliary RAII wrappers around cluster-wide scrub resources
and data:

ReplicaReservations (Primary side): reserving/freeing scrub resources at the replicas.

LocalReservation (Primary): managing the local OSD's scrub resources

ReservedByRemotePrimary (replica side): being reserved/freed by the Primary

MapsCollectionStatus (Primary): tracking the availability of the chunk's scrub maps,
both the local one at the Primary, and the maps arriving from the replicas.

Signed-off-by: Ronen Friedman <rfriedma@redhat.com>
src/osd/CMakeLists.txt
src/osd/OSD.cc
src/osd/OSD.h
src/osd/PG.cc
src/osd/PG.h
src/osd/pg_scrubber.cc [new file with mode: 0644]
src/osd/pg_scrubber.h [new file with mode: 0644]
src/osd/scheduler/OpSchedulerItem.cc
src/osd/scheduler/OpSchedulerItem.h

index 7cd45171a73a1bdb82d04749cd9150cf38d6ebc9..d3b6c21c79e1c8a58859d7d406fc14d88c41f7e5 100644 (file)
@@ -11,6 +11,8 @@ endif()
 
 set(osd_srcs
   OSD.cc
+  pg_scrubber.cc
+  scrub_machine.cc
   Watch.cc
   ClassHandler.cc
   PG.cc
index db3ce3ce5929032329529b2e6c53ed3acf7b23de..bcc2a9de03b551c84f13ae98778c020df18436f1 100644 (file)
@@ -36,6 +36,8 @@
 #endif
 
 #include "osd/PG.h"
+#include "osd/scrub_machine.h"
+#include "osd/pg_scrubber.h"
 
 #include "include/types.h"
 #include "include/compat.h"
@@ -1739,6 +1741,32 @@ void OSDService::queue_for_snap_trim(PG *pg)
       pg->get_osdmap_epoch()));
 }
 
+template <class MSG_TYPE>
+void OSDService::queue_scrub_event_msg(PG* pg,
+                                      Scrub::scrub_prio_t with_priority,
+                                      unsigned int qu_priority)
+{
+  const auto epoch = pg->get_osdmap_epoch();
+  auto msg = new MSG_TYPE(pg->get_pgid(), epoch);
+  dout(15) << "queue a scrub event (" << *msg << ") for " << *pg << ". Epoch: " << epoch << dendl;
+
+  enqueue_back(OpSchedulerItem(
+    unique_ptr<OpSchedulerItem::OpQueueable>(msg), cct->_conf->osd_scrub_cost,
+    pg->scrub_requeue_priority(with_priority, qu_priority), ceph_clock_now(), 0, epoch));
+}
+
+template <class MSG_TYPE>
+void OSDService::queue_scrub_event_msg(PG* pg, Scrub::scrub_prio_t with_priority)
+{
+  const auto epoch = pg->get_osdmap_epoch();
+  auto msg = new MSG_TYPE(pg->get_pgid(), epoch);
+  dout(15) << "queue a scrub event (" << *msg << ") for " << *pg << ". Epoch: " << epoch << dendl;
+
+  enqueue_back(OpSchedulerItem(
+    unique_ptr<OpSchedulerItem::OpQueueable>(msg), cct->_conf->osd_scrub_cost,
+    pg->scrub_requeue_priority(with_priority), ceph_clock_now(), 0, epoch));
+}
+
 void OSDService::queue_for_scrub(PG *pg, bool with_high_priority)
 {
   unsigned scrub_queue_priority = pg->scrubber.priority;
@@ -1756,6 +1784,18 @@ void OSDService::queue_for_scrub(PG *pg, bool with_high_priority)
       epoch));
 }
 
+void OSDService::queue_for_scrub_granted(PG* pg, Scrub::scrub_prio_t with_priority)
+{
+  // Resulting scrub event: 'RemotesReserved'
+  queue_scrub_event_msg<PGScrubResourcesOK>(pg, with_priority);
+}
+
+void OSDService::queue_for_scrub_denied(PG* pg, Scrub::scrub_prio_t with_priority)
+{
+  // Resulting scrub event: 'ReservationFailure'
+  queue_scrub_event_msg<PGScrubDenied>(pg, with_priority);
+}
+
 void OSDService::queue_for_pg_delete(spg_t pgid, epoch_t e)
 {
   dout(10) << __func__ << " on " << pgid << " e " << e  << dendl;
index 85a783a14eaa6dc8f8ae8a9c915eca4eb0ede15b..92bb331365b0b17600255541598e5e81b12e9c3b 100644 (file)
@@ -603,6 +603,13 @@ public:
   void queue_recovery_context(PG *pg, GenContext<ThreadPool::TPHandle&> *c);
   void queue_for_snap_trim(PG *pg);
   void queue_for_scrub(PG *pg, bool with_high_priority);
+
+  /// queue the message (-> event) that all replicas reserved scrub resources for us
+  void queue_for_scrub_granted(PG* pg, Scrub::scrub_prio_t with_priority);
+
+  /// queue the message (-> event) that some replicas denied our scrub resources request
+  void queue_for_scrub_denied(PG* pg, Scrub::scrub_prio_t with_priority);
+
   void queue_for_pg_delete(spg_t pgid, epoch_t e);
   bool try_finish_pg_delete(PG *pg, unsigned old_pg_num);
 
@@ -611,6 +618,15 @@ private:
   ceph::mutex recovery_lock = ceph::make_mutex("OSDService::recovery_lock");
   std::list<std::pair<epoch_t, PGRef> > awaiting_throttle;
 
+  /// queue a scrub-related message for a PG
+  template<class MSG_TYPE>
+  void queue_scrub_event_msg(PG* pg, Scrub::scrub_prio_t with_priority, unsigned int qu_priority);
+
+  /// An alternative version of queue_scrub_event_msg(), in which the queuing priority is
+  /// provided by the executing scrub (i.e. taken from PgScrubber::m_flags)
+  template<class MSG_TYPE>
+  void queue_scrub_event_msg(PG* pg, Scrub::scrub_prio_t with_priority);
+
   utime_t defer_recovery_until;
   uint64_t recovery_ops_active;
   uint64_t recovery_ops_reserved;
index 9b66cb7cd4c64d3da822fec3b0d29ce505c33867..7090d14bc20caf65eba510179454a1a2e2a13cbd 100644 (file)
@@ -24,6 +24,7 @@
 #include "OSD.h"
 #include "OpRequest.h"
 #include "ScrubStore.h"
+#include "pg_scrubber.h"
 #include "Session.h"
 #include "osd/scheduler/OpSchedulerItem.h"
 
@@ -488,6 +489,20 @@ bool PG::queue_scrub()
   return true;
 }
 
+void PG::scrub_send_resources_granted(epoch_t epoch_queued,
+                                     [[maybe_unused]] ThreadPool::TPHandle& handle)
+{
+  dout(10) << __func__ << " queued at: " << epoch_queued << dendl;
+  //m_scrubber->send_remotes_reserved();
+}
+
+void PG::scrub_send_resources_denied(epoch_t epoch_queued,
+                                    [[maybe_unused]] ThreadPool::TPHandle& handle)
+{
+  dout(10) << __func__ << " queued at: " << epoch_queued << dendl;
+  //m_scrubber->send_reservation_failure();
+}
+
 unsigned PG::get_scrub_priority()
 {
   // a higher value -> a higher priority
@@ -1345,6 +1360,16 @@ void PG::requeue_map_waiters()
 }
 
 
+unsigned int PG::scrub_requeue_priority(Scrub::scrub_prio_t with_priority) const
+{
+  return 0; // next commit: m_scrubber->scrub_requeue_priority(with_priority);
+}
+
+unsigned int PG::scrub_requeue_priority(Scrub::scrub_prio_t with_priority, unsigned int suggested_priority) const
+{
+  return 0; // next commit: m_scrubber->scrub_requeue_priority(with_priority, suggested_priority);
+}
+
 // ==========================================================================================
 // SCRUB
 
index 5031861e816161f7e542a2d828784fe33fe62e3d..3c11c88b348e321f791cda42d2d41e620b81fbaa 100644 (file)
@@ -40,6 +40,7 @@
 #include "PeeringState.h"
 #include "recovery_types.h"
 #include "MissingLoc.h"
+#include "scrubber_common.h"
 
 #include "mgr/OSDPerfMetricTypes.h"
 
@@ -68,6 +69,9 @@ class DynamicPerfStats;
 
 namespace Scrub {
   class Store;
+  class ReplicaReservations;
+  class LocalReservation;
+  class ReservedByRemotePrimary;
 }
 
 #ifdef PG_DEBUG_REFS
@@ -164,10 +168,15 @@ class PGRecoveryStats {
 class PG : public DoutPrefixProvider, public PeeringState::PeeringListener {
   friend struct NamedState;
   friend class PeeringState;
+  friend class Scrub::ReplicaReservations;
+  friend class Scrub::LocalReservation;  // dout()-only friendship
+  friend class Scrub::ReservedByRemotePrimary;  //  dout()-only friendship
 
 public:
   const pg_shard_t pg_whoami;
   const spg_t pg_id;
+  /// flags detailing scheduling/operation characteristics of the next scrub 
+  requested_scrub_t m_planned_scrub;
 
 public:
   // -- members --
@@ -365,6 +374,10 @@ public:
 
   void scrub(epoch_t queued, ThreadPool::TPHandle &handle);
 
+  /// Queues a PGScrubResourcesOK message. Will translate into 'RemotesReserved' FSM event
+  void scrub_send_resources_granted(epoch_t queued, ThreadPool::TPHandle &handle);
+  void scrub_send_resources_denied(epoch_t queued, ThreadPool::TPHandle &handle);
+
   bool is_scrub_registered();
   void reg_next_scrub();
   void unreg_next_scrub();
@@ -516,6 +529,9 @@ public:
   }
   bool sched_scrub();
 
+  unsigned int scrub_requeue_priority(Scrub::scrub_prio_t with_priority, unsigned int suggested_priority) const;
+  /// the version that refers to flags_.priority
+  unsigned int scrub_requeue_priority(Scrub::scrub_prio_t with_priority) const;
   virtual void do_request(
     OpRequestRef& op,
     ThreadPool::TPHandle &handle
diff --git a/src/osd/pg_scrubber.cc b/src/osd/pg_scrubber.cc
new file mode 100644 (file)
index 0000000..763e776
--- /dev/null
@@ -0,0 +1,316 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "pg_scrubber.h"
+
+#include <iostream>
+#include <vector>
+
+#include "debug.h"
+
+#include "common/errno.h"
+#include "messages/MOSDOp.h"
+#include "messages/MOSDRepScrub.h"
+#include "messages/MOSDRepScrubMap.h"
+#include "messages/MOSDScrub.h"
+#include "messages/MOSDScrubReserve.h"
+
+#include "OSD.h"
+#include "ScrubStore.h"
+#include "scrub_machine.h"
+
+using namespace Scrub;
+using namespace std::chrono;
+using namespace std::chrono_literals;
+
+
+#define dout_context (m_pg->cct)
+#define dout_subsys ceph_subsys_osd
+#undef dout_prefix
+#define dout_prefix _prefix(_dout, this->m_pg)
+
+template <class T> static ostream& _prefix(std::ostream* _dout, T* t)
+{
+  return t->gen_prefix(*_dout) << " scrubber pg(" << t->pg_id << ") ";
+}
+
+ostream& operator<<(ostream& out, const requested_scrub_t& sf)
+{
+  if (sf.must_repair)
+    out << " MUST_REPAIR";
+  if (sf.auto_repair)
+    out << " planned AUTO_REPAIR";
+  if (sf.check_repair)
+    out << " planned CHECK_REPAIR";
+  if (sf.deep_scrub_on_error)
+    out << " planned DEEP_SCRUB_ON_ERROR";
+  if (sf.must_deep_scrub)
+    out << " MUST_DEEP_SCRUB";
+  if (sf.must_scrub)
+    out << " MUST_SCRUB";
+  if (sf.time_for_deep)
+    out << " TIME_FOR_DEEP";
+  if (sf.need_auto)
+    out << " NEED_AUTO";
+  if (sf.req_scrub)
+    out << " planned REQ_SCRUB";
+
+  return out;
+}
+
+// ///////////////////// ReplicaReservations //////////////////////////////////
+namespace Scrub {
+
+void ReplicaReservations::release_replica(pg_shard_t peer, epoch_t epoch)
+{
+  dout(15) << __func__ << " <ReplicaReservations> release-> " << peer << dendl;
+
+  auto m = new MOSDScrubReserve(spg_t(m_pg->info.pgid.pgid, peer.shard), epoch,
+                                   MOSDScrubReserve::RELEASE, m_pg->pg_whoami);
+  m_osds->send_message_osd_cluster(peer.osd, m, epoch);
+}
+
+ReplicaReservations::ReplicaReservations(PG* pg, pg_shard_t whoami)
+    : m_pg{pg}
+    , m_acting_set{pg->get_actingset()}
+    , m_osds{m_pg->osd}
+    , m_pending{static_cast<int>(m_acting_set.size()) - 1}
+{
+  epoch_t epoch = m_pg->get_osdmap_epoch();
+
+  // handle the special case of no replicas
+  if (m_pending <= 0) {
+    // just signal the scrub state-machine to continue
+    send_all_done();
+
+  } else {
+
+    for (auto p : m_acting_set) {
+      if (p == whoami)
+       continue;
+      auto m = new MOSDScrubReserve(spg_t(m_pg->info.pgid.pgid, p.shard), epoch,
+                                       MOSDScrubReserve::REQUEST, m_pg->pg_whoami);
+      m_osds->send_message_osd_cluster(p.osd, m, epoch);
+      m_waited_for_peers.push_back(p);
+      dout(10) << __func__ << " <ReplicaReservations> reserve<-> " << p.osd << dendl;
+    }
+  }
+}
+
+void ReplicaReservations::send_all_done()
+{
+  m_osds->queue_for_scrub_granted(m_pg, scrub_prio_t::low_priority);
+}
+
+void ReplicaReservations::send_reject()
+{
+  m_osds->queue_for_scrub_denied(m_pg, scrub_prio_t::low_priority);
+}
+
+void ReplicaReservations::release_all()
+{
+  dout(10) << __func__ << " " << m_reserved_peers << dendl;
+
+  m_had_rejections = true;  // preventing late-coming responses from triggering events
+  epoch_t epoch = m_pg->get_osdmap_epoch();
+
+  for (auto p : m_reserved_peers) {
+    release_replica(p, epoch);
+  }
+  m_reserved_peers.clear();
+
+  // note: the release will follow on the heels of the request. When tried otherwise,
+  // grants that followed a reject arrived after the whole scrub machine-state was
+  // reset, causing leaked reservations.
+  if (m_pending) {
+    for (auto p : m_waited_for_peers) {
+      release_replica(p, epoch);
+    }
+  }
+  m_waited_for_peers.clear();
+}
+
+ReplicaReservations::~ReplicaReservations()
+{
+  m_had_rejections = true;  // preventing late-coming responses from triggering events
+
+  // send un-reserve messages to all reserved replicas. We do not wait for answer (there
+  // wouldn't be one). Other incoming messages will be discarded on the way, by our
+  // owner.
+  release_all();
+}
+
+/**
+ *  @ATTN we would not reach here if the ReplicaReservation object managed by the
+ * scrubber was reset.
+ */
+void ReplicaReservations::handle_reserve_grant(OpRequestRef op, pg_shard_t from)
+{
+  dout(10) << __func__ << " <ReplicaReservations> granted-> " << from << dendl;
+  op->mark_started();
+
+  {
+    // reduce the amount of extra release messages. Not a must, but the log is cleaner
+    auto w = find(m_waited_for_peers.begin(), m_waited_for_peers.end(), from);
+    if (w != m_waited_for_peers.end())
+      m_waited_for_peers.erase(w);
+  }
+
+  // are we forced to reject the reservation?
+  if (m_had_rejections) {
+
+    dout(10) << " rejecting late-coming reservation from " << from << dendl;
+    release_replica(from, m_pg->get_osdmap_epoch());
+
+  } else if (std::find(m_reserved_peers.begin(), m_reserved_peers.end(), from) !=
+            m_reserved_peers.end()) {
+
+    dout(10) << " already had osd." << from << " reserved" << dendl;
+
+  } else {
+
+    dout(10) << " osd." << from << " scrub reserve = success" << dendl;
+    m_reserved_peers.push_back(from);
+    if (--m_pending == 0) {
+      send_all_done();
+    }
+  }
+}
+
+void ReplicaReservations::handle_reserve_reject(OpRequestRef op, pg_shard_t from)
+{
+  dout(10) << __func__ << " <ReplicaReservations> rejected-> " << from << dendl;
+  dout(10) << __func__ << " " << *op->get_req() << dendl;
+  op->mark_started();
+
+  {
+    // reduce the amount of extra release messages. Not a must, but the log is cleaner
+    auto w = find(m_waited_for_peers.begin(), m_waited_for_peers.end(), from);
+    if (w != m_waited_for_peers.end())
+      m_waited_for_peers.erase(w);
+  }
+
+  if (m_had_rejections) {
+
+    // our failure was already handled when the first rejection arrived
+    dout(15) << " ignoring late-coming rejection from " << from << dendl;
+
+  } else if (std::find(m_reserved_peers.begin(), m_reserved_peers.end(), from) !=
+            m_reserved_peers.end()) {
+
+    dout(15) << " already had osd." << from << " reserved" << dendl;
+
+  } else {
+
+    dout(10) << " osd." << from << " scrub reserve = fail" << dendl;
+    m_had_rejections = true;  // preventing any additional notifications
+    --m_pending;             // not sure we need this bookkeeping anymore
+    send_reject();
+  }
+}
+
+// ///////////////////// LocalReservation //////////////////////////////////
+
+LocalReservation::LocalReservation(PG* pg, OSDService* osds)
+    : m_pg{pg} // holding the "whole PG" for dout() sake
+    , m_osds{osds}
+{
+  if (!m_osds->inc_scrubs_local()) {
+    dout(10) << __func__ << ": failed to reserve locally " << dendl;
+    // the failure is signalled by not having m_holding_local_reservation set
+    return;
+  }
+
+  dout(20) << __func__ << ": local OSD scrub resources reserved" << dendl;
+  m_holding_local_reservation = true;
+}
+
+void LocalReservation::early_release()
+{
+  if (m_holding_local_reservation) {
+    m_holding_local_reservation = false;
+    m_osds->dec_scrubs_local();
+    dout(20) << __func__ << ": local OSD scrub resources freed" << dendl;
+  }
+}
+
+LocalReservation::~LocalReservation()
+{
+  early_release();
+}
+
+
+// ///////////////////// ReservedByRemotePrimary ///////////////////////////////
+
+ReservedByRemotePrimary::ReservedByRemotePrimary(PG* pg, OSDService* osds)
+    : m_pg{pg} // holding the "whole PG" for dout() sake
+    , m_osds{osds}
+{
+  if (!m_osds->inc_scrubs_remote()) {
+    dout(10) << __func__ << ": failed to reserve at Primary request" << dendl;
+    // the failure is signalled by not having m_reserved_by_remote_primary set
+    return;
+  }
+
+  dout(20) << __func__ << ": scrub resources reserved at Primary request" << dendl;
+  m_reserved_by_remote_primary = true;
+}
+
+void ReservedByRemotePrimary::early_release()
+{
+  dout(20) << "ReservedByRemotePrimary::" << __func__ << ": "
+          << m_reserved_by_remote_primary << dendl;
+  if (m_reserved_by_remote_primary) {
+    m_reserved_by_remote_primary = false;
+    m_osds->dec_scrubs_remote();
+    dout(20) << __func__ << ": scrub resources held for Primary were freed" << dendl;
+  }
+}
+
+ReservedByRemotePrimary::~ReservedByRemotePrimary()
+{
+  early_release();
+}
+
+// ///////////////////// MapsCollectionStatus ////////////////////////////////
+
+auto MapsCollectionStatus::mark_arriving_map(pg_shard_t from)
+  -> std::tuple<bool, std::string_view>
+{
+  auto fe = std::find(m_maps_awaited_for.begin(), m_maps_awaited_for.end(), from);
+  if (fe != m_maps_awaited_for.end()) {
+    // we are indeed waiting for a map from this replica
+    m_maps_awaited_for.erase(fe);
+    return std::tuple{true, ""sv};
+  } else {
+    return std::tuple{false, "unsolicited scrub-map"sv};
+  }
+}
+
+void MapsCollectionStatus::reset()
+{
+  *this = MapsCollectionStatus{};
+}
+
+std::string MapsCollectionStatus::dump() const
+{
+  std::string all;
+  for (const auto& rp : m_maps_awaited_for) {
+    all.append(rp.get_osd() + " "s);
+  }
+  return all;
+}
+
+ostream& operator<<(ostream& out, const MapsCollectionStatus& sf)
+{
+  out << " [ ";
+  for (const auto& rp : sf.m_maps_awaited_for) {
+    out << rp.get_osd() << " ";
+  }
+  if (!sf.m_local_map_ready) {
+    out << " local ";
+  }
+  return out << " ] ";
+}
+
+}  // namespace Scrub
diff --git a/src/osd/pg_scrubber.h b/src/osd/pg_scrubber.h
new file mode 100644 (file)
index 0000000..7d5ecfd
--- /dev/null
@@ -0,0 +1,135 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include <cassert>
+#include <chrono>
+#include <memory>
+#include <mutex>
+#include <optional>
+#include <string>
+#include <string_view>
+#include <vector>
+
+#include "PG.h"
+#include "ScrubStore.h"
+#include "scrub_machine_lstnr.h"
+#include "scrubber_common.h"
+
+class Callback;
+
+namespace Scrub {
+class ScrubMachine;
+struct BuildMap;
+
+/**
+ * Reserving/freeing scrub resources at the replicas.
+ *
+ *  When constructed - sends reservation requests to the acting_set.
+ *  A rejection triggers a "couldn't acquire the replicas' scrub resources" event.
+ *  All previous requests, whether already granted or not, are explicitly released.
+ *
+ *  A note re performance: I've measured a few container alternatives for
+ *  m_reserved_peers, with its specific usage pattern. Std::set is extremely slow, as
+ *  expected. flat_set is only slightly better. Surprisingly - std::vector (with no
+ *  sorting) is better than boost::small_vec. And for std::vector: no need to pre-reserve.
+ */
+class ReplicaReservations {
+  using OrigSet = decltype(std::declval<PG>().get_actingset());
+
+  PG* m_pg;
+  OrigSet m_acting_set;
+  OSDService* m_osds;
+  std::vector<pg_shard_t> m_waited_for_peers;
+  std::vector<pg_shard_t> m_reserved_peers;
+  bool m_had_rejections{false};
+  int m_pending{-1};
+
+  void release_replica(pg_shard_t peer, epoch_t epoch);
+
+  void send_all_done();         ///< all reservations are granted
+
+  /// notify the scrubber that we have failed to reserve replicas' resources
+  void send_reject();
+
+  void release_all();
+
+ public:
+  ReplicaReservations(PG* pg, pg_shard_t whoami);
+
+  ~ReplicaReservations();
+
+  void handle_reserve_grant(OpRequestRef op, pg_shard_t from);
+
+  void handle_reserve_reject(OpRequestRef op, pg_shard_t from);
+};
+
+/**
+ *  wraps the local OSD scrub resource reservation in an RAII wrapper
+ */
+class LocalReservation {
+  PG* m_pg;
+  OSDService* m_osds;
+  bool m_holding_local_reservation{false};
+
+ public:
+  LocalReservation(PG* pg, OSDService* osds);
+  ~LocalReservation();
+  bool is_reserved() const { return m_holding_local_reservation; }
+  void early_release();
+};
+
+/**
+ *  wraps the OSD resource we are using when reserved as a replica by a scrubbing master.
+ */
+class ReservedByRemotePrimary {
+  PG* m_pg;
+  OSDService* m_osds;
+  bool m_reserved_by_remote_primary{false};
+
+ public:
+  ReservedByRemotePrimary(PG* pg, OSDService* osds);
+  ~ReservedByRemotePrimary();
+  [[nodiscard]] bool is_reserved() const { return m_reserved_by_remote_primary; }
+  void early_release();
+};
+
+/**
+ * Once all replicas' scrub maps are received, we go on to compare the maps. That is -
+ * unless we we have not yet completed building our own scrub map. MapsCollectionStatus
+ * combines the status of waiting for both the local map and the replicas, without
+ * resorting to adding dummy entries into a list.
+ */
+class MapsCollectionStatus {
+
+  bool m_local_map_ready{false};
+  std::vector<pg_shard_t> m_maps_awaited_for;
+
+ public:
+  [[nodiscard]] bool are_all_maps_available() const
+  {
+    return m_local_map_ready && m_maps_awaited_for.empty();
+  }
+
+  void mark_local_map_ready() { m_local_map_ready = true; }
+
+  void mark_replica_map_request(pg_shard_t from_whom)
+  {
+    m_maps_awaited_for.push_back(from_whom);
+  }
+
+  /// @returns true if indeed waiting for this one. Otherwise: an error string
+  auto mark_arriving_map(pg_shard_t from) -> std::tuple<bool, std::string_view>;
+
+  std::vector<pg_shard_t> get_awaited() const { return m_maps_awaited_for; }
+
+  void reset();
+
+  std::string dump() const;
+
+  friend ostream& operator<<(ostream& out, const MapsCollectionStatus& sf);
+};
+
+
+}  // namespace Scrub
index 66308dcc0e6d97bb43cea98206b6632ea2a99f36..13c360b0323f0f518187c3e0e48701bccec5a2ef 100644 (file)
@@ -46,6 +46,24 @@ void PGSnapTrim::run(
   pg->unlock();
 }
 
+void PGScrubResourcesOK::run(OSD* osd,
+                            OSDShard* sdata,
+                            PGRef& pg,
+                            ThreadPool::TPHandle& handle)
+{
+  pg->scrub_send_resources_granted(epoch_queued, handle);
+  pg->unlock();
+}
+
+void PGScrubDenied::run(OSD* osd,
+                       OSDShard* sdata,
+                       PGRef& pg,
+                       ThreadPool::TPHandle& handle)
+{
+  pg->scrub_send_resources_denied(epoch_queued, handle);
+  pg->unlock();
+}
+
 void PGScrub::run(
   OSD *osd,
   OSDShard *sdata,
index cd91ff51a1f1d6b23ac4af8df0fdbe2cfc9a9460..6850c180a85575d6892ecb3888b372f3eda6a054 100644 (file)
@@ -325,6 +325,51 @@ public:
   }
 };
 
+class PGScrubItem : public PGOpQueueable {
+ protected:
+  epoch_t epoch_queued;
+  std::string_view message_name;
+  PGScrubItem(spg_t pg, epoch_t epoch_queued, std::string_view derivative_name)
+      : PGOpQueueable{pg}, epoch_queued{epoch_queued}, message_name{derivative_name}
+  {}
+  op_type_t get_op_type() const final { return op_type_t::bg_scrub; }
+  std::ostream& print(std::ostream& rhs) const final
+  {
+    return rhs << message_name << "(pgid=" << get_pgid()
+              << "epoch_queued=" << epoch_queued << ")";
+  }
+  void run(OSD* osd,
+          OSDShard* sdata,
+          PGRef& pg,
+          ThreadPool::TPHandle& handle) override = 0;
+  op_scheduler_class get_scheduler_class() const final
+  {
+    return op_scheduler_class::background_best_effort;
+  }
+};
+
+/**
+ *  all replicas have granted our scrub resources request
+ */
+class PGScrubResourcesOK : public PGScrubItem {
+ public:
+  PGScrubResourcesOK(spg_t pg, epoch_t epoch_queued)
+      : PGScrubItem{pg, epoch_queued, "PGScrubResourcesOK"}
+  {}
+  void run(OSD* osd, OSDShard* sdata, PGRef& pg, ThreadPool::TPHandle& handle) final;
+};
+
+/**
+ *  scrub resources requests denied by replica(s)
+ */
+class PGScrubDenied : public PGScrubItem {
+ public:
+  PGScrubDenied(spg_t pg, epoch_t epoch_queued)
+      : PGScrubItem{pg, epoch_queued, "PGScrubDenied"}
+  {}
+  void run(OSD* osd, OSDShard* sdata, PGRef& pg, ThreadPool::TPHandle& handle) final;
+};
+
 class PGRecovery : public PGOpQueueable {
   epoch_t epoch_queued;
   uint64_t reserved_pushes;