scrubber/PrimaryLogScrub.cc
scrubber/scrub_job.cc
scrubber/scrub_machine.cc
+ scrubber/scrub_reservations.cc
scrubber/scrub_resources.cc
scrubber/ScrubStore.cc
scrubber/scrub_backend.cc
}
-// ///////////////////// ReplicaReservations //////////////////////////////////
-
-#undef dout_prefix
-#define dout_prefix _prefix_fn(_dout, this, __func__)
-template <class T>
-static std::ostream& _prefix_fn(std::ostream* _dout, T* t, std::string fn = "")
-{
- return t->gen_prefix(*_dout, fn);
-}
+// ///////////////////// LocalReservation //////////////////////////////////
namespace Scrub {
-ReplicaReservations::ReplicaReservations(ScrubMachineListener& scrbr)
-
- : m_scrubber{scrbr}
- , m_pg{m_scrubber.get_pg()}
- , m_pgid{m_scrubber.get_spgid().pgid}
- , m_osds{m_pg->get_pg_osd(ScrubberPasskey())}
-{
- // the acting set is sorted by pg_shard_t. The reservations are to be issued
- // in this order, so that the OSDs will receive the requests in a consistent
- // order. This is done to reduce the chance of having two PGs that share some
- // of their acting-set OSDs, consistently interfering with each other's
- // reservation process.
- auto acting = m_pg->get_actingset();
- m_sorted_secondaries.reserve(acting.size());
- std::copy_if(
- acting.cbegin(), acting.cend(), std::back_inserter(m_sorted_secondaries),
- [whoami = m_pg->pg_whoami](const pg_shard_t& shard) {
- return shard != whoami;
- });
-
- m_next_to_request = m_sorted_secondaries.cbegin();
- // send out the 1'st request (unless we have no replicas)
- send_next_reservation_or_complete();
-}
-
-void ReplicaReservations::release_all()
-{
- std::span<const pg_shard_t> replicas{
- m_sorted_secondaries.cbegin(), m_next_to_request};
- dout(10) << fmt::format("releasing {}", replicas) << dendl;
- epoch_t epoch = m_pg->get_osdmap_epoch();
-
- // send 'release' messages to all replicas we have managed to reserve
- for (const auto& peer : replicas) {
- auto m = make_message<MOSDScrubReserve>(
- spg_t{m_pgid, peer.shard}, epoch, MOSDScrubReserve::RELEASE,
- m_pg->pg_whoami);
- m_pg->send_cluster_message(peer.osd, m, epoch, false);
- }
-
- m_sorted_secondaries.clear();
- m_next_to_request = m_sorted_secondaries.cbegin();
-}
-
-void ReplicaReservations::discard_remote_reservations()
-{
- dout(10) << "reset w/o issuing messages" << dendl;
- m_sorted_secondaries.clear();
- m_next_to_request = m_sorted_secondaries.cbegin();
-}
-
-ReplicaReservations::~ReplicaReservations()
-{
- release_all();
-}
-
-/**
- * @ATTN we would not reach here if the ReplicaReservation object managed by
- * the scrubber was reset.
- */
-void ReplicaReservations::handle_reserve_grant(OpRequestRef op, pg_shard_t from)
-{
- // verify that the grant is from the peer we expected. If not?
- // for now - abort the OSD. \todo reconsider the reaction.
- if (!get_last_sent().has_value() || from != *get_last_sent()) {
- dout(1) << fmt::format(
- "unexpected grant from {} (expected {})", from,
- get_last_sent().value_or(pg_shard_t{}))
- << dendl;
- ceph_assert(from == get_last_sent());
- return;
- }
-
- auto elapsed = clock::now() - m_last_request_sent_at;
-
- // log a warning if the response was slow to arrive
- auto warn_timeout = m_scrubber.get_pg_cct()->_conf.get_val<milliseconds>(
- "osd_scrub_slow_reservation_response");
- if (!m_slow_response_warned && (elapsed > warn_timeout)) {
- dout(1) << fmt::format(
- "slow reservation response from {} ({}ms)", from,
- duration_cast<milliseconds>(elapsed).count())
- << dendl;
- // prevent additional warnings
- m_slow_response_warned = true;
- }
- dout(10) << fmt::format(
- "granted by {} ({} of {}) in {}ms", from,
- active_requests_cnt(), m_sorted_secondaries.size(),
- duration_cast<milliseconds>(elapsed).count())
- << dendl;
- send_next_reservation_or_complete();
-}
-
-void ReplicaReservations::send_next_reservation_or_complete()
-{
- if (m_next_to_request == m_sorted_secondaries.cend()) {
- // granted by all replicas
- dout(10) << "remote reservation complete" << dendl;
- m_osds->queue_for_scrub_granted(m_pg, scrub_prio_t::low_priority);
-
- } else {
- // send the next reservation request
- const auto peer = *m_next_to_request;
- const auto epoch = m_pg->get_osdmap_epoch();
- auto m = make_message<MOSDScrubReserve>(
- spg_t{m_pgid, peer.shard}, epoch, MOSDScrubReserve::REQUEST,
- m_pg->pg_whoami);
- m_pg->send_cluster_message(peer.osd, m, epoch, false);
- m_last_request_sent_at = clock::now();
- dout(10) << fmt::format(
- "reserving {} (the {} of {} replicas)", *m_next_to_request,
- active_requests_cnt()+1, m_sorted_secondaries.size())
- << dendl;
- m_next_to_request++;
- }
-}
-
-// temporary comment: the part of handle_reserve_reject() that will not
-// be delegated to the FSM in the following commits
-void ReplicaReservations::verify_rejections_source(
- OpRequestRef op,
- pg_shard_t from)
-{
- // a convenient log message for the reservation process conclusion
- // (matches the one in send_next_reservation_or_complete())
- dout(10) << fmt::format(
- "remote reservation failure. Rejected by {} ({})", from,
- *op->get_req())
- << dendl;
-
- // verify that the denial is from the peer we expected. If not?
- // we should treat it as though the *correct* peer has rejected the request,
- // but remember to release that peer, too.
-
- ceph_assert(get_last_sent().has_value());
- const auto expected = *get_last_sent();
- if (from != expected) {
- dout(1) << fmt::format(
- "unexpected rejection from {} (expected {})", from, expected)
- << dendl;
- } else {
- // correct peer, wrong answer...
- m_next_to_request--; // no need to release this one
- }
-}
-
-// to be delegated to the FSM in the following commits
-void ReplicaReservations::handle_reserve_reject(
- OpRequestRef op,
- pg_shard_t from)
-{
- verify_rejections_source(op, from);
- release_all();
- m_scrubber.flag_reservations_failure();
- m_osds->queue_for_scrub_denied(m_pg, scrub_prio_t::low_priority);
-}
-
-std::optional<pg_shard_t> ReplicaReservations::get_last_sent() const
-{
- if (m_next_to_request == m_sorted_secondaries.cbegin()) {
- return std::nullopt;
- }
- return *(m_next_to_request - 1);
-}
-
-size_t ReplicaReservations::active_requests_cnt() const
-{
- return m_next_to_request - m_sorted_secondaries.cbegin();
-}
-
-std::ostream& ReplicaReservations::gen_prefix(
- std::ostream& out,
- std::string fn) const
-{
- return m_pg->gen_prefix(out)
- << fmt::format("scrubber::ReplicaReservations:{}: ", fn);
-}
-
-
-// ///////////////////// LocalReservation //////////////////////////////////
-
// note: no dout()s in LocalReservation functions. Client logs interactions.
LocalReservation::LocalReservation(OSDService* osds) : m_osds{osds}
{
#include "osd_scrub_sched.h"
#include "scrub_backend.h"
#include "scrub_machine_lstnr.h"
+#include "scrub_reservations.h"
namespace Scrub {
class ScrubMachine;
struct BuildMap;
-/**
- * Reserving/freeing scrub resources at the replicas.
- *
- * When constructed - sends reservation requests to the acting_set OSDs, one
- * by one.
- * Once a replica's OSD replies with a 'grant'ed reservation, we send a
- * reservation request to the next replica.
- * A rejection triggers a "couldn't acquire the replicas' scrub resources"
- * event. All granted reservations are released.
- *
- * Reserved replicas should be released at the end of the scrub session. The
- * one exception is if the scrub terminates upon an interval change. In that
- * scenario - the replicas discard their reservations on their own accord
- * when noticing the change in interval, and there is no need (and no
- * guaranteed way) to send them the release message.
- *
- * Timeouts:
- *
- * Slow-Secondary Warning:
- * Warn if a replica takes more than <conf> milliseconds to reply to a
- * reservation request. Only one warning is issued per session.
- *
- * Reservation Timeout:
- * We limit the total time we wait for the replicas to respond to the
- * reservation request. If the reservation back-and-forth does not complete
- * within <conf> milliseconds, we give up and release all the reservations
- * that have been acquired until that moment.
- * (Why? because we have encountered instances where a reservation request was
- * lost - either due to a bug or due to a network issue.)
- */
-class ReplicaReservations {
- using clock = ceph::coarse_real_clock;
-
- ScrubMachineListener& m_scrubber;
- PG* m_pg;
-
- /// shorthand for m_scrubber.get_spgid().pgid
- const pg_t m_pgid;
-
- /// for dout && when queueing messages to the FSM
- OSDService* m_osds;
-
- /// the acting set (not including myself), sorted by pg_shard_t
- std::vector<pg_shard_t> m_sorted_secondaries;
-
- /// the next replica to which we will send a reservation request
- std::vector<pg_shard_t>::const_iterator m_next_to_request;
-
- /// for logs, and for detecting slow peers
- clock::time_point m_last_request_sent_at;
-
- /// used to prevent multiple "slow response" warnings
- bool m_slow_response_warned{false};
-
- public:
- ReplicaReservations(ScrubMachineListener& scrubber);
-
- ~ReplicaReservations();
-
- /**
- * The OK received from the replica (after verifying that it is indeed
- * the replica we are expecting a reply from) is noted, and triggers
- * one of two: either sending a reservation request to the next replica,
- * or notifying the scrubber that we have reserved them all.
- */
- void handle_reserve_grant(OpRequestRef op, pg_shard_t from);
-
- /**
- * Verify that the sender of the received rejection is the replica we
- * were expecting a reply from.
- * If this is so - just mark the fact that the specific peer need not
- * be released.
- *
- * Note - the actual handling of scrub session termination and of
- * releasing the reserved replicas is done by the caller (the FSM).
- */
- void verify_rejections_source(OpRequestRef op, pg_shard_t from);
-
- void handle_reserve_reject(OpRequestRef op, pg_shard_t from);
-
- /**
- * Notifies implementation that it is no longer responsible for releasing
- * tracked remote reservations.
- *
- * The intended usage is upon interval change. In general, replicas are
- * responsible for releasing their own resources upon interval change without
- * coordination from the primary.
- *
- * Sends no messages.
- */
- void discard_remote_reservations();
-
- // note: 'public', as accessed via the 'standard' dout_prefix() macro
- std::ostream& gen_prefix(std::ostream& out, std::string fn) const;
-
- private:
- /// send 'release' messages to all replicas we have managed to reserve
- void release_all();
-
- /// the only replica we are expecting a reply from
- std::optional<pg_shard_t> get_last_sent() const;
-
- /// The number of requests that have been sent (and not rejected) so far.
- size_t active_requests_cnt() const;
-
- /**
- * Either send a reservation request to the next replica, or notify the
- * scrubber that we have reserved all the replicas.
- */
- void send_next_reservation_or_complete();
-};
/**
* wraps the local OSD scrub resource reservation in an RAII wrapper
--- /dev/null
+++ b/src/osd/scrubber/scrub_reservations.cc
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "./scrub_reservations.h"
+
+#include <span>
+
+#include "common/ceph_time.h"
+#include "messages/MOSDScrubReserve.h"
+#include "osd/OSD.h"
+#include "osd/PG.h"
+#include "osd/osd_types_fmt.h"
+
+#include "pg_scrubber.h"
+
+using namespace Scrub;
+using namespace std::chrono;
+using namespace std::chrono_literals;
+
+#define dout_context (m_osds->cct)
+#define dout_subsys ceph_subsys_osd
+#undef dout_prefix
+#define dout_prefix _prefix_fn(_dout, this, __func__)
+template <class T>
+static std::ostream& _prefix_fn(std::ostream* _dout, T* t, std::string fn = "")
+{
+ return t->gen_prefix(*_dout, fn);
+}
+
+namespace Scrub {
+
+ReplicaReservations::ReplicaReservations(ScrubMachineListener& scrbr)
+ : m_scrubber{scrbr}
+ , m_pg{m_scrubber.get_pg()}
+ , m_pgid{m_scrubber.get_spgid().pgid}
+ , m_osds{m_pg->get_pg_osd(ScrubberPasskey())}
+{
+ // the acting set is sorted by pg_shard_t. The reservations are to be issued
+ // in this order, so that the OSDs will receive the requests in a consistent
+ // order. This is done to reduce the chance of having two PGs that share some
+ // of their acting-set OSDs, consistently interfering with each other's
+ // reservation process.
+ auto acting = m_pg->get_actingset();
+ m_sorted_secondaries.reserve(acting.size());
+ std::copy_if(
+ acting.cbegin(), acting.cend(), std::back_inserter(m_sorted_secondaries),
+ [whoami = m_pg->pg_whoami](const pg_shard_t& shard) {
+ return shard != whoami;
+ });
+
+ m_next_to_request = m_sorted_secondaries.cbegin();
+  // send out the first request (unless we have no replicas)
+ send_next_reservation_or_complete();
+}
+
+void ReplicaReservations::release_all()
+{
+ std::span<const pg_shard_t> replicas{
+ m_sorted_secondaries.cbegin(), m_next_to_request};
+ dout(10) << fmt::format("releasing {}", replicas) << dendl;
+ epoch_t epoch = m_pg->get_osdmap_epoch();
+
+ // send 'release' messages to all replicas we have managed to reserve
+ for (const auto& peer : replicas) {
+ auto m = make_message<MOSDScrubReserve>(
+ spg_t{m_pgid, peer.shard}, epoch, MOSDScrubReserve::RELEASE,
+ m_pg->pg_whoami);
+ m_pg->send_cluster_message(peer.osd, m, epoch, false);
+ }
+
+ m_sorted_secondaries.clear();
+ m_next_to_request = m_sorted_secondaries.cbegin();
+}
+
+void ReplicaReservations::discard_remote_reservations()
+{
+ dout(10) << "reset w/o issuing messages" << dendl;
+ m_sorted_secondaries.clear();
+ m_next_to_request = m_sorted_secondaries.cbegin();
+}
+
+ReplicaReservations::~ReplicaReservations()
+{
+ release_all();
+}
+
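+/**
+ * @ATTN we would not reach here if the ReplicaReservations object
+ * managed by the scrubber was reset.
+ */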
+void ReplicaReservations::handle_reserve_grant(OpRequestRef op, pg_shard_t from)
+{
+  // verify that the grant is from the peer we expected. If it is not -
+  // for now - abort the OSD. \todo reconsider the reaction.
+ if (!get_last_sent().has_value() || from != *get_last_sent()) {
+ dout(1) << fmt::format(
+ "unexpected grant from {} (expected {})", from,
+ get_last_sent().value_or(pg_shard_t{}))
+ << dendl;
+ ceph_assert(from == get_last_sent());
+ return;
+ }
+
+ auto elapsed = clock::now() - m_last_request_sent_at;
+
+ // log a warning if the response was slow to arrive
+ auto warn_timeout = m_scrubber.get_pg_cct()->_conf.get_val<milliseconds>(
+ "osd_scrub_slow_reservation_response");
+ if (!m_slow_response_warned && (elapsed > warn_timeout)) {
+ dout(1) << fmt::format(
+ "slow reservation response from {} ({}ms)", from,
+ duration_cast<milliseconds>(elapsed).count())
+ << dendl;
+ // prevent additional warnings
+ m_slow_response_warned = true;
+ }
+ dout(10) << fmt::format(
+ "granted by {} ({} of {}) in {}ms", from,
+ active_requests_cnt(), m_sorted_secondaries.size(),
+ duration_cast<milliseconds>(elapsed).count())
+ << dendl;
+ send_next_reservation_or_complete();
+}
+
+void ReplicaReservations::send_next_reservation_or_complete()
+{
+ if (m_next_to_request == m_sorted_secondaries.cend()) {
+ // granted by all replicas
+ dout(10) << "remote reservation complete" << dendl;
+ m_osds->queue_for_scrub_granted(m_pg, scrub_prio_t::low_priority);
+
+ } else {
+ // send the next reservation request
+ const auto peer = *m_next_to_request;
+ const auto epoch = m_pg->get_osdmap_epoch();
+ auto m = make_message<MOSDScrubReserve>(
+ spg_t{m_pgid, peer.shard}, epoch, MOSDScrubReserve::REQUEST,
+ m_pg->pg_whoami);
+ m_pg->send_cluster_message(peer.osd, m, epoch, false);
+ m_last_request_sent_at = clock::now();
+  dout(10) << fmt::format(
+		  "reserving {} ({} of {} replicas)", *m_next_to_request,
+		  active_requests_cnt() + 1, m_sorted_secondaries.size())
+	   << dendl;
+ m_next_to_request++;
+ }
+}
+
+// temporary comment: the part of handle_reserve_reject() that will not
+// be delegated to the FSM in the following commits
+void ReplicaReservations::verify_rejections_source(
+ OpRequestRef op,
+ pg_shard_t from)
+{
+ // a convenient log message for the reservation process conclusion
+ // (matches the one in send_next_reservation_or_complete())
+ dout(10) << fmt::format(
+ "remote reservation failure. Rejected by {} ({})", from,
+ *op->get_req())
+ << dendl;
+
+  // verify that the denial is from the peer we expected. If it is not -
+  // we treat it as though the *correct* peer has rejected the request,
+  // but remember that the expected peer, too, must be released.
+
+ ceph_assert(get_last_sent().has_value());
+ const auto expected = *get_last_sent();
+ if (from != expected) {
+ dout(1) << fmt::format(
+ "unexpected rejection from {} (expected {})", from, expected)
+ << dendl;
+  } else {
+    // correct peer, wrong answer... Step the iterator back, so that the
+    // subsequent release_all() skips this peer - it holds no reservation
+    // to release.
+    m_next_to_request--;
+  }
+}
+
+// to be delegated to the FSM in the following commits
+void ReplicaReservations::handle_reserve_reject(
+ OpRequestRef op,
+ pg_shard_t from)
+{
+ verify_rejections_source(op, from);
+ release_all();
+ m_scrubber.flag_reservations_failure();
+ m_osds->queue_for_scrub_denied(m_pg, scrub_prio_t::low_priority);
+}
+
+std::optional<pg_shard_t> ReplicaReservations::get_last_sent() const
+{
+ if (m_next_to_request == m_sorted_secondaries.cbegin()) {
+ return std::nullopt;
+ }
+ return *(m_next_to_request - 1);
+}
+
+size_t ReplicaReservations::active_requests_cnt() const
+{
+ return m_next_to_request - m_sorted_secondaries.cbegin();
+}
+
+std::ostream& ReplicaReservations::gen_prefix(
+ std::ostream& out,
+ std::string fn) const
+{
+ return m_pg->gen_prefix(out)
+ << fmt::format("scrubber::ReplicaReservations:{}: ", fn);
+}
+
+} // namespace Scrub
--- /dev/null
+++ b/src/osd/scrubber/scrub_reservations.h
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+#pragma once
+
+#include <cassert>
+#include <chrono>
+#include <optional>
+#include <string>
+#include <vector>
+
+#include "osd/scrubber_common.h"
+
+#include "osd_scrub_sched.h"
+#include "scrub_machine_lstnr.h"
+
+namespace Scrub {
+
+/**
+ * Reserving/freeing scrub resources at the replicas.
+ *
+ * When constructed - sends reservation requests to the acting_set OSDs, one
+ * by one.
+ * Once a replica's OSD replies with a 'grant'ed reservation, we send a
+ * reservation request to the next replica.
+ * A rejection triggers a "couldn't acquire the replicas' scrub resources"
+ * event. All granted reservations are released.
+ *
+ * Reserved replicas should be released at the end of the scrub session. The
+ * one exception is if the scrub terminates upon an interval change. In that
+ * scenario - the replicas discard their reservations of their own accord
+ * when noticing the change in interval, and there is no need (and no
+ * guaranteed way) to send them the release message. (A usage sketch
+ * appears after the class definition below.)
+ *
+ * Timeouts:
+ *
+ * Slow-Secondary Warning:
+ * Warn if a replica takes more than <conf> milliseconds to reply to a
+ * reservation request. Only one warning is issued per session.
+ *
+ * Reservation Timeout:
+ * We limit the total time we wait for the replicas to respond to the
+ * reservation request. If the reservation back-and-forth does not complete
+ * within <conf> milliseconds, we give up and release all the reservations
+ * that have been acquired until that moment.
+ * (Why? because we have encountered instances where a reservation request was
+ * lost - either due to a bug or due to a network issue.)
+ */
+class ReplicaReservations {
+ using clock = ceph::coarse_real_clock;
+
+ ScrubMachineListener& m_scrubber;
+ PG* m_pg;
+
+ /// shorthand for m_scrubber.get_spgid().pgid
+ const pg_t m_pgid;
+
+ /// for dout && when queueing messages to the FSM
+ OSDService* m_osds;
+
+ /// the acting set (not including myself), sorted by pg_shard_t
+ std::vector<pg_shard_t> m_sorted_secondaries;
+
+ /// the next replica to which we will send a reservation request
+ std::vector<pg_shard_t>::const_iterator m_next_to_request;
+
+ /// for logs, and for detecting slow peers
+ clock::time_point m_last_request_sent_at;
+
+ /// used to prevent multiple "slow response" warnings
+ bool m_slow_response_warned{false};
+
+ public:
+  explicit ReplicaReservations(ScrubMachineListener& scrubber);
+
+ ~ReplicaReservations();
+
+ /**
+ * The OK received from the replica (after verifying that it is indeed
+ * the replica we are expecting a reply from) is noted, and triggers
+ * one of two: either sending a reservation request to the next replica,
+ * or notifying the scrubber that we have reserved them all.
+ */
+ void handle_reserve_grant(OpRequestRef op, pg_shard_t from);
+
+ /**
+ * Verify that the sender of the received rejection is the replica we
+ * were expecting a reply from.
+ * If this is so - just mark the fact that the specific peer need not
+ * be released.
+ *
+ * Note - the actual handling of scrub session termination and of
+ * releasing the reserved replicas is done by the caller (the FSM).
+ */
+ void verify_rejections_source(OpRequestRef op, pg_shard_t from);
+
+ void handle_reserve_reject(OpRequestRef op, pg_shard_t from);
+
+ /**
+ * Notifies implementation that it is no longer responsible for releasing
+ * tracked remote reservations.
+ *
+ * The intended usage is upon interval change. In general, replicas are
+ * responsible for releasing their own resources upon interval change without
+ * coordination from the primary.
+ *
+ * Sends no messages.
+ */
+ void discard_remote_reservations();
+
+ // note: 'public', as accessed via the 'standard' dout_prefix() macro
+ std::ostream& gen_prefix(std::ostream& out, std::string fn) const;
+
+ private:
+ /// send 'release' messages to all replicas we have managed to reserve
+ void release_all();
+
+ /// the only replica we are expecting a reply from
+ std::optional<pg_shard_t> get_last_sent() const;
+
+ /// The number of requests that have been sent (and not rejected) so far.
+ size_t active_requests_cnt() const;
+
+ /**
+ * Either send a reservation request to the next replica, or notify the
+ * scrubber that we have reserved all the replicas.
+ */
+ void send_next_reservation_or_complete();
+};
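+
+/*
+ * Usage sketch - illustrative only, not part of this interface: the
+ * owning object and the names 'm_reservations', 'start_reserving()' and
+ * 'on_interval_change()' are assumptions made for the example. The
+ * primary's scrub FSM creates the object to kick off the reservation
+ * process, forwards the MOSDScrubReserve replies to
+ * handle_reserve_grant() / handle_reserve_reject(), and relies on the
+ * destructor to release whatever was granted:
+ *
+ *   std::optional<ReplicaReservations> m_reservations;  // hypothetical
+ *
+ *   void start_reserving(ScrubMachineListener& scrubber) {
+ *     // the ctor sends the request to the first replica (if any)
+ *     m_reservations.emplace(scrubber);
+ *   }
+ *
+ *   void on_interval_change() {
+ *     if (m_reservations) {
+ *       // the replicas release their resources of their own accord -
+ *       // send no messages, just drop the (now empty) object
+ *       m_reservations->discard_remote_reservations();
+ *       m_reservations.reset();
+ *     }
+ *   }
+ */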
+
+} // namespace Scrub