]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd/scrub: add required sub-states to handle queued reservation requests 55131/head
authorRonen Friedman <rfriedma@redhat.com>
Sun, 7 Jan 2024 14:20:46 +0000 (08:20 -0600)
committerRonen Friedman <rfriedma@redhat.com>
Sun, 28 Jan 2024 15:40:02 +0000 (09:40 -0600)
The scrub async reserver is not yet used. All requests are treated as
'legacy' requests, i.e. requests that expect an immediate grant/deny
reply.

Signed-off-by: Ronen Friedman <rfriedma@redhat.com>
src/osd/scrubber/osd_scrub.cc
src/osd/scrubber/osd_scrub.h
src/osd/scrubber/scrub_machine.cc
src/osd/scrubber/scrub_machine.h
src/osd/scrubber/scrub_resources.h

index bc65d64bb422ea41462438e4e1288442ef058df9..2ff70dee56d1ad2b2620e298015b3b6b81d06190 100644 (file)
@@ -473,6 +473,11 @@ bool OsdScrub::inc_scrubs_remote(pg_t pgid)
   return m_resource_bookkeeper.inc_scrubs_remote(pgid);
 }
 
+void OsdScrub::enqueue_remote_reservation(pg_t pgid)
+{
+  m_resource_bookkeeper.enqueue_remote_reservation(pgid);
+}
+
 void OsdScrub::dec_scrubs_remote(pg_t pgid)
 {
   m_resource_bookkeeper.dec_scrubs_remote(pgid);
index 12e9db312312d568df89fb9925719965551e4e24..2701a762f798171c22884fc1b8eaba77c8fdc4ef 100644 (file)
@@ -70,6 +70,7 @@ class OsdScrub {
       bool is_high_priority);
   void dec_scrubs_local();
   bool inc_scrubs_remote(pg_t pgid);
+  void enqueue_remote_reservation(pg_t pgid);
   void dec_scrubs_remote(pg_t pgid);
 
   // counting the number of PGs stuck while scrubbing, waiting for objects
index d9d03fe688964135085e491758d0e0e272f3205d..fc5238186868e85dd02433cb150a7cd08db669c8 100644 (file)
@@ -331,8 +331,8 @@ ActiveScrubbing::ActiveScrubbing(my_context ctx)
   auto& session = context<Session>();
 
   session.m_perf_set->inc(scrbcnt_active_started);
-  scrbr->get_clog()->debug() << fmt::format(
-    "{} {} starts", machine.m_pg_id, scrbr->get_op_mode_text());
+  scrbr->get_clog()->debug()
+      << fmt::format("{} {} starts", pg_id, scrbr->get_op_mode_text());
 
   scrbr->on_init();
 }
@@ -747,6 +747,7 @@ ScrubMachine::ScrubMachine(PG* pg, ScrubMachineListener* pg_scrub)
 
 ScrubMachine::~ScrubMachine() = default;
 
+
 // -------- for replicas -----------------------------------------------------
 
 // ----------------------- ReplicaActive --------------------------------
@@ -759,6 +760,8 @@ ReplicaActive::ReplicaActive(my_context ctx)
   dout(10) << "-- state -->> ReplicaActive" << dendl;
   m_pg = scrbr->get_pg();
   m_osds = m_pg->get_pg_osd(ScrubberPasskey());
+  clear_shallow_history<ReplicaIdle, 0>();
+  clear_shallow_history<ReplicaActive, 0>();
 }
 
 ReplicaActive::~ReplicaActive()
@@ -767,11 +770,10 @@ ReplicaActive::~ReplicaActive()
   if (reserved_by_my_primary) {
     dout(10) << "ReplicaActive::~ReplicaActive(): clearing reservation"
             << dendl;
-    clear_reservation_by_remote_primary();
+    clear_reservation_by_remote_primary(false);
   }
 }
 
-
 /*
  * Note: we are expected to be in the initial internal state (Idle) when
  * receiving any registration request. Our other internal states, the
@@ -780,105 +782,299 @@ ReplicaActive::~ReplicaActive()
  *
  * Process:
  * - if already reserved: clear existing reservation, then continue
- * - ask the OSD for the "reservation resource"
- * - if granted: mark it internally and notify the Primary.
- * - otherwise: just notify the requesting primary.
+ * - for async requests:
+ *   - enqueue the request with reserver;
+ *   - move to the ReplicaWaitingReservation state
+ *   - no reply is expected by the caller
+ * - for legacy requests:
+ *   - ask the OSD for the "reservation resource"
+ *   - if granted: move to ReplicaReserved and notify the Primary.
+ *   - otherwise: just notify the requesting primary.
+ *
+ * implementation note: sc::result objects cannot be copied or moved. Thus,
+ * we've resorted to returning a code indicating the next action.
  */
-void ReplicaActive::on_reserve_req(const ReplicaReserveReq& ev)
+ReplicaReactCode ReplicaActive::on_reserve_request(
+    const ReplicaReserveReq& ev,
+    bool async_request)
 {
   DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
   const auto m = ev.m_op->get_req<MOSDScrubReserve>();
   const auto msg_nonce = m->reservation_nonce;
-  dout(10)
-      << fmt::format(
-            "ReplicaActive::on_reserve_req() from {} (reservation_nonce:{})",
-            ev.m_from, msg_nonce)
-      << dendl;
+  dout(10) << fmt::format(
+                 "ReplicaActive::on_reserve_req() from {} request:{} is "
+                 "async?{} (reservation_nonce:{})",
+                 ev.m_from, ev, async_request, msg_nonce)
+          << dendl;
+  auto& svc = m_osds->get_scrub_services();  // shorthand
 
   if (reserved_by_my_primary) {
-    dout(10) << "ReplicaActive::on_reserve_req(): already reserved" << dendl;
-    // clear the existing reservation
-    clear_reservation_by_remote_primary();  // clears the flag, too
+    dout(10) << "ReplicaActive::on_reserve_request(): already reserved"
+            << dendl;
+    // clear the existing reservation. Clears the flag, too
+    clear_reservation_by_remote_primary(false);
   }
 
-  // ask the OSD for the reservation
-  const auto ret = get_remote_reservation();
-  if (ret.granted) {
-    reserved_by_my_primary = true;
-    dout(10) << fmt::format("{}: reserved? yes", __func__) << dendl;
+  Message* reply{nullptr};
+  ReplicaReactCode next_action{ReplicaReactCode::discard};
+
+  if (async_request) {
+    // the request is to be handled asynchronously
+    svc.enqueue_remote_reservation(pg_id.pgid);
+    next_action = ReplicaReactCode::goto_waiting_reservation;
+
   } else {
-    dout(10) << fmt::format("{}: reserved? no ({})", __func__, ret.error_msg)
-            << dendl;
+    // an immediate yes/no is required
+    const auto granted = svc.inc_scrubs_remote(scrbr->get_spgid().pgid);
+    if (granted) {
+      reserved_by_my_primary = true;
+      dout(10) << fmt::format("{}: reserved? yes", __func__) << dendl;
+      reply = new MOSDScrubReserve(
+         spg_t(pg_id.pgid, m_pg->get_primary().shard), ev.m_op->sent_epoch,
+         MOSDScrubReserve::GRANT, m_pg->pg_whoami, msg_nonce);
+      next_action = ReplicaReactCode::goto_replica_reserved;
+
+    } else {
+      dout(10) << fmt::format("{}: reserved? no", __func__) << dendl;
+      reply = new MOSDScrubReserve(
+         spg_t(pg_id.pgid, m_pg->get_primary().shard), ev.m_op->sent_epoch,
+         MOSDScrubReserve::REJECT, m_pg->pg_whoami, msg_nonce);
+      // the event is discarded
+      next_action = ReplicaReactCode::discard;
+    }
   }
 
-  Message* reply = new MOSDScrubReserve(
-      spg_t(pg_id.pgid, m_pg->get_primary().shard), ev.m_op->sent_epoch, ret.op,
-      m_pg->pg_whoami, msg_nonce);
-  m_osds->send_message_osd_cluster(reply, ev.m_op->get_req()->get_connection());
+  if (reply) {
+    m_osds->send_message_osd_cluster(
+       reply, ev.m_op->get_req()->get_connection());
+  }
+  return next_action;
 }
 
-
 void ReplicaActive::on_release(const ReplicaRelease& ev)
 {
   DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
-  if (!reserved_by_my_primary) {
-    dout(5) << fmt::format(
-                  "ReplicaActive::on_release() from {}: not reserved!",
-                  ev.m_from)
-           << dendl;
-    return;
-  }
   dout(10) << fmt::format("ReplicaActive::on_release() from {}", ev.m_from)
           << dendl;
-  clear_reservation_by_remote_primary();
+  clear_reservation_by_remote_primary(true);
 }
 
-
-ReplicaActive::ReservationAttemptRes ReplicaActive::get_remote_reservation()
+void ReplicaActive::clear_reservation_by_remote_primary(bool log_failure)
 {
-  using ReservationAttemptRes = ReplicaActive::ReservationAttemptRes;
   DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
+  dout(10) << fmt::format(
+                 "ReplicaActive::clear_reservation_by_remote_primary(): was "
+                 "reserved? {}",
+                 (reserved_by_my_primary ? "yes" : "no"))
+          << dendl;
+  if (reserved_by_my_primary) {
+    m_osds->get_scrub_services().dec_scrubs_remote(scrbr->get_spgid().pgid);
+    reserved_by_my_primary = false;
+  } else if (log_failure) {
+    const auto msg = fmt::format(
+       "ReplicaActive::clear_reservation_by_remote_primary(): "
+       "not reserved!");
+    dout(5) << msg << dendl;
+    scrbr->get_clog()->warn() << msg;
+  }
+}
 
-  if (m_osds->get_scrub_services().inc_scrubs_remote(scrbr->get_spgid().pgid)) {
-    return ReservationAttemptRes{MOSDScrubReserve::GRANT, "", true};
-  } else {
-    return ReservationAttemptRes{
-       MOSDScrubReserve::REJECT, "failed to reserve remotely", false};
+
+// ---------------- ReplicaActive/ReplicaIdle ---------------------------
+
+ReplicaIdle::ReplicaIdle(my_context ctx)
+    : my_base(ctx)
+    , NamedSimply(context<ScrubMachine>().m_scrbr, "ReplicaActive/ReplicaIdle")
+{
+  dout(10) << "-- state -->> ReplicaActive/ReplicaIdle" << dendl;
+}
+
+void ReplicaIdle::reset_ignored(const FullReset&)
+{
+  dout(10) << "ReplicaIdle::react(const FullReset&): FullReset ignored"
+          << dendl;
+}
+
+
+// ---------------- ReplicaIdle/ReplicaUnreserved ---------------------------
+
+ReplicaUnreserved::ReplicaUnreserved(my_context ctx)
+    : my_base(ctx)
+    , NamedSimply(
+         context<ScrubMachine>().m_scrbr,
+         "ReplicaActive/ReplicaIdle/ReplicaUnreserved")
+{
+  dout(10) << "-- state -->> ReplicaActive/ReplicaIdle/ReplicaUnreserved"
+          << dendl;
+}
+
+sc::result ReplicaUnreserved::react(const ReplicaReserveReq& ev)
+{
+  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
+  dout(10) << "ReplicaUnreserved::react(const ReplicaReserveReq&)" << dendl;
+
+  switch (context<ReplicaActive>().on_reserve_request(ev, false)) {
+    case ReplicaReactCode::discard:
+      return discard_event();
+    case ReplicaReactCode::goto_waiting_reservation:
+      return transit<ReplicaWaitingReservation>();
+    case ReplicaReactCode::goto_replica_reserved:
+      return transit<ReplicaReserved>();
+    default:
+      ceph_abort_msg("unexpected return value");
   }
+  // can't happen, but some compilers complain:
+  return transit<ReplicaReserved>();
 }
 
+sc::result ReplicaUnreserved::react(const StartReplica& ev)
+{
+  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
+  dout(10) << "ReplicaUnreserved::react(const StartReplica&)" << dendl;
+  post_event(ReplicaPushesUpd{});
+  return transit<ReplicaActiveOp>();
+}
 
-void ReplicaActive::clear_reservation_by_remote_primary()
+sc::result ReplicaUnreserved::react(const ReplicaRelease&)
 {
   DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
-  dout(10) << "ReplicaActive::clear_reservation_by_remote_primary()" << dendl;
-  m_osds->get_scrub_services().dec_scrubs_remote(scrbr->get_spgid().pgid);
-  reserved_by_my_primary = false;
+  dout(10) << "ReplicaUnreserved::react(const ReplicaRelease&)" << dendl;
+  // shouldn't happen. Possible (faulty) sequence: getting an op
+  // command while in ReplicaWaitingReservation (as we would just
+  // treat that as a regular op request, but will stop waiting for
+  // reservation).
+  // must cancel the queued reservation request
+  scrbr->get_clog()->error() << fmt::format(
+      "osd.{} pg[{}]: reservation released while not reserved",
+      scrbr->get_whoami(), scrbr->get_spgid());
+  context<ReplicaActive>().clear_reservation_by_remote_primary(true);
+  return discard_event();
 }
 
+sc::result ReplicaUnreserved::react(const ReserverGranted&)
+{
+  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
+  dout(10) << "ReplicaUnreserved::react(const ReserverGranted&)" << dendl;
+  // shouldn't happen. Possible (faulty) sequence: getting an op
+  // command while in ReplicaWaitingReservation (as we would just
+  // treat that as a regular op request, but will stop waiting for
+  // reservation).
+  // must unreserve
+  scrbr->get_clog()->error() << fmt::format(
+      "osd.{} pg[{}]: reservation granted while not being waited for",
+      scrbr->get_whoami(), scrbr->get_spgid());
+  context<ReplicaActive>().clear_reservation_by_remote_primary(true);
+  return discard_event();
+}
 
-void ReplicaActive::check_for_updates(const StartReplica& ev)
+
+// ---------------- ReplicaIdle/ReplicaWaitingReservation ---------------------------
+
+ReplicaWaitingReservation::ReplicaWaitingReservation(my_context ctx)
+    : my_base(ctx)
+    , NamedSimply(
+         context<ScrubMachine>().m_scrbr,
+         "ReplicaActive/ReplicaIdle/ReplicaWaitingReservation")
+{
+  dout(10)
+      << "-- state -->> ReplicaActive/ReplicaIdle/ReplicaWaitingReservation"
+      << dendl;
+}
+
+sc::result ReplicaWaitingReservation::react(const ReserverGranted&)
+{
+  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
+  dout(10) << "ReplicaWaitingReservation::react(const ReserverGranted&)"
+          << dendl;
+
+  /// \todo complete the handling of the granted reservation
+  ceph_abort_msg("not implemented yet");
+  return discard_event();
+}
+
+sc::result ReplicaWaitingReservation::react(const ReplicaRelease& ev)
+{
+  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
+  dout(10) << "ReplicaWaitingReservation::react(const ReplicaRelease&)"
+          << dendl;
+  // must cancel the queued reservation request
+  context<ReplicaActive>().on_release(ev);
+  return transit<ReplicaUnreserved>();
+}
+
+sc::result ReplicaWaitingReservation::react(const StartReplica& ev)
 {
   DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
-  dout(10) << "ReplicaActive::check_for_updates()" << dendl;
+  dout(10) << "ReplicaWaitingReservation::react(const StartReplica&)" << dendl;
+
+  // this shouldn't happen. We will handle it, but will also log an error.
+  scrbr->get_clog()->error() << fmt::format(
+      "osd.{} pg[{}]: new chunk request while still waiting for "
+      "reservation",
+      scrbr->get_whoami(), scrbr->get_spgid());
+  clear_shallow_history<ReplicaIdle, 0>();
   post_event(ReplicaPushesUpd{});
+  return transit<ReplicaActiveOp>();
 }
 
-// ---------------- ReplicaActive/ReplicaIdle ---------------------------
+sc::result ReplicaWaitingReservation::react(const ReplicaReserveReq& ev)
+{
+  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
+  dout(10) << "ReplicaWaitingReservation::react(const ReplicaReserveReq&)"
+          << dendl;
+  // this shouldn't happen. We will handle it, but will also log an error.
+  scrbr->get_clog()->error() << fmt::format(
+      "osd.{} pg[{}]: reservation requested while previous is pending",
+      scrbr->get_whoami(), scrbr->get_spgid());
+  // cancel the existing reservation, and re-request
+  context<ReplicaActive>().clear_reservation_by_remote_primary(true);
+  post_event(ev);
+  return transit<ReplicaUnreserved>();
+}
 
-ReplicaIdle::ReplicaIdle(my_context ctx)
+
+// ---------------- ReplicaIdle/ReplicaReserved ---------------------------
+
+ReplicaReserved::ReplicaReserved(my_context ctx)
     : my_base(ctx)
-    , NamedSimply(context<ScrubMachine>().m_scrbr, "ReplicaActive/ReplicaIdle")
+    , NamedSimply(
+         context<ScrubMachine>().m_scrbr,
+         "ReplicaActive/ReplicaIdle/ReplicaReserved")
 {
-  dout(10) << "-- state -->> ReplicaActive/ReplicaIdle" << dendl;
+  dout(10) << "-- state -->> ReplicaActive/ReplicaIdle/ReplicaReserved"
+          << dendl;
 }
 
-void ReplicaIdle::reset_ignored(const FullReset&)
+sc::result ReplicaReserved::react(const ReplicaRelease& ev)
 {
-  dout(10) << "ReplicaIdle::react(const FullReset&): FullReset ignored"
-          << dendl;
+  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
+  dout(10) << "ReplicaReserved::react(const ReplicaRelease&)" << dendl;
+  context<ReplicaActive>().on_release(ev);
+  return transit<ReplicaUnreserved>();
+}
+
+sc::result ReplicaReserved::react(const ReplicaReserveReq& ev)
+{
+  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
+  dout(10) << "ReplicaReserved::react(const ReplicaReserveReq&)" << dendl;
+  scrbr->get_clog()->error() << fmt::format(
+      "osd.{} pg[{}]: reservation requested while still reserved",
+      scrbr->get_whoami(), scrbr->get_spgid());
+  // cancel the existing reservation, and re-request
+  context<ReplicaActive>().clear_reservation_by_remote_primary(true);
+  post_event(ev);
+  return transit<ReplicaUnreserved>();
 }
 
+sc::result ReplicaReserved::react(const StartReplica& ev)
+{
+  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
+  dout(10) << "ReplicaReserved::react(const StartReplica&)" << dendl;
+  post_event(ReplicaPushesUpd{});
+  return transit<ReplicaActiveOp>();
+}
+
+
 // ------------- ReplicaActive/ReplicaActiveOp --------------------------
 
 ReplicaActiveOp::ReplicaActiveOp(my_context ctx)
@@ -909,9 +1105,8 @@ sc::result ReplicaActiveOp::react(const StartReplica&)
   dout(1) << msg << dendl;
   scrbr->get_clog()->warn() << msg;
 
-  post_event(ReplicaPushesUpd{});
-
   // exit & re-enter the state
+  post_event(ReplicaPushesUpd{});
   return transit<ReplicaActiveOp>();
 }
 
@@ -970,15 +1165,19 @@ sc::result ReplicaBuildingMap::react(const SchedReplica&)
     dout(10) << "replica scrub job preempted" << dendl;
 
     scrbr->send_preempted_replica();
-    return transit<ReplicaIdle>();
+    return transit<sc::shallow_history<ReplicaReserved>>();
   }
 
   // start or check progress of build_replica_map_chunk()
   auto ret_init = scrbr->build_replica_map_chunk();
   if (ret_init != -EINPROGRESS) {
-    return transit<ReplicaIdle>();
+    dout(10) << "ReplicaBuildingMap::react(const SchedReplica&): back to idle"
+            << dendl;
+    return transit<sc::shallow_history<ReplicaReserved>>();
   }
 
+  dout(20) << "ReplicaBuildingMap::react(const SchedReplica&): discarded"
+          << dendl;
   return discard_event();
 }
 
index beb4d7a4c0fa7f09e6ec127655d1a08140307216..9b2a800d18823abafcce3747fd272778bd37e03c 100644 (file)
 #include <boost/statechart/event_base.hpp>
 #include <boost/statechart/in_state_reaction.hpp>
 #include <boost/statechart/simple_state.hpp>
+#include <boost/statechart/shallow_history.hpp>
 #include <boost/statechart/state.hpp>
 #include <boost/statechart/state_machine.hpp>
 #include <boost/statechart/transition.hpp>
 
+#include "common/fmt_common.h"
 #include "common/version.h"
 #include "messages/MOSDOp.h"
 #include "messages/MOSDRepScrub.h"
@@ -68,6 +70,10 @@ struct OpCarryingEvent : sc::event<EV> {
   {
     *out << fmt::format("{} (from: {})", EV::event_name, m_from);
   }
+  std::string fmt_print() const
+  {
+    return fmt::format("{} (from: {})", EV::event_name, m_from);
+  }
   std::string_view print() const { return EV::event_name; }
   ~OpCarryingEvent() { on_event_discard(EV::event_name); }
 };
@@ -96,6 +102,8 @@ OP_EV(ReplicaReserveReq);
 /// explicit release request from the Primary
 OP_EV(ReplicaRelease);
 
+/// the async-reserver granted our reservation request
+OP_EV(ReserverGranted);
 
 #define MEV(E)                                          \
   struct E : sc::event<E> {                             \
@@ -222,9 +230,19 @@ struct PrimaryActive;         ///< base state for a Primary
 struct PrimaryIdle;       ///< ready for a new scrub request
 struct Session;            ///< either reserving or actively scrubbing
 
-// the active states for a replica:
-struct ReplicaActive;    ///< the quiescent state for a replica
+// the Replica states:
+struct ReplicaActive;  ///< base state for when peered as a replica
+
+/// Inactive replica state. Handles reservation requests
+struct ReplicaIdle;
+// its sub-states:
+struct ReplicaUnreserved;      ///< not reserved by a primary
+struct ReplicaWaitingReservation;  ///< a reservation request was received from
+struct ReplicaReserved;               ///< we are reserved by our primary
+
+// and when handling a single chunk scrub request op:
 struct ReplicaActiveOp;
+// its sub-states:
 struct ReplicaWaitUpdates;
 struct ReplicaBuildingMap;
 
@@ -710,42 +728,63 @@ struct WaitDigestUpdate : sc::state<WaitDigestUpdate, ActiveScrubbing>,
  *   - maintain the "I am reserved by a primary" state;
  *   - handles reservation requests
  *
- *     - ReplicaIdle - ready for a new scrub request
- *          * initial state of ReplicaActive
+ *  - ReplicaIdle - ready for a new scrub request
+ *
+ *    - initial state of ReplicaActive
+ *    - No scrubbing is performed in this state, but reservation-related
+ *      events are handled.
+ *    - uses 'shallow history', so that when returning from ReplicaActiveOp, we
+ *       return to where we were - either reserved by our primary, or unreserved.
  *
- *     - ReplicaActiveOp - handling a single map request op
- *          * ReplicaWaitUpdates
- *         * ReplicaBuildingMap
+ *    - sub-states:
+ *      * ReplicaUnreserved - not reserved by a primary. In this state we
+ *        are waiting for either a reservation request, or a chunk scrub op.
+ *
+ *      * ReplicaWaitingReservation - a reservation request was received from
+ *        our primary. We expect a ' go ahead' from the reserver, or a
+ *        cancellation command from the primary (or an interval change).
+ *
+ *      * ReplicaReserved - we are reserved by a primary.
+ *
+ *  - ReplicaActiveOp - handling a single map request op
+ *      * ReplicaWaitUpdates
+ *      * ReplicaBuildingMap
  */
 
 struct ReplicaIdle;
 
-struct ReplicaActive : sc::state<ReplicaActive, ScrubMachine, ReplicaIdle>,
-                        NamedSimply {
+// sc::result cannot be copied or moved, so we need to postpone
+// the creation of such objects to the moment where they are
+// returned from the react() function.
+enum class ReplicaReactCode {
+  discard,
+  goto_waiting_reservation,
+  goto_replica_reserved
+};
+
+struct ReplicaActive : sc::state<
+                          ReplicaActive,
+                          ScrubMachine,
+                          mpl::list<sc::shallow_history<ReplicaIdle>>,
+                          sc::has_shallow_history>,
+                      NamedSimply {
   explicit ReplicaActive(my_context ctx);
   ~ReplicaActive();
 
   /// handle a reservation request from a primary
-  void on_reserve_req(const ReplicaReserveReq&);
+  ReplicaReactCode on_reserve_request(
+      const ReplicaReserveReq&,
+      bool async_request);
 
   /// handle a 'release' from a primary
-  void on_release(const ReplicaRelease&);
+  void on_release(const ReplicaRelease& ev);
 
-  void check_for_updates(const StartReplica&);
+  /// cancel the reserver request.
+  /// The 'failure' re 'log_failure' is logged if we are not reserved to
+  /// begin with.
+  void clear_reservation_by_remote_primary(bool log_failure);
 
-  using reactions = mpl::list<
-      // a reservation request from the primary
-      sc::in_state_reaction<
-         ReplicaReserveReq,
-         ReplicaActive,
-         &ReplicaActive::on_reserve_req>,
-      // an explicit release request from the primary
-      sc::in_state_reaction<
-         ReplicaRelease,
-         ReplicaActive,
-         &ReplicaActive::on_release>,
-      // when the interval ends - we may not be a replica anymore
-      sc::transition<IntervalChanged, NotActive>>;
+  using reactions = mpl::list<sc::transition<IntervalChanged, NotActive>>;
 
  private:
   bool reserved_by_my_primary{false};
@@ -753,40 +792,105 @@ struct ReplicaActive : sc::state<ReplicaActive, ScrubMachine, ReplicaIdle>,
   // shortcuts:
   PG* m_pg;
   OSDService* m_osds;
-
-  /// a convenience internal result structure
-  struct ReservationAttemptRes {
-    MOSDScrubReserve::ReserveMsgOp op; // GRANT or REJECT
-    std::string_view error_msg;
-    bool granted;
-  };
-
-  /// request a scrub resource from our local OSD
-  /// (after performing some checks)
-  ReservationAttemptRes get_remote_reservation();
-
-  void clear_reservation_by_remote_primary();
 };
 
 
-struct ReplicaIdle : sc::state<ReplicaIdle, ReplicaActive>, NamedSimply {
+struct ReplicaIdle : sc::state<
+                        ReplicaIdle,
+                        ReplicaActive,
+                        ReplicaUnreserved,
+                        sc::has_shallow_history>,
+                    NamedSimply {
   explicit ReplicaIdle(my_context ctx);
   ~ReplicaIdle() = default;
   void reset_ignored(const FullReset&);
+  using reactions = mpl::list<sc::in_state_reaction<
+      FullReset,
+      ReplicaIdle,
+      &ReplicaIdle::reset_ignored>>;
+};
+
+/*
+ * ReplicaUnreserved
+ *
+ * Possible events:
+ * - a reservation request from a legacy primary (i.e. a primary that does not
+ *   support queued reservations). We either deny or grant, transitioning to
+ *   ReplicaReserved directly.
+ * - a reservation request from a primary that supports queued reservations.
+ *   We transition to ReplicaWaitingReservation, and wait for the Reserver's
+ *   response.
+ * - (handled by our parent state) a chunk scrub request. We transition to
+ *   ReplicaActiveOp.
+ */
+struct ReplicaUnreserved : sc::state<ReplicaUnreserved, ReplicaIdle>,
+                          NamedSimply {
+  explicit ReplicaUnreserved(my_context ctx);
 
-  // note the execution of check_for_updates() when transitioning to
-  // ReplicaActiveOp/ReplicaWaitUpdates. That would trigger a ReplicaPushesUpd
-  // event, which will be handled by ReplicaWaitUpdates.
   using reactions = mpl::list<
-      sc::transition<
-         StartReplica,
-         ReplicaWaitUpdates,
-         ReplicaActive,
-         &ReplicaActive::check_for_updates>,
-      sc::in_state_reaction<
-         FullReset,
-         ReplicaIdle,
-         &ReplicaIdle::reset_ignored>>;
+      sc::custom_reaction<ReplicaReserveReq>,
+      sc::custom_reaction<ReplicaRelease>,
+      sc::custom_reaction<ReserverGranted>,
+      sc::custom_reaction<StartReplica>>;
+
+  sc::result react(const ReplicaReserveReq& ev);
+  sc::result react(const StartReplica& ev);
+  sc::result react(const ReserverGranted&);
+  sc::result react(const ReplicaRelease&);
+};
+
+/**
+ * ReplicaWaitingReservation
+ *
+ * Possible events:
+ * - 'go ahead' from the async reserver. We send a GRANT message to the
+ *   primary & transition to ReplicaReserved.
+ * - 'cancel' from the primary. We clear our reservation state, and transition
+ *   back to ReplicaUnreserved.
+ * - a chunk request: shouldn't happen, but we handle it anyway. An error
+ *   is logged (to trigger test failures).
+ * - on interval change: handled by our parent state.
+ */
+struct ReplicaWaitingReservation
+    : sc::state<ReplicaWaitingReservation, ReplicaIdle>,
+      NamedSimply {
+  explicit ReplicaWaitingReservation(my_context ctx);
+
+  using reactions = mpl::list<
+      // the 'normal' (expected) events:
+      sc::custom_reaction<ReplicaRelease>,
+      sc::custom_reaction<StartReplica>,
+      // unexpected (bug-induced) events:
+      sc::custom_reaction<ReplicaReserveReq>,
+      sc::custom_reaction<ReserverGranted>>;
+
+  sc::result react(const ReplicaRelease& ev);
+  sc::result react(const StartReplica& ev);
+  sc::result react(const ReserverGranted&);
+  sc::result react(const ReplicaReserveReq& ev);
+};
+
+/**
+ * ReplicaReserved
+ *
+ * Possible events:
+ * - 'cancel' from the primary. We clear our reservation state, and transition
+ *   back to ReplicaUnreserved.
+ * - a chunk scrub request. We transition to ReplicaActiveOp.
+ * - on interval change: we clear our reservation state, and transition
+ *   back to ReplicaUnreserved.
+ */
+struct ReplicaReserved : sc::state<ReplicaReserved, ReplicaIdle>, NamedSimply {
+  explicit ReplicaReserved(my_context ctx);
+
+  using reactions = mpl::list<
+      sc::custom_reaction<ReplicaReserveReq>,
+      sc::custom_reaction<StartReplica>,
+      sc::custom_reaction<ReplicaRelease>>;
+
+  sc::result react(const ReplicaReserveReq&);
+  sc::result react(const ReplicaRelease&);
+  sc::result react(const StartReplica& eq);
 };
 
 
@@ -833,7 +937,6 @@ struct ReplicaWaitUpdates : sc::state<ReplicaWaitUpdates, ReplicaActiveOp>,
   sc::result react(const ReplicaPushesUpd&);
 };
 
-
 struct ReplicaBuildingMap : sc::state<ReplicaBuildingMap, ReplicaActiveOp>,
                            NamedSimply {
   explicit ReplicaBuildingMap(my_context ctx);
index ff9587bb60b033aee07e6aa875fb123fb2c191af..afb154e3e16208184d23b53704e0604f7eeed197 100644 (file)
 #include "common/Formatter.h"
 #include "osd/osd_types.h"
 
+/*
+ * AsyncReserver for scrub 'remote' reservations
+ * -----------------------------------------------
+ *
+ * On the replica side, all reservations are treated as having the same priority.
+ * Note that 'high priority' scrubs, e.g. user-initiated scrubs, are not required
+ * to perform any reservations, and are never handled by the replicas' OSD.
+ *
+ * A queued scrub reservation request is cancelled by any of the following events:
+ *
+ * - a new interval: in this case, we do not expect to see a cancellation request
+ *   from the primary, and we can simply remove the request from the queue;
+ *
+ * - a cancellation request from the primary: probably a result of timing out on
+ *   the reservation process. Here, we can simply remove the request from the queue.
+ *
+ * - a new reservation request for the same PG: which means we had missed the
+ *   previous cancellation request. We cancel the previous request, and replace
+ *   it with the new one. We would also issue an error log message.
+ *
+ * Primary/Replica with differing versions:
+ *
+ * The updated version of MOSDScrubReserve contains a new 'OK to queue' field.
+ * For legacy Primary OSDs, this field is decoded as 'false', and the replica
+ * responds immediately, with grant/rejection.
+*/
+
 namespace Scrub {
 
 /**
@@ -73,6 +100,9 @@ class ScrubResources {
   /// increments the number of scrubs acting as a Replica
   bool inc_scrubs_remote(pg_t pgid);
 
+  /// queue a request with the scrub reserver
+  void enqueue_remote_reservation(pg_t pgid) {}
+
   /// decrements the number of scrubs acting as a Replica
   void dec_scrubs_remote(pg_t pgid);