]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
osd/scrub: discard ReplicaIdle sub-states
authorRonen Friedman <rfriedma@redhat.com>
Mon, 25 Mar 2024 14:27:31 +0000 (09:27 -0500)
committerRonen Friedman <rfriedma@redhat.com>
Tue, 2 Apr 2024 11:16:01 +0000 (06:16 -0500)
The 'reservation requested' & 'reservation was granted' states
are now maintained directly in the ReplicaActive state.

'StartReplica' (or 'do process a chunk') - the last event
handled by the sub-states - is now handled by ReplicaIdle.

Signed-off-by: Ronen Friedman <rfriedma@redhat.com>
src/osd/scrubber/scrub_machine.cc
src/osd/scrubber/scrub_machine.h

index e4b41977d7744299904a266022e3eabd0d09c5be..d6c8cb43028626467cf25985bd01ed843f0cd0bf 100644 (file)
@@ -767,6 +767,12 @@ ReplicaActive::~ReplicaActive()
   clear_remote_reservation(false);
 }
 
+void ReplicaActive::exit()
+{
+  dout(20) << "ReplicaActive::exit()" << dendl;
+}
+
+
 /*
  * Note: we are expected to be in the initial internal state (Idle) when
  * receiving any registration request. ReplicaActiveOp, our other internal
@@ -943,84 +949,32 @@ ReplicaIdle::ReplicaIdle(my_context ctx)
   dout(10) << "-- state -->> ReplicaActive/ReplicaIdle" << dendl;
 }
 
-void ReplicaIdle::reset_ignored(const FullReset&)
-{
-  dout(10) << "ReplicaIdle::react(const FullReset&): FullReset ignored"
-          << dendl;
-}
-
-
-// ---------------- ReplicaIdle/ReplicaUnreserved ---------------------------
 
-ReplicaUnreserved::ReplicaUnreserved(my_context ctx)
-    : my_base(ctx)
-    , NamedSimply(
-         context<ScrubMachine>().m_scrbr,
-         "ReplicaActive/ReplicaIdle/ReplicaUnreserved")
-{
-  dout(10) << "-- state -->> ReplicaActive/ReplicaIdle/ReplicaUnreserved"
-          << dendl;
-}
-
-
-sc::result ReplicaUnreserved::react(const StartReplica& ev)
+sc::result ReplicaIdle::react(const StartReplica& ev)
 {
   DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
-  dout(10) << "ReplicaUnreserved::react(const StartReplica&)" << dendl;
-  post_event(ReplicaPushesUpd{});
-  return transit<ReplicaActiveOp>();
-}
-
-
-// ---------------- ReplicaIdle/ReplicaWaitingReservation ---------------------------
-
-ReplicaWaitingReservation::ReplicaWaitingReservation(my_context ctx)
-    : my_base(ctx)
-    , NamedSimply(
-         context<ScrubMachine>().m_scrbr,
-         "ReplicaActive/ReplicaIdle/ReplicaWaitingReservation")
-{
-  dout(10)
-      << "-- state -->> ReplicaActive/ReplicaIdle/ReplicaWaitingReservation"
-      << dendl;
-}
-
-sc::result ReplicaWaitingReservation::react(const StartReplica& ev)
-{
-  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
-  dout(10) << "ReplicaWaitingReservation::react(const StartReplica&)" << dendl;
-
-  // this shouldn't happen. We will handle it, but will also log an error.
-  scrbr->get_clog()->error() << fmt::format(
-      "osd.{} pg[{}]: new chunk request while still waiting for "
-      "reservation",
-      scrbr->get_whoami(), scrbr->get_spgid());
-  context<ReplicaActive>().clear_remote_reservation(true);
+  dout(10) << "ReplicaIdle::react(const StartReplica&)" << dendl;
+
+  // if we are waiting for a reservation grant from the reserver (an
+  // illegal scenario!), that reservation must be cleared.
+  if (context<ReplicaActive>().pending_reservation_nonce) {
+    scrbr->get_clog()->warn() << fmt::format(
+       "osd.{} pg[{}]: new chunk request while still waiting for "
+       "reservation",
+       scrbr->get_whoami(), scrbr->get_spgid());
+    context<ReplicaActive>().clear_remote_reservation(true);
+  }
   post_event(ReplicaPushesUpd{});
   return transit<ReplicaActiveOp>();
 }
 
 
-// ---------------- ReplicaIdle/ReplicaReserved ---------------------------
-
-ReplicaReserved::ReplicaReserved(my_context ctx)
-    : my_base(ctx)
-    , NamedSimply(
-         context<ScrubMachine>().m_scrbr,
-         "ReplicaActive/ReplicaIdle/ReplicaReserved")
+void ReplicaIdle::reset_ignored(const FullReset&)
 {
-  dout(10) << "-- state -->> ReplicaActive/ReplicaIdle/ReplicaReserved"
+  dout(10) << "ReplicaIdle::react(const FullReset&): FullReset ignored"
           << dendl;
 }
 
-sc::result ReplicaReserved::react(const StartReplica& ev)
-{
-  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
-  dout(10) << "ReplicaReserved::react(const StartReplica&)" << dendl;
-  post_event(ReplicaPushesUpd{});
-  return transit<ReplicaActiveOp>();
-}
-
 
 // ------------- ReplicaActive/ReplicaActiveOp --------------------------
 
@@ -1134,20 +1088,22 @@ sc::result ReplicaBuildingMap::react(const SchedReplica&)
     dout(10) << "replica scrub job preempted" << dendl;
 
     scrbr->send_preempted_replica();
-    return transit<ReplicaReserved>();
+    return transit<ReplicaIdle>();
   }
 
   // start or check progress of build_replica_map_chunk()
-  auto ret_init = scrbr->build_replica_map_chunk();
-  if (ret_init != -EINPROGRESS) {
-    dout(10) << "ReplicaBuildingMap::react(const SchedReplica&): back to idle"
-            << dendl;
-    return transit<ReplicaReserved>();
+  if (scrbr->build_replica_map_chunk() == -EINPROGRESS) {
+    // Must ask the backend for the next stride shortly.
+    // build_replica_map_chunk() has already requeued us.
+    dout(20) << "waiting for the backend..." << dendl;
+    return discard_event();
   }
 
-  dout(20) << "ReplicaBuildingMap::react(const SchedReplica&): discarded"
+  // Note: build_replica_map_chunk() aborts the OSD on any backend retval
+  // which is not -EINPROGRESS or 0 ('done').
+  dout(10) << "ReplicaBuildingMap::react(const SchedReplica&): chunk done"
           << dendl;
-  return discard_event();
+  return transit<ReplicaIdle>();
 }
 
 }  // namespace Scrub
index 081baaf943b2e7d4016499251383bd87433e795f..3048b00688370932a3ddc046a0bc17500eac7cba 100644 (file)
@@ -268,12 +268,8 @@ struct Session;            ///< either reserving or actively scrubbing
 // the Replica states:
 struct ReplicaActive;  ///< base state for when peered as a replica
 
-/// Inactive replica state. Handles reservation requests
+/// Inactive replica state
 struct ReplicaIdle;
-// its sub-states:
-struct ReplicaUnreserved;      ///< not reserved by a primary
-struct ReplicaWaitingReservation;  ///< a reservation request was received from
-struct ReplicaReserved;               ///< we are reserved by our primary
 
 // and when handling a single chunk scrub request op:
 struct ReplicaActiveOp;
@@ -815,6 +811,7 @@ struct ReplicaActive : sc::state<ReplicaActive, ScrubMachine, ReplicaIdle>,
                       NamedSimply {
   explicit ReplicaActive(my_context ctx);
   ~ReplicaActive();
+  void exit();
 
   /**
    * cancel a granted or pending reservation
@@ -847,6 +844,12 @@ struct ReplicaActive : sc::state<ReplicaActive, ScrubMachine, ReplicaIdle>,
    */
   sc::result react(const ReserverGranted&);
 
+  /**
+   * a reservation request with this nonce is queued at the scrub_reserver,
+   * and was not yet granted.
+   */
+  MOSDScrubReserve::reservation_nonce_t pending_reservation_nonce{0};
+
  private:
   PG* m_pg;
   OSDService* m_osds;
@@ -878,12 +881,6 @@ struct ReplicaActive : sc::state<ReplicaActive, ScrubMachine, ReplicaIdle>,
 
   reservation_status_t m_reservation_status{reservation_status_t::unreserved};
 
-  /**
-   * a reservation request with this nonce is queued at the scrub_reserver,
-   * and was not yet granted.
-   */
-  MOSDScrubReserve::reservation_nonce_t pending_reservation_nonce{0};
-
   // clang-format off
   struct RtReservationCB : public Context {
     PGRef pg;
@@ -904,83 +901,18 @@ struct ReplicaActive : sc::state<ReplicaActive, ScrubMachine, ReplicaIdle>,
 };
 
 
-struct ReplicaIdle : sc::state<
-                        ReplicaIdle,
-                        ReplicaActive,
-                        ReplicaUnreserved>,
-                    NamedSimply {
+struct ReplicaIdle : sc::state<ReplicaIdle, ReplicaActive>, NamedSimply {
   explicit ReplicaIdle(my_context ctx);
   ~ReplicaIdle() = default;
   void reset_ignored(const FullReset&);
-  using reactions = mpl::list<sc::in_state_reaction<
-      FullReset,
-      ReplicaIdle,
-      &ReplicaIdle::reset_ignored>>;
-};
-
-/*
- * ReplicaUnreserved
- *
- * Possible events:
- * - a reservation request from a legacy primary (i.e. a primary that does not
- *   support queued reservations). We either deny or grant, transitioning to
- *   ReplicaReserved directly.
- * - a reservation request from a primary that supports queued reservations.
- *   We transition to ReplicaWaitingReservation, and wait for the Reserver's
- *   response.
- * - (handled by our parent state) a chunk scrub request. We transition to
- *   ReplicaActiveOp.
- */
-struct ReplicaUnreserved : sc::state<ReplicaUnreserved, ReplicaIdle>,
-                          NamedSimply {
-  explicit ReplicaUnreserved(my_context ctx);
-
-  using reactions = mpl::list<
-      sc::custom_reaction<StartReplica>>;
-
-  sc::result react(const StartReplica& ev);
-};
-
-/**
- * ReplicaWaitingReservation
- *
- * Possible events:
- * - 'go ahead' from the async reserver. We send a GRANT message to the
- *   primary & transition to ReplicaReserved.
- * - 'cancel' from the primary. We clear our reservation state, and transition
- *   back to ReplicaUnreserved.
- * - a chunk request: shouldn't happen, but we handle it anyway. An error
- *   is logged (to trigger test failures).
- * - on interval change: handled by our parent state.
- */
-struct ReplicaWaitingReservation
-    : sc::state<ReplicaWaitingReservation, ReplicaIdle>,
-      NamedSimply {
-  explicit ReplicaWaitingReservation(my_context ctx);
-
   using reactions = mpl::list<
-      sc::custom_reaction<StartReplica>>;
-
-  sc::result react(const StartReplica& ev);
-};
-
-/**
- * ReplicaReserved
- *
- * Possible events:
- * - 'cancel' from the primary. We clear our reservation state, and transition
- *   back to ReplicaUnreserved.
- * - a chunk scrub request. We transition to ReplicaActiveOp.
- * - on interval change: we clear our reservation state, and transition
- *   back to ReplicaUnreserved.
- */
-struct ReplicaReserved : sc::state<ReplicaReserved, ReplicaIdle>, NamedSimply {
-  explicit ReplicaReserved(my_context ctx);
-
-  using reactions = mpl::list<
-      sc::custom_reaction<StartReplica>>;
+      sc::custom_reaction<StartReplica>,
+      sc::in_state_reaction<
+         FullReset,
+         ReplicaIdle,
+         &ReplicaIdle::reset_ignored>>;
 
 sc::result react(const StartReplica& eq);
sc::result react(const StartReplica& ev);
 };