]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
osd/scrub: handle Release messages directly in ReplicaIdle
authorRonen Friedman <rfriedma@redhat.com>
Mon, 25 Mar 2024 13:06:41 +0000 (08:06 -0500)
committerRonen Friedman <rfriedma@redhat.com>
Tue, 2 Apr 2024 11:16:01 +0000 (06:16 -0500)
The expected path is that the scrubber will be in the ReplicaIdle
sub-state. If, however, we are processing a chunk (ReplicaActiveOp) -
the scrub is aborted, and the scrubber is returned to the ReplicaIdle.

Signed-off-by: Ronen Friedman <rfriedma@redhat.com>
src/osd/scrubber/scrub_machine.cc
src/osd/scrubber/scrub_machine.h

index 03b37c032aa5c142a9c30ea6fa080010a3fe45ee..e4b41977d7744299904a266022e3eabd0d09c5be 100644 (file)
@@ -897,14 +897,6 @@ sc::result ReplicaActive::react(const ReserverGranted& ev)
 }
 
 
-void ReplicaActive::on_release(const ReplicaRelease& ev)
-{
-  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
-  dout(10) << fmt::format("ReplicaActive::on_release() from {}", ev.m_from)
-          << dendl;
-  clear_remote_reservation(true);
-}
-
 void ReplicaActive::clear_remote_reservation(bool warn_if_no_reservation)
 {
   DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
@@ -917,16 +909,31 @@ void ReplicaActive::clear_remote_reservation(bool warn_if_no_reservation)
     m_osds->get_scrub_reserver().cancel_reservation(pg_id);
     reservation_granted = false;
     pending_reservation_nonce = 0;
+    ceph_assert(m_reservation_status != reservation_status_t::unreserved);
+    m_reservation_status = reservation_status_t::unreserved;
+
   } else if (warn_if_no_reservation) {
     const auto msg =
        "ReplicaActive::clear_remote_reservation(): "
        "not reserved!";
     dout(5) << msg << dendl;
-    scrbr->get_clog()->warn() << msg;
+    scrbr->get_clog()->info() << msg;
   }
 }
 
 
+sc::result ReplicaActive::react(const ReplicaRelease& ev)
+{
+  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
+  dout(10) << fmt::format(
+                 "ReplicaActive::react(const ReplicaRelease&) from {}",
+                 ev.m_from)
+          << dendl;
+  clear_remote_reservation(true);
+  return discard_event();
+}
+
+
 // ---------------- ReplicaActive/ReplicaIdle ---------------------------
 
 ReplicaIdle::ReplicaIdle(my_context ctx)
@@ -964,18 +971,6 @@ sc::result ReplicaUnreserved::react(const StartReplica& ev)
   return transit<ReplicaActiveOp>();
 }
 
-sc::result ReplicaUnreserved::react(const ReplicaRelease&)
-{
-  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
-  dout(10) << "ReplicaUnreserved::react(const ReplicaRelease&)" << dendl;
-  // this is a bug. We should never receive a release request unless we
-  // are reserved or have a pending reservation.
-  scrbr->get_clog()->error() << fmt::format(
-      "osd.{} pg[{}]: reservation released while not reserved",
-      scrbr->get_whoami(), scrbr->get_spgid());
-  return discard_event();
-}
-
 
 // ---------------- ReplicaIdle/ReplicaWaitingReservation ---------------------------
 
@@ -990,16 +985,6 @@ ReplicaWaitingReservation::ReplicaWaitingReservation(my_context ctx)
       << dendl;
 }
 
-sc::result ReplicaWaitingReservation::react(const ReplicaRelease& ev)
-{
-  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
-  dout(10) << "ReplicaWaitingReservation::react(const ReplicaRelease&)"
-          << dendl;
-  // must cancel the queued reservation request
-  context<ReplicaActive>().on_release(ev);
-  return transit<ReplicaUnreserved>();
-}
-
 sc::result ReplicaWaitingReservation::react(const StartReplica& ev)
 {
   DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
@@ -1028,14 +1013,6 @@ ReplicaReserved::ReplicaReserved(my_context ctx)
           << dendl;
 }
 
-sc::result ReplicaReserved::react(const ReplicaRelease& ev)
-{
-  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
-  dout(10) << "ReplicaReserved::react(const ReplicaRelease&)" << dendl;
-  context<ReplicaActive>().on_release(ev);
-  return transit<ReplicaUnreserved>();
-}
-
 sc::result ReplicaReserved::react(const StartReplica& ev)
 {
   DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
@@ -1098,8 +1075,7 @@ sc::result ReplicaActiveOp::react(const ReplicaReserveReq& ev)
 sc::result ReplicaActiveOp::react(const ReplicaRelease& ev)
 {
   dout(10) << "ReplicaActiveOp::react(const ReplicaRelease&)" << dendl;
-  post_event(ev);
-  return transit<ReplicaReserved>();
+  return transit<ReplicaActive>();
 }
 
 
index 3fc53da8e9e9809c4869a9099c59f8d5c318adae..081baaf943b2e7d4016499251383bd87433e795f 100644 (file)
@@ -769,16 +769,6 @@ struct WaitDigestUpdate : sc::state<WaitDigestUpdate, ActiveScrubbing>,
  *    - No scrubbing is performed in this state, but reservation-related
  *      events are handled.
  *
- *    - sub-states:
- *      * ReplicaUnreserved - not reserved by a primary. In this state we
- *        are waiting for either a reservation request, or a chunk scrub op.
- *
- *      * ReplicaWaitingReservation - a reservation request was received from
- *        our primary. We expect a ' go ahead' from the reserver, or a
- *        cancellation command from the primary (or an interval change).
- *
- *      * ReplicaReserved - we are reserved by a primary.
- *
  *  - ReplicaActiveOp - handling a single map request op
  *      * ReplicaWaitUpdates
  *      * ReplicaBuildingMap
@@ -826,9 +816,6 @@ struct ReplicaActive : sc::state<ReplicaActive, ScrubMachine, ReplicaIdle>,
   explicit ReplicaActive(my_context ctx);
   ~ReplicaActive();
 
-  /// handle a 'release' from a primary
-  void on_release(const ReplicaRelease& ev);
-
   /**
    * cancel a granted or pending reservation
    *
@@ -842,11 +829,18 @@ struct ReplicaActive : sc::state<ReplicaActive, ScrubMachine, ReplicaIdle>,
   using reactions = mpl::list<
       sc::transition<IntervalChanged, NotActive>,
       sc::custom_reaction<ReserverGranted>,
-      sc::custom_reaction<ReplicaReserveReq>>;
+      sc::custom_reaction<ReplicaReserveReq>,
+      sc::custom_reaction<ReplicaRelease>>;
 
   /// handle a reservation request from a primary
   sc::result react(const ReplicaReserveReq& ev);
 
+  /*
+   * the Primary released the reservation.
+   * Note: The ActiveReplicaOp state will handle this event as well.
+   */
+  sc::result react(const ReplicaRelease&);
+
   /**
    * the queued reservation request was granted by the async reserver.
    * Notify the Primary.
@@ -942,12 +936,9 @@ struct ReplicaUnreserved : sc::state<ReplicaUnreserved, ReplicaIdle>,
   explicit ReplicaUnreserved(my_context ctx);
 
   using reactions = mpl::list<
-      sc::custom_reaction<StartReplica>,
-      // unexpected (bug-induced) events:
-      sc::custom_reaction<ReplicaRelease>>;
+      sc::custom_reaction<StartReplica>>;
 
   sc::result react(const StartReplica& ev);
-  sc::result react(const ReplicaRelease&);
 };
 
 /**
@@ -968,11 +959,8 @@ struct ReplicaWaitingReservation
   explicit ReplicaWaitingReservation(my_context ctx);
 
   using reactions = mpl::list<
-      // the 'normal' (expected) events:
-      sc::custom_reaction<ReplicaRelease>,
       sc::custom_reaction<StartReplica>>;
 
-  sc::result react(const ReplicaRelease& ev);
   sc::result react(const StartReplica& ev);
 };
 
@@ -990,10 +978,8 @@ struct ReplicaReserved : sc::state<ReplicaReserved, ReplicaIdle>, NamedSimply {
   explicit ReplicaReserved(my_context ctx);
 
   using reactions = mpl::list<
-      sc::custom_reaction<StartReplica>,
-      sc::custom_reaction<ReplicaRelease>>;
+      sc::custom_reaction<StartReplica>>;
 
-  sc::result react(const ReplicaRelease&);
   sc::result react(const StartReplica& eq);
 };
 
@@ -1029,10 +1015,8 @@ struct ReplicaActiveOp
 
   /**
    * a 'release' was send by the primary. Possible scenario: 'no-scrub'
-   * abort. Our two-steps reaction:
-   * - we exit the 'ActiveOp' state, and
-   * - we make sure the 'release' is remembered, to be handled by the state
-   *   we would transition into (which should be ReplicaReserved).
+   * abort. We abort the current chunk handling and re-enter ReplicaActive,
+   * releasing the reservation on the way.
    */
   sc::result react(const ReplicaRelease&);