git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd/scrub: handle 'release' events sent during 'scrub abort' 56132/head
author Ronen Friedman <rfriedma@redhat.com>
Mon, 11 Mar 2024 17:54:01 +0000 (12:54 -0500)
committer Ronen Friedman <rfriedma@redhat.com>
Tue, 12 Mar 2024 13:53:05 +0000 (08:53 -0500)
Scenario:
- the replica is reserved;
- the Primary initiates a chunk operation;
- the replica is in ReplicaActive/ReplicaActiveOp/ReplicaBuildingMap;
- 'no-scrub' is set, and the Primary sends a 'release' event to the
  replica.

Desired behavior:
- the replica aborts the chunk operation and transitions to
  ReplicaReserved;
- the 'release' event is delivered in the new state.

Fixes: https://tracker.ceph.com/issues/64827
Signed-off-by: Ronen Friedman <rfriedma@redhat.com>
src/osd/scrubber/scrub_machine.cc
src/osd/scrubber/scrub_machine.h

index ce4196e1ebbb13a832d3499a7d14b4e8275450d1..33da35cd8c85b3c2221c5d998807cee2a9e682e5 100644 (file)
@@ -886,7 +886,7 @@ void ReplicaActive::clear_remote_reservation(bool warn_if_no_reservation)
   dout(10) << fmt::format(
                  "ReplicaActive::clear_remote_reservation(): "
                  "pending_reservation_nonce {}, reservation_granted {}",
-                 reservation_granted, pending_reservation_nonce)
+                 pending_reservation_nonce, reservation_granted)
           << dendl;
   if (reservation_granted || pending_reservation_nonce) {
     m_osds->get_scrub_reserver().cancel_reservation(pg_id);
@@ -1149,6 +1149,14 @@ sc::result ReplicaActiveOp::react(const StartReplica&)
   return transit<ReplicaActiveOp>();
 }
 
+sc::result ReplicaActiveOp::react(const ReplicaRelease& ev)
+{
+  dout(10) << "ReplicaActiveOp::react(const ReplicaRelease&)" << dendl;
+  post_event(ev);  // keep the 'release' so the state we transition into handles it
+  return transit<sc::shallow_history<ReplicaReserved>>();  // leave ActiveOp (aborts the chunk op)
+}
+
+
 // ------------- ReplicaActive/ReplicaWaitUpdates ------------------------
 
 ReplicaWaitUpdates::ReplicaWaitUpdates(my_context ctx)
index 254e7861ed9568eae79a086f2a863ece7662d30b..d56059c6bf87fd653ca51e2b9907411a898543ab 100644 (file)
@@ -1047,6 +1047,7 @@ struct ReplicaActiveOp
 
   using reactions = mpl::list<
       sc::custom_reaction<StartReplica>,
+      sc::custom_reaction<ReplicaRelease>,
       sc::transition<FullReset, ReplicaIdle>>;
 
   /**
@@ -1060,6 +1061,15 @@ struct ReplicaActiveOp
    * - and we should log this unexpected scenario clearly in the cluster log.
    */
   sc::result react(const StartReplica&);
+
+  /**
+   * a 'release' was sent by the primary. Possible scenario: 'no-scrub'
+   * abort. Our two-step reaction:
+   * - we exit the 'ActiveOp' state, and
+   * - we make sure the 'release' is remembered, to be handled by the state
+   *   we would transition into (which should be ReplicaReserved).
+   */
+  sc::result react(const ReplicaRelease&);
 };
 
 /*