osd/scrubber: associate replica state with state machine states

author Samuel Just <sjust@redhat.com>

Thu, 23 Feb 2023 05:04:17 +0000 (21:04 -0800)

committer Samuel Just <sjust@redhat.com>

Wed, 12 Apr 2023 03:39:19 +0000 (20:39 -0700)
author Samuel Just <sjust@redhat.com>
Thu, 23 Feb 2023 05:04:17 +0000 (21:04 -0800)
committer Samuel Just <sjust@redhat.com>
Wed, 12 Apr 2023 03:39:19 +0000 (20:39 -0700)
diff --git a/src/osd/scrubber/pg_scrubber.cc b/src/osd/scrubber/pg_scrubber.cc

index 19288758cd73dd8b1499fc16c526f7c2e778f29d..93749e7c5a5b94b6690d9042cb92f8f8f3c2dd2b 100644 (file)
--- a/src/osd/scrubber/pg_scrubber.cc
+++ b/src/osd/scrubber/pg_scrubber.cc
@@ -215,6 +215,16 @@ void PgScrubber::initiate_regular_scrub(epoch_t epoch_queued)
    }
  }
  
+void PgScrubber::dec_scrubs_remote()
+{
+  m_osds->get_scrub_services().dec_scrubs_remote();
+}
+
+void PgScrubber::advance_token()
+{
+  m_current_token++;
+}
+
  void PgScrubber::initiate_scrub_after_repair(epoch_t epoch_queued)
  {
    dout(15) << __func__ << " epoch: " << epoch_queued << dendl;
@@ -1392,27 +1402,6 @@ void PgScrubber::replica_scrub_op(OpRequestRef op)
      return;
    }
  
-  if (is_queued_or_active()) {
-    // this is bug!
-    // Somehow, we have received a new scrub request from our Primary, before
-    // having finished with the previous one. Did we go through an interval
-    // change without reseting the FSM? Possible responses:
-    // - crashing (the original assert_not_active() implemented that one), or
-    // - trying to recover:
-    //  - (logging enough information to debug this scenario)
-    //  - reset the FSM.
-    m_osds->clog->warn() << fmt::format(
-      "{}: error: a second scrub-op received while handling the previous one",
-      __func__);
-
-    scrub_clear_state();
-    m_osds->clog->warn() << fmt::format(
-      "{}: after a reset. Now handling the new OP",
-      __func__);
-  }
-  // make sure the FSM is at NotActive
-  m_fsm->assert_not_active();
-
    replica_scrubmap = ScrubMap{};
    replica_scrubmap_pos = ScrubMapBuilder{};
  
@@ -1587,29 +1576,19 @@ void PgScrubber::handle_scrub_reserve_request(OpRequestRef op)
      return;
    }
  
-  // Purely a debug check, PgScrubber::on_new_interval should have cleared this
-  if (m_remote_osd_resource.has_value()) {
-    epoch_t e = m_remote_osd_resource->get_reservation_epoch();
-    if (!check_interval(e)) {
-      derr << __func__ << ": BUG: stale remote osd resource from epoch " << e
-          << dendl;
-    }
-  }
-
    /* The primary may unilaterally restart the scrub process without notifying
     * replicas.  Unconditionally clear any existing state prior to handling
     * the new reservation. */
-  reset_replica_state();
+  m_fsm->process_event(FullReset{});
    
    bool granted{false};
    if (m_pg->cct->_conf->osd_scrub_during_recovery ||
        !m_osds->is_recovery_active()) {
-    m_remote_osd_resource.emplace(this, m_pg, m_osds, request_ep);
-    // OSD resources allocated?
-    granted = m_remote_osd_resource->is_reserved();
-    if (!granted) {
-      // just forget it
-      m_remote_osd_resource.reset();
+
+    granted = m_osds->get_scrub_services().inc_scrubs_remote();
+    if (granted) {
+      m_fsm->process_event(ReplicaGrantReservation{});
+    } else {
        dout(20) << __func__ << ": failed to reserve remotely" << dendl;
      }
    } else {
@@ -1660,7 +1639,7 @@ void PgScrubber::handle_scrub_reserve_release(OpRequestRef op)
     * this specific scrub session has terminated. All incoming events carrying
     *  the old tag will be discarded.
     */
-  reset_replica_state();
+  m_fsm->process_event(FullReset{});
  }
  
  void PgScrubber::discard_replica_reservations()
@@ -1676,7 +1655,6 @@ void PgScrubber::clear_scrub_reservations()
    dout(10) << __func__ << dendl;
    m_reservations.reset();        // the remote reservations
    m_local_osd_resource.reset();          // the local reservation
-  m_remote_osd_resource.reset();  // we as replica reserved for a Primary
  }
  
  void PgScrubber::message_all_replicas(int32_t opcode, std::string_view op_text)
@@ -2337,17 +2315,6 @@ void PgScrubber::reset_internal_state()
    m_be.reset();
  }
  
-void PgScrubber::reset_replica_state()
-{
-  dout(10) << fmt::format("{}: prev. token:{}", __func__, m_current_token)
-          << dendl;
-
-  m_current_token++;
-  m_remote_osd_resource.reset();
-  replica_handling_done();
-  m_fsm->process_event(FullReset{});
-}
-
  bool PgScrubber::is_token_current(Scrub::act_token_t received_token)
  {
    if (received_token == 0 || received_token == m_current_token) {
@@ -2745,41 +2712,6 @@ LocalReservation::~LocalReservation()
    }
  }
  
-// ///////////////////// ReservedByRemotePrimary ///////////////////////////////
-
-ReservedByRemotePrimary::ReservedByRemotePrimary(const PgScrubber* scrubber,
-                                                PG* pg,
-                                                OSDService* osds,
-                                                epoch_t epoch)
-    : m_scrubber{scrubber}
-    , m_pg{pg}
-    , m_osds{osds}
-    , m_reserved_at{epoch}
-{
-  if (!m_osds->get_scrub_services().inc_scrubs_remote()) {
-    dout(10) << __func__ << ": failed to reserve at Primary request" << dendl;
-    // the failure is signalled by not having m_reserved_by_remote_primary set
-    return;
-  }
-
-  dout(20) << __func__ << ": scrub resources reserved at Primary request"
-          << dendl;
-  m_reserved_by_remote_primary = true;
-}
-
-ReservedByRemotePrimary::~ReservedByRemotePrimary()
-{
-  if (m_reserved_by_remote_primary) {
-    m_reserved_by_remote_primary = false;
-    m_osds->get_scrub_services().dec_scrubs_remote();
-  }
-}
-
-std::ostream& ReservedByRemotePrimary::gen_prefix(std::ostream& out) const
-{
-  return m_scrubber->gen_prefix(out);
-}
-
  // ///////////////////// MapsCollectionStatus ////////////////////////////////
  
  auto MapsCollectionStatus::mark_arriving_map(pg_shard_t from)
diff --git a/src/osd/scrubber/pg_scrubber.h b/src/osd/scrubber/pg_scrubber.h

index f67987a4b44e29f4dabaf80596dfc70077e4495e..b44bf40e500ad01a6d8dbf81483249d2399ca791 100644 (file)
--- a/src/osd/scrubber/pg_scrubber.h
+++ b/src/osd/scrubber/pg_scrubber.h
@@ -189,33 +189,6 @@ class LocalReservation {
    bool is_reserved() const { return m_holding_local_reservation; }
  };
  
-/**
- *  wraps the OSD resource we are using when reserved as a replica by a
- *  scrubbing primary.
- */
-class ReservedByRemotePrimary {
-  const PgScrubber* m_scrubber;         ///< we will be using its gen_prefix()
-  PG* m_pg;
-  OSDService* m_osds;
-  bool m_reserved_by_remote_primary{false};
-  const epoch_t m_reserved_at;
-
- public:
-  ReservedByRemotePrimary(const PgScrubber* scrubber,
-                         PG* pg,
-                         OSDService* osds,
-                         epoch_t epoch);
-  ~ReservedByRemotePrimary();
-  [[nodiscard]] bool is_reserved() const
-  {
-    return m_reserved_by_remote_primary;
-  }
-
-  epoch_t get_reservation_epoch() const { return m_reserved_at; }
-
-  std::ostream& gen_prefix(std::ostream& out) const;
-};
-
  /**
   * Once all replicas' scrub maps are received, we go on to compare the maps.
   * That is - unless we we have not yet completed building our own scrub map.
@@ -585,6 +558,10 @@ class PgScrubber : public ScrubPgIF,
    /// Clears `m_queued_or_active` and restarts snaptrimming
    void clear_queued_or_active() final;
  
+  void dec_scrubs_remote() final;
+
+  void advance_token() final;
+
    void mark_local_map_ready() final;
  
    [[nodiscard]] bool are_all_maps_available() const final;
@@ -665,12 +642,6 @@ class PgScrubber : public ScrubPgIF,
   private:
    void reset_internal_state();
  
-  /**
-   *  the current scrubbing operation is done. We should mark that fact, so that
-   *  all events related to the previous operation can be discarded.
-   */
-  void reset_replica_state();
-
    bool is_token_current(Scrub::act_token_t received_token);
  
    void requeue_waiting() const { m_pg->requeue_ops(m_pg->waiting_for_scrub); }
@@ -745,10 +716,6 @@ class PgScrubber : public ScrubPgIF,
    std::optional<Scrub::ReplicaReservations> m_reservations;
    std::optional<Scrub::LocalReservation> m_local_osd_resource;
  
-  /// the 'remote' resource we, as a replica, grant our Primary when it is
-  /// scrubbing
-  std::optional<Scrub::ReservedByRemotePrimary> m_remote_osd_resource;
-
    void cleanup_on_finish();  // scrub_clear_state() as called for a Primary when
                              // Active->NotActive
  
diff --git a/src/osd/scrubber/scrub_machine.cc b/src/osd/scrubber/scrub_machine.cc

index 51857d2f1c5533393fe5e25e6884641ec0e537fa..9acb9c215e8ab96ffcf31894543c013a841615af 100644 (file)
--- a/src/osd/scrubber/scrub_machine.cc
+++ b/src/osd/scrubber/scrub_machine.cc
@@ -606,6 +606,51 @@ ScrubMachine::~ScrubMachine() = default;
  
  // -------- for replicas -----------------------------------------------------
  
+// ----------------------- ReservedReplica --------------------------------
+
+ReservedReplica::ReservedReplica(my_context ctx)
+    : my_base(ctx)
+    , NamedSimply(context<ScrubMachine>().m_scrbr, "ReservedReplica")
+{
+  dout(10) << "-- state -->> ReservedReplica" << dendl;
+}
+
+ReservedReplica::~ReservedReplica()
+{
+  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
+  scrbr->dec_scrubs_remote();
+  scrbr->advance_token();
+}
+
+// ----------------------- ReplicaIdle --------------------------------
+
+ReplicaIdle::ReplicaIdle(my_context ctx)
+    : my_base(ctx)
+    , NamedSimply(context<ScrubMachine>().m_scrbr, "ReplicaIdle")
+{
+  dout(10) << "-- state -->> ReplicaIdle" << dendl;
+}
+
+ReplicaIdle::~ReplicaIdle()
+{
+}
+
+
+// ----------------------- ReplicaActiveOp --------------------------------
+
+ReplicaActiveOp::ReplicaActiveOp(my_context ctx)
+    : my_base(ctx)
+    , NamedSimply(context<ScrubMachine>().m_scrbr, "ReplicaActiveOp")
+{
+  dout(10) << "-- state -->> ReplicaActiveOp" << dendl;
+}
+
+ReplicaActiveOp::~ReplicaActiveOp()
+{
+  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
+  scrbr->replica_handling_done();
+}
+
  // ----------------------- ReplicaWaitUpdates --------------------------------
  
  ReplicaWaitUpdates::ReplicaWaitUpdates(my_context ctx)
@@ -635,15 +680,6 @@ sc::result ReplicaWaitUpdates::react(const ReplicaPushesUpd&)
    return discard_event();
  }
  
-/**
- * the event poster is handling the scrubber reset
- */
-sc::result ReplicaWaitUpdates::react(const FullReset&)
-{
-  dout(10) << "ReplicaWaitUpdates::react(const FullReset&)" << dendl;
-  return transit<NotActive>();
-}
-
  // ----------------------- ReplicaBuildingMap -----------------------------------
  
  ReplicaBuildingMap::ReplicaBuildingMap(my_context ctx)
@@ -668,25 +704,16 @@ sc::result ReplicaBuildingMap::react(const SchedReplica&)
  
      scrbr->send_preempted_replica();
      scrbr->replica_handling_done();
-    return transit<NotActive>();
+    return transit<ReplicaIdle>();
    }
  
    // start or check progress of build_replica_map_chunk()
    auto ret_init = scrbr->build_replica_map_chunk();
    if (ret_init != -EINPROGRESS) {
-    return transit<NotActive>();
+    return transit<ReplicaIdle>();
    }
  
    return discard_event();
  }
  
-/**
- * the event poster is handling the scrubber reset
- */
-sc::result ReplicaBuildingMap::react(const FullReset&)
-{
-  dout(10) << "ReplicaBuildingMap::react(const FullReset&)" << dendl;
-  return transit<NotActive>();
-}
-
  }  // namespace Scrub
diff --git a/src/osd/scrubber/scrub_machine.h b/src/osd/scrubber/scrub_machine.h

index 45af1dc8d0f6722e9df1d7d927e3e6fa9447e4c5..7bea55c9235d5f27e141e6af303a8c8569878d1c 100644 (file)
--- a/src/osd/scrubber/scrub_machine.h
+++ b/src/osd/scrubber/scrub_machine.h
@@ -115,6 +115,9 @@ MEV(DigestUpdate)
  /// maps_compare_n_cleanup() transactions are done
  MEV(MapsCompared)
  
+/// event emitted when the replica grants a reservation to the primary
+MEV(ReplicaGrantReservation)
+
  /// initiating replica scrub
  MEV(StartReplica)
  
@@ -143,8 +146,7 @@ MEV(ScrubFinished)
  struct NotActive;          ///< the quiescent state. No active scrubbing.
  struct ReservingReplicas;   ///< securing scrub resources from replicas' OSDs
  struct ActiveScrubbing;            ///< the active state for a Primary. A sub-machine.
-struct ReplicaWaitUpdates;  ///< an active state for a replica. Waiting for all
-                           ///< active operations to finish.
+struct ReplicaIdle;         ///< Initial reserved replica state
  struct ReplicaBuildingMap;         ///< an active state for a replica.
  
  
@@ -310,8 +312,7 @@ struct NotActive : sc::state<NotActive, ScrubMachine>, NamedSimply {
      mpl::list<sc::custom_reaction<StartScrub>,
               // a scrubbing that was initiated at recovery completion:
               sc::custom_reaction<AfterRepairScrub>,
-             sc::transition<StartReplica, ReplicaWaitUpdates>,
-             sc::transition<StartReplicaNoWait, ReplicaBuildingMap>>;
+             sc::transition<ReplicaGrantReservation, ReplicaIdle>>;
    sc::result react(const StartScrub&);
    sc::result react(const AfterRepairScrub&);
  };
@@ -510,6 +511,49 @@ struct WaitDigestUpdate : sc::state<WaitDigestUpdate, ActiveScrubbing>,
  
  // ----------------------------- the "replica active" states
  
+/**
+ * ReservedReplica
+ *
+ * Parent state for replica states,  Controls lifecycle for
+ * PgScrubber::m_reservations.
+ */
+struct ReservedReplica : sc::state<ReservedReplica, ScrubMachine, ReplicaIdle>,
+                        NamedSimply {
+  explicit ReservedReplica(my_context ctx);
+  ~ReservedReplica();
+
+  using reactions = mpl::list<sc::transition<FullReset, NotActive>>;
+};
+
+struct ReplicaWaitUpdates;
+
+/**
+ * ReplicaIdle
+ *
+ * Replica is waiting for a map request.
+ */
+struct ReplicaIdle : sc::state<ReplicaIdle, ReservedReplica>,
+                    NamedSimply {
+  explicit ReplicaIdle(my_context ctx);
+  ~ReplicaIdle();
+
+  using reactions = mpl::list<
+    sc::transition<StartReplica, ReplicaWaitUpdates>,
+    sc::transition<StartReplicaNoWait, ReplicaBuildingMap>>;
+};
+
+/**
+ * ReservedActiveOp
+ *
+ * Lifetime matches handling for a single map request op
+ */
+struct ReplicaActiveOp
+  : sc::state<ReplicaActiveOp, ReservedReplica, ReplicaWaitUpdates>,
+    NamedSimply {
+  explicit ReplicaActiveOp(my_context ctx);
+  ~ReplicaActiveOp();
+};
+
  /*
   * Waiting for 'active_pushes' to complete
   *
@@ -517,24 +561,21 @@ struct WaitDigestUpdate : sc::state<WaitDigestUpdate, ActiveScrubbing>,
   * - the details of the Primary's request were internalized by PgScrubber;
   * - 'active' scrubbing is set
   */
-struct ReplicaWaitUpdates : sc::state<ReplicaWaitUpdates, ScrubMachine>,
+struct ReplicaWaitUpdates : sc::state<ReplicaWaitUpdates, ReservedReplica>,
                             NamedSimply {
    explicit ReplicaWaitUpdates(my_context ctx);
-  using reactions = mpl::list<sc::custom_reaction<ReplicaPushesUpd>,
-                             sc::custom_reaction<FullReset>>;
+  using reactions = mpl::list<sc::custom_reaction<ReplicaPushesUpd>>;
  
    sc::result react(const ReplicaPushesUpd&);
-  sc::result react(const FullReset&);
  };
  
  
-struct ReplicaBuildingMap : sc::state<ReplicaBuildingMap, ScrubMachine>, NamedSimply {
+struct ReplicaBuildingMap : sc::state<ReplicaBuildingMap, ReservedReplica>
+                         , NamedSimply {
    explicit ReplicaBuildingMap(my_context ctx);
-  using reactions = mpl::list<sc::custom_reaction<SchedReplica>,
-                             sc::custom_reaction<FullReset>>;
+  using reactions = mpl::list<sc::custom_reaction<SchedReplica>>;
  
    sc::result react(const SchedReplica&);
-  sc::result react(const FullReset&);
  };
  
  }  // namespace Scrub
diff --git a/src/osd/scrubber/scrub_machine_lstnr.h b/src/osd/scrubber/scrub_machine_lstnr.h

index 15e3c89c89f94468fb40e34b184eb2521f7bc3c5..8f4dd8201c105e7f352cda6062d974eb8ea96858 100644 (file)
--- a/src/osd/scrubber/scrub_machine_lstnr.h
+++ b/src/osd/scrubber/scrub_machine_lstnr.h
@@ -200,6 +200,12 @@ struct ScrubMachineListener {
    virtual void set_queued_or_active() = 0;
    virtual void clear_queued_or_active() = 0;
  
+  /// Release remote scrub reservation
+  virtual void dec_scrubs_remote() = 0;
+
+  /// Advance replica token
+  virtual void advance_token() = 0;
+
    /**
     * Our scrubbing is blocked, waiting for an excessive length of time for
     * our target chunk to be unlocked. We will set the corresponding flags,
author	Samuel Just <sjust@redhat.com>
	Thu, 23 Feb 2023 05:04:17 +0000 (21:04 -0800)
committer	Samuel Just <sjust@redhat.com>
	Wed, 12 Apr 2023 03:39:19 +0000 (20:39 -0700)
src/osd/scrubber/pg_scrubber.cc		patch \| blob \| history
src/osd/scrubber/pg_scrubber.h		patch \| blob \| history
src/osd/scrubber/scrub_machine.cc		patch \| blob \| history
src/osd/scrubber/scrub_machine.h		patch \| blob \| history
src/osd/scrubber/scrub_machine_lstnr.h		patch \| blob \| history