osd/scrub: add a "clean primary" base state

author Ronen Friedman <rfriedma@redhat.com>

Thu, 21 Dec 2023 17:29:09 +0000 (11:29 -0600)

committer Ronen Friedman <rfriedma@redhat.com>

Thu, 28 Dec 2023 20:58:05 +0000 (14:58 -0600)
author Ronen Friedman <rfriedma@redhat.com>
Thu, 21 Dec 2023 17:29:09 +0000 (11:29 -0600)
committer Ronen Friedman <rfriedma@redhat.com>
Thu, 28 Dec 2023 20:58:05 +0000 (14:58 -0600)
diff --git a/src/osd/PG.cc b/src/osd/PG.cc

index 8420fd432a3ff1b621715549828d416b188a02b7..3138b8c32f9cc77094d61a3ed52f962c45d5a49b 100644 (file)
--- a/src/osd/PG.cc
+++ b/src/osd/PG.cc
@@ -1861,6 +1861,16 @@ void PG::on_active_exit()
    agent_stop();
  }
  
+Context* PG::on_clean()
+{
+  if (is_active()) {
+    kick_snap_trim();
+  }
+  m_scrubber->on_primary_active_clean();
+  requeue_ops(waiting_for_clean_to_primary_repair);
+  return finish_recovery();
+}
+
  void PG::on_active_advmap(const OSDMapRef &osdmap)
  {
    const auto& new_removed_snaps = osdmap->get_new_removed_snaps();
diff --git a/src/osd/PG.h b/src/osd/PG.h

index 9272000eb34d825d0c216b1125d9ffd4d1b6a773..2a823f675969af02041c2e0c09f44a35f20b3cea 100644 (file)
--- a/src/osd/PG.h
+++ b/src/osd/PG.h
@@ -608,13 +608,7 @@ public:
  
    void on_active_exit() override;
  
-  Context *on_clean() override {
-    if (is_active()) {
-      kick_snap_trim();
-    }
-    requeue_ops(waiting_for_clean_to_primary_repair);
-    return finish_recovery();
-  }
+  Context *on_clean() override;
  
    void on_activate(interval_set<snapid_t> snaps) override;
  
diff --git a/src/osd/scrubber/pg_scrubber.cc b/src/osd/scrubber/pg_scrubber.cc

index 253962f0fd492b64debd679a923584861b26599d..dd279f720add56423de1f80e3209dbe0d20c8b21 100644 (file)
--- a/src/osd/scrubber/pg_scrubber.cc
+++ b/src/osd/scrubber/pg_scrubber.cc
@@ -152,7 +152,7 @@ bool PgScrubber::verify_against_abort(epoch_t epoch_to_verify)
  
    // if we were not aware of the abort before - kill the scrub.
    if (epoch_to_verify >= m_last_aborted) {
-    scrub_clear_state();
+    m_fsm->process_event(FullReset{});
      m_last_aborted = std::max(epoch_to_verify, m_epoch_start);
    }
    return false;
@@ -407,7 +407,7 @@ void PgScrubber::reset_epoch(epoch_t epoch_queued)
  {
    dout(10) << __func__ << " state deep? " << state_test(PG_STATE_DEEP_SCRUB)
            << dendl;
-  m_fsm->assert_not_active();
+  m_fsm->assert_not_in_session();
  
    m_epoch_start = epoch_queued;
    ceph_assert(m_is_deep == state_test(PG_STATE_DEEP_SCRUB));
@@ -462,6 +462,11 @@ void PgScrubber::on_new_interval()
            << dendl;
  
    m_fsm->process_event(IntervalChanged{});
+  // the following asserts were added due to a past bug, where PG flags were
+  // left set in some scenarios.
+  ceph_assert(!is_queued_or_active());
+  ceph_assert(!state_test(PG_STATE_SCRUBBING));
+  ceph_assert(!state_test(PG_STATE_DEEP_SCRUB));
    rm_from_osd_scrubbing();
  }
  
@@ -517,6 +522,14 @@ void PgScrubber::on_pg_activate(const requested_scrub_t& request_flags)
            << dendl;
  }
  
+void PgScrubber::on_primary_active_clean()
+{
+  dout(10) << fmt::format(
+                 "{}: reg state: {}", __func__, m_scrub_job->state_desc())
+          << dendl;
+  m_fsm->process_event(PrimaryActivate{});
+}
+
  /*
   * A note re the call to publish_stats_to_osd() below:
   * - we are called from either request_rescrubbing() or scrub_requested().
diff --git a/src/osd/scrubber/pg_scrubber.h b/src/osd/scrubber/pg_scrubber.h

index c6be51b2cae63bf6609c10dc22a232a06a7bc5d2..af667f732143c82d3968827886cb58cbe7fdd34a 100644 (file)
--- a/src/osd/scrubber/pg_scrubber.h
+++ b/src/osd/scrubber/pg_scrubber.h
@@ -314,6 +314,8 @@ class PgScrubber : public ScrubPgIF,
  
    void on_new_interval() final;
  
+  void on_primary_active_clean() final;
+
    void on_replica_activate() final;
  
    void scrub_clear_state() final;
diff --git a/src/osd/scrubber/scrub_machine.cc b/src/osd/scrubber/scrub_machine.cc

index e0da61dde0f85dd723e6bd97de863c052180fec4..1928eed7d0cd4ea08a61129e25c5cc2505ff70be 100644 (file)
--- a/src/osd/scrubber/scrub_machine.cc
+++ b/src/osd/scrubber/scrub_machine.cc
@@ -47,9 +47,9 @@ void on_event_discard(std::string_view nm)
    dout(20) << " event: --^^^^---- " << nm << dendl;
  }
  
-void ScrubMachine::assert_not_active() const
+void ScrubMachine::assert_not_in_session() const
  {
-  ceph_assert(state_cast<const NotActive*>());
+  ceph_assert(!state_cast<const Session*>());
  }
  
  bool ScrubMachine::is_reserving() const
@@ -114,27 +114,64 @@ NotActive::NotActive(my_context ctx)
    scrbr->clear_queued_or_active();
  }
  
-sc::result NotActive::react(const StartScrub&)
+
+// ----------------------- PrimaryActive --------------------------------
+
+PrimaryActive::PrimaryActive(my_context ctx)
+    : my_base(ctx)
+    , NamedSimply(context<ScrubMachine>().m_scrbr, "PrimaryActive")
+{
+  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
+  dout(10) << "-- state -->> PrimaryActive" << dendl;
+}
+
+PrimaryActive::~PrimaryActive()
+{
+  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
+  // we may have set some PG state flags without reaching Session.
+  // And we may be holding a 'local resource'.
+  scrbr->clear_pgscrub_state();
+  scrbr->rm_from_osd_scrubbing();
+}
+
+
+// ---------------- PrimaryActive/PrimaryIdle ---------------------------
+
+PrimaryIdle::PrimaryIdle(my_context ctx)
+    : my_base(ctx)
+    , NamedSimply(context<ScrubMachine>().m_scrbr, "PrimaryActive/PrimaryIdle")
+{
+  dout(10) << "-- state -->> PrimaryActive/PrimaryIdle" << dendl;
+}
+
+sc::result PrimaryIdle::react(const StartScrub&)
  {
-  dout(10) << "NotActive::react(const StartScrub&)" << dendl;
+  dout(10) << "PrimaryIdle::react(const StartScrub&)" << dendl;
    DECLARE_LOCALS;
    return transit<ReservingReplicas>();
  }
  
-sc::result NotActive::react(const AfterRepairScrub&)
+sc::result PrimaryIdle::react(const AfterRepairScrub&)
  {
-  dout(10) << "NotActive::react(const AfterRepairScrub&)" << dendl;
+  dout(10) << "PrimaryIdle::react(const AfterRepairScrub&)" << dendl;
    DECLARE_LOCALS;
    return transit<ReservingReplicas>();
  }
  
+void PrimaryIdle::clear_state(const FullReset&) {
+  dout(10) << "PrimaryIdle::react(const FullReset&): clearing state flags"
+           << dendl;
+  DECLARE_LOCALS;
+  scrbr->clear_pgscrub_state();
+}
+
  // ----------------------- Session -----------------------------------------
  
  Session::Session(my_context ctx)
      : my_base(ctx)
-    , NamedSimply(context<ScrubMachine>().m_scrbr, "Session")
+    , NamedSimply(context<ScrubMachine>().m_scrbr, "PrimaryActive/Session")
  {
-  dout(10) << "-- state -->> Session" << dendl;
+  dout(10) << "-- state -->> PrimaryActive/Session" << dendl;
    DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
  
    // while we've checked the 'someone is reserving' flag before queueing
@@ -242,7 +279,7 @@ sc::result ReservingReplicas::react(const ReplicaReject& ev)
    scrbr->flag_reservations_failure();
  
    // 'Session' state dtor stops the scrubber
-  return transit<NotActive>();
+  return transit<PrimaryIdle>();
  }
  
  sc::result ReservingReplicas::react(const ReservationTimeout&)
@@ -261,7 +298,7 @@ sc::result ReservingReplicas::react(const ReservationTimeout&)
    // cause the scrubber to stop the scrub session, marking 'reservation
    // failure' as the cause (affecting future scheduling)
    scrbr->flag_reservations_failure();
-  return transit<NotActive>();
+  return transit<PrimaryIdle>();
  }
  
  // ----------------------- ActiveScrubbing -----------------------------------
@@ -678,7 +715,7 @@ sc::result WaitDigestUpdate::react(const ScrubFinished&)
    session.m_session_started_at = ScrubTimePoint{};
  
    scrbr->scrub_finish();
-  return transit<NotActive>();
+  return transit<PrimaryIdle>();
  }
  
  ScrubMachine::ScrubMachine(PG* pg, ScrubMachineListener* pg_scrub)
@@ -813,6 +850,11 @@ ReplicaIdle::ReplicaIdle(my_context ctx)
    dout(10) << "-- state -->> ReplicaActive/ReplicaIdle" << dendl;
  }
  
+void ReplicaIdle::reset_ignored(const FullReset&)
+{
+  dout(10) << "ReplicaIdle::react(const FullReset&): FullReset ignored"
+          << dendl;
+}
  
  // ------------- ReplicaActive/ReplicaActiveOp --------------------------
  
diff --git a/src/osd/scrubber/scrub_machine.h b/src/osd/scrubber/scrub_machine.h

index df9a4c45e95f8fea8f12036321c8410e06fb28f6..fbd8f8b3a35960abd0abb7ecd2d11d08e37d3739 100644 (file)
--- a/src/osd/scrubber/scrub_machine.h
+++ b/src/osd/scrubber/scrub_machine.h
@@ -162,6 +162,9 @@ MEV(IntLocalMapDone)
  /// scrub_snapshot_metadata()
  MEV(DigestUpdate)
  
+/// peered as Primary - and clean
+MEV(PrimaryActivate)
+
  /// we are a replica for this PG
  MEV(ReplicaActivate)
  
@@ -176,7 +179,9 @@ MEV(ReplicaPushesUpd)
  
  /**
   * IntervalChanged
+ * The only path from PrimaryActive or ReplicaActive down to NotActive.
   *
+ * Note re reserved replicas:
   * This event notifies the ScrubMachine that it is no longer responsible for
   * releasing replica state.  It will generally be submitted upon a PG interval
   * change.
@@ -189,7 +194,12 @@ MEV(ReplicaPushesUpd)
   */
  MEV(IntervalChanged)
  
-/// guarantee that the FSM is in the quiescent state (i.e. NotActive)
+/**
+ * stops the scrubbing session, and resets the scrubber.
+ * For a replica - aborts the handling of the current request.
+ * In both cases - a transition to the peering mode quiescent state (i.e.
+ * PrimaryIdle or ReplicaIdle).
+ */
  MEV(FullReset)
  
  /// finished handling this chunk. Go get the next one
@@ -203,9 +213,15 @@ MEV(ScrubFinished)
  //
  
  struct NotActive;          ///< the quiescent state. No active scrubbing.
-struct Session;            ///< either reserving or actively scrubbing
  struct ReservingReplicas;   ///< securing scrub resources from replicas' OSDs
  struct ActiveScrubbing;            ///< the active state for a Primary. A sub-machine.
+
+// the states for a Primary:
+// note: PrimaryActive <==> in the OSD scrub queue
+struct PrimaryActive;     ///< base state for a Primary
+struct PrimaryIdle;       ///< ready for a new scrub request
+struct Session;            ///< either reserving or actively scrubbing
+
  // the active states for a replica:
  struct ReplicaActive;    ///< the quiescent state for a replica
  struct ReplicaActiveOp;
@@ -225,7 +241,7 @@ class ScrubMachine : public sc::state_machine<ScrubMachine, NotActive> {
    ScrubMachineListener* m_scrbr;
    std::ostream& gen_prefix(std::ostream& out) const;
  
-  void assert_not_active() const;
+  void assert_not_in_session() const;
    [[nodiscard]] bool is_reserving() const;
    [[nodiscard]] bool is_accepting_updates() const;
  
@@ -359,40 +375,94 @@ public:
  
  // ///////////////// the states //////////////////////// //
  
+/*
+ * When not scrubbing, the FSM is in one of three states:
+ *
+ * <> PrimaryActive - we are a Primary and active. The PG
+ * is queued for some future scrubs in the OSD's scrub queue.
+ *
+ * <> ReplicaActive - we are a replica. In this state, we are
+ * expecting either a replica reservation request from the Primary, or a
+ * scrubbing request for a specific chunk.
+ *
+ * <> NotActive - the quiescent state. No active scrubbing.
+ * We are neither an active Primary nor a replica.
+ */
+struct NotActive : sc::state<NotActive, ScrubMachine>, NamedSimply {
+  explicit NotActive(my_context ctx);
+
+  using reactions = mpl::list<
+      // peering done, and we are a replica
+      sc::transition<ReplicaActivate, ReplicaActive>,
+      // peering done, and we are a Primary
+      sc::transition<PrimaryActivate, PrimaryActive>>;
+};
+
+// ----------------------- when Primary --------------------------------------
+// ---------------------------------------------------------------------------
+
+
+/*
+ *  The primary states:
+ *
+ *  PrimaryActive - starts when peering ends with us as a primary,
+ *     and we are active and clean.
+ *   - when in this state - we (our scrub targets) are queued in the
+ *     OSD's scrub queue.
+ *
+ *  Sub-states:
+ *     - PrimaryIdle - ready for a new scrub request
+ *          * initial state of PrimaryActive
+ *
+ *     - Session - handling a single scrub session
+ */
+
+struct PrimaryIdle;
  
  /**
- *  The Scrubber's base (quiescent) state.
- *  Scrubbing is triggered by one of the following events:
+ *  PrimaryActive
+ *
+ *  The basic state for an active Primary. Ready to accept a new scrub request.
+ *  State managed here: being in the OSD's scrub queue (unless when scrubbing).
   *
+ *  Scrubbing is triggered by one of the following events:
   *  - (standard scenario for a Primary): 'StartScrub'. Initiates the OSDs
   *    resources reservation process. Will be issued by PG::scrub(), following a
   *    queued "PGScrub" op.
- *
   *  - a special end-of-recovery Primary scrub event ('AfterRepairScrub').
- *
- *  - (if already in ReplicaActive): an incoming MOSDRepScrub triggers
- *    'StartReplica'.
- *
- *  note (20.8.21): originally, AfterRepairScrub was triggering a scrub without
- *  waiting for replica resources to be acquired. But once replicas started
- *  using the resource-request to identify and tag the scrub session, this
- *  bypass cannot be supported anymore.
   */
-struct NotActive : sc::state<NotActive, ScrubMachine>, NamedSimply {
-  explicit NotActive(my_context ctx);
+struct PrimaryActive : sc::state<PrimaryActive, ScrubMachine, PrimaryIdle>,
+                        NamedSimply {
+  explicit PrimaryActive(my_context ctx);
+  ~PrimaryActive();
+
+  using reactions = mpl::list<
+      // when the interval ends - we may not be a primary anymore
+      sc::transition<IntervalChanged, NotActive>>;
+};
+
+/**
+ * \ATTN: set_op_parameters() is called while we are still in this state (waiting
+ * for a queued OSD message to trigger the transition into Session). Thus,
+ * even in this 'idle' state - there is some state we must take care to reset.
+ * Specifically - the PG state flags we were playing with in set_op_parameters().
+ */
+struct PrimaryIdle : sc::state<PrimaryIdle, PrimaryActive>, NamedSimply {
+  explicit PrimaryIdle(my_context ctx);
+  ~PrimaryIdle() = default;
+  void clear_state(const FullReset&);
  
    using reactions = mpl::list<
        sc::custom_reaction<StartScrub>,
        // a scrubbing that was initiated at recovery completion:
        sc::custom_reaction<AfterRepairScrub>,
-      // peering done, and we are a replica
-      sc::transition<ReplicaActivate, ReplicaActive>>;
+      // undoing set_op_params(), if aborted before starting the scrub:
+      sc::in_state_reaction<FullReset, PrimaryIdle, &PrimaryIdle::clear_state>>;
  
    sc::result react(const StartScrub&);
    sc::result react(const AfterRepairScrub&);
  };
  
-
  /**
   *  Session
   *
@@ -407,12 +477,12 @@ struct NotActive : sc::state<NotActive, ScrubMachine>, NamedSimply {
   *  reservations are released. This is because we know that the replicas are
   *  also resetting their reservations.
   */
-struct Session : sc::state<Session, ScrubMachine, ReservingReplicas>,
+struct Session : sc::state<Session, PrimaryActive, ReservingReplicas>,
                   NamedSimply {
    explicit Session(my_context ctx);
    ~Session();
  
-  using reactions = mpl::list<sc::transition<FullReset, NotActive>,
+  using reactions = mpl::list<sc::transition<FullReset, PrimaryIdle>,
                                sc::custom_reaction<IntervalChanged>>;
  
    sc::result react(const IntervalChanged&);
@@ -612,7 +682,9 @@ struct WaitDigestUpdate : sc::state<WaitDigestUpdate, ActiveScrubbing>,
    sc::result react(const ScrubFinished&);
  };
  
-// ----------------------------- the "replica active" states
+
+// ---------------------------------------------------------------------------
+// ----------------------------- the "replica active" states -----------------
  
  /*
   *  The replica states:
@@ -683,15 +755,21 @@ struct ReplicaActive : sc::state<ReplicaActive, ScrubMachine, ReplicaIdle>,
  struct ReplicaIdle : sc::state<ReplicaIdle, ReplicaActive>, NamedSimply {
    explicit ReplicaIdle(my_context ctx);
    ~ReplicaIdle() = default;
+  void reset_ignored(const FullReset&);
  
    // note the execution of check_for_updates() when transitioning to
    // ReplicaActiveOp/ReplicaWaitUpdates. That would trigger a ReplicaPushesUpd
    // event, which will be handled by ReplicaWaitUpdates.
-  using reactions = mpl::list<sc::transition<
-      StartReplica,
-      ReplicaWaitUpdates,
-      ReplicaActive,
-      &ReplicaActive::check_for_updates>>;
+  using reactions = mpl::list<
+      sc::transition<
+         StartReplica,
+         ReplicaWaitUpdates,
+         ReplicaActive,
+         &ReplicaActive::check_for_updates>,
+      sc::in_state_reaction<
+         FullReset,
+         ReplicaIdle,
+         &ReplicaIdle::reset_ignored>>;
  };
  
  
@@ -706,7 +784,9 @@ struct ReplicaActiveOp
    explicit ReplicaActiveOp(my_context ctx);
    ~ReplicaActiveOp();
  
-  using reactions = mpl::list<sc::custom_reaction<StartReplica>>;
+  using reactions = mpl::list<
+      sc::custom_reaction<StartReplica>,
+      sc::transition<FullReset, ReplicaIdle>>;
  
    /**
     * Handling the unexpected (read - caused by a bug) case of receiving a
diff --git a/src/osd/scrubber_common.h b/src/osd/scrubber_common.h

index 30d305036f444d17376608da5032d856480e0e4a..fbbef578ae6930bb907490973a056b02595a6164 100644 (file)
--- a/src/osd/scrubber_common.h
+++ b/src/osd/scrubber_common.h
@@ -319,6 +319,10 @@ struct ScrubPgIF {
    /// the OSD scrub queue
    virtual void on_new_interval() = 0;
  
+  /// we are peered as primary, and the PG is active and clean
+  /// Scrubber's internal FSM should be ActivePrimary
+  virtual void on_primary_active_clean() = 0;
+
    /// we are peered as a replica
    virtual void on_replica_activate() = 0;
author	Ronen Friedman <rfriedma@redhat.com>
	Thu, 21 Dec 2023 17:29:09 +0000 (11:29 -0600)
committer	Ronen Friedman <rfriedma@redhat.com>
	Thu, 28 Dec 2023 20:58:05 +0000 (14:58 -0600)
src/osd/PG.cc		patch \| blob \| history
src/osd/PG.h		patch \| blob \| history
src/osd/scrubber/pg_scrubber.cc		patch \| blob \| history
src/osd/scrubber/pg_scrubber.h		patch \| blob \| history
src/osd/scrubber/scrub_machine.cc		patch \| blob \| history
src/osd/scrubber/scrub_machine.h		patch \| blob \| history
src/osd/scrubber_common.h		patch \| blob \| history