return transit<ReservingReplicas>();
}
+// ----------------------- Session -----------------------------------------
+
+Session::Session(my_context ctx)
+ : my_base(ctx)
+ , NamedSimply(context<ScrubMachine>().m_scrbr, "Session")
+{
+ dout(10) << "-- state -->> Session" << dendl;
+ DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
+
+ // Entering a scrub session: try to raise the 'reserving now' flag.
+ // Although the 'someone is reserving' flag was checked before the
+ // start-scrub event was queued, it may have been set in the meantime.
+ // Handling that race here would require a new sub-state, and a new
+ // failure path for reporting the failure to the caller. As ignoring an
+ // ongoing reservation on rare occasions causes no harm, the result of
+ // set_reserving_now() is deliberately discarded.
+ std::ignore = scrbr->set_reserving_now();
+}
+
+Session::~Session()
+{
+ DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
+
+ // Session-exit cleanup. Note the interaction between clearing the
+ // 'queued' flag and two other pieces of state: the snap-mapper and the
+ // scrubber's internal state. These must be cleared in the correct order:
+ // the snap-mapper (re-triggered by resetting the 'queued' flag) must not
+ // resume before the scrubber is reset.
+ scrbr->clear_pgscrub_state();
+}
+
+
// ----------------------- ReservingReplicas ---------------------------------
ReservingReplicas::ReservingReplicas(my_context ctx)
: my_base(ctx)
- , NamedSimply(context<ScrubMachine>().m_scrbr, "ReservingReplicas")
+ , NamedSimply(context<ScrubMachine>().m_scrbr, "Session/ReservingReplicas")
{
dout(10) << "-- state -->> ReservingReplicas" << dendl;
DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
- // prevent the OSD from starting another scrub while we are trying to secure
- // replicas resources
- if (!scrbr->set_reserving_now()) {
- dout(1) << "ReservingReplicas::ReservingReplicas() some other PG is "
- "already reserving replicas resources"
- << dendl;
- post_event(ReservationFailure{});
- return;
- }
- m_holding_isreserving_flag = true;
scrbr->reserve_replicas();
auto timeout = scrbr->get_cct()->_conf.get_val<
ReservingReplicas::~ReservingReplicas()
{
DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
- if (m_holding_isreserving_flag) {
- scrbr->clear_reserving_now();
- }
+ // clearing the 'reserving now' flag unconditionally is safe, even if we
+ // are not the ones holding it: the flag remembers its actual holder
+ scrbr->clear_reserving_now();
}
sc::result ReservingReplicas::react(const ReservationTimeout&)
{
DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
dout(10) << "ReservingReplicas::react(const ReservationFailure&)" << dendl;
-
- // the Scrubber must release all resources and abort the scrubbing
- scrbr->clear_pgscrub_state();
- return transit<NotActive>();
-}
-
-/**
- * note: the event poster is handling the scrubber reset
- */
-sc::result ReservingReplicas::react(const FullReset&)
-{
- dout(10) << "ReservingReplicas::react(const FullReset&)" << dendl;
return transit<NotActive>();
}
ActiveScrubbing::ActiveScrubbing(my_context ctx)
: my_base(ctx)
- , NamedSimply(context<ScrubMachine>().m_scrbr, "ActiveScrubbing")
+ , NamedSimply(context<ScrubMachine>().m_scrbr, "Session/ActiveScrubbing")
{
dout(10) << "-- state -->> ActiveScrubbing" << dendl;
DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
{
DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
dout(15) << __func__ << dendl;
- scrbr->unreserve_replicas();
- scrbr->clear_queued_or_active();
}
/*
{
DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
dout(10) << __func__ << dendl;
- scrbr->clear_pgscrub_state();
- return transit<NotActive>();
-}
-
-sc::result ActiveScrubbing::react(const FullReset&)
-{
- dout(10) << "ActiveScrubbing::react(const FullReset&)" << dendl;
- // caller takes care of clearing the scrubber & FSM states
return transit<NotActive>();
}
*/
RangeBlocked::RangeBlocked(my_context ctx)
: my_base(ctx)
- , NamedSimply(context<ScrubMachine>().m_scrbr, "Act/RangeBlocked")
+ , NamedSimply(context<ScrubMachine>().m_scrbr, "Session/Act/RangeBlocked")
{
- dout(10) << "-- state -->> Act/RangeBlocked" << dendl;
+ dout(10) << "-- state -->> Session/Act/RangeBlocked" << dendl;
DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
auto grace = scrbr->get_range_blocked_grace();
*/
PendingTimer::PendingTimer(my_context ctx)
: my_base(ctx)
- , NamedSimply(context<ScrubMachine>().m_scrbr, "Act/PendingTimer")
+ , NamedSimply(context<ScrubMachine>().m_scrbr, "Session/Act/PendingTimer")
{
- dout(10) << "-- state -->> Act/PendingTimer" << dendl;
+ dout(10) << "-- state -->> Session/Act/PendingTimer" << dendl;
DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
auto sleep_time = scrbr->get_scrub_sleep_time();
*/
NewChunk::NewChunk(my_context ctx)
: my_base(ctx)
- , NamedSimply(context<ScrubMachine>().m_scrbr, "Act/NewChunk")
+ , NamedSimply(context<ScrubMachine>().m_scrbr, "Session/Act/NewChunk")
{
- dout(10) << "-- state -->> Act/NewChunk" << dendl;
+ dout(10) << "-- state -->> Session/Act/NewChunk" << dendl;
DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
scrbr->get_preemptor().adjust_parameters();
WaitPushes::WaitPushes(my_context ctx)
: my_base(ctx)
- , NamedSimply(context<ScrubMachine>().m_scrbr, "Act/WaitPushes")
+ , NamedSimply(context<ScrubMachine>().m_scrbr, "Session/Act/WaitPushes")
{
- dout(10) << " -- state -->> Act/WaitPushes" << dendl;
+ dout(10) << " -- state -->> Session/Act/WaitPushes" << dendl;
post_event(ActivePushesUpd{});
}
WaitLastUpdate::WaitLastUpdate(my_context ctx)
: my_base(ctx)
- , NamedSimply(context<ScrubMachine>().m_scrbr, "Act/WaitLastUpdate")
+ , NamedSimply(context<ScrubMachine>().m_scrbr, "Session/Act/WaitLastUpdate")
{
- dout(10) << " -- state -->> Act/WaitLastUpdate" << dendl;
+ dout(10) << " -- state -->> Session/Act/WaitLastUpdate" << dendl;
post_event(UpdatesApplied{});
}
BuildMap::BuildMap(my_context ctx)
: my_base(ctx)
- , NamedSimply(context<ScrubMachine>().m_scrbr, "Act/BuildMap")
+ , NamedSimply(context<ScrubMachine>().m_scrbr, "Session/Act/BuildMap")
{
- dout(10) << " -- state -->> Act/BuildMap" << dendl;
+ dout(10) << " -- state -->> Session/Act/BuildMap" << dendl;
DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
// no need to check for an epoch change, as all possible flows that brought
DrainReplMaps::DrainReplMaps(my_context ctx)
: my_base(ctx)
- , NamedSimply(context<ScrubMachine>().m_scrbr, "Act/DrainReplMaps")
+ , NamedSimply(context<ScrubMachine>().m_scrbr, "Session/Act/DrainReplMaps")
{
- dout(10) << "-- state -->> Act/DrainReplMaps" << dendl;
+ dout(10) << "-- state -->> Session/Act/DrainReplMaps" << dendl;
// we may have got all maps already. Send the event that will make us check.
post_event(GotReplicas{});
}
WaitReplicas::WaitReplicas(my_context ctx)
: my_base(ctx)
- , NamedSimply(context<ScrubMachine>().m_scrbr, "Act/WaitReplicas")
+ , NamedSimply(context<ScrubMachine>().m_scrbr, "Session/Act/WaitReplicas")
{
- dout(10) << "-- state -->> Act/WaitReplicas" << dendl;
+ dout(10) << "-- state -->> Session/Act/WaitReplicas" << dendl;
post_event(GotReplicas{});
}
WaitDigestUpdate::WaitDigestUpdate(my_context ctx)
: my_base(ctx)
- , NamedSimply(context<ScrubMachine>().m_scrbr, "Act/WaitDigestUpdate")
+ , NamedSimply(context<ScrubMachine>().m_scrbr, "Session/Act/WaitDigestUpdate")
{
DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
- dout(10) << "-- state -->> Act/WaitDigestUpdate" << dendl;
+ dout(10) << "-- state -->> Session/Act/WaitDigestUpdate" << dendl;
// perform an initial check: maybe we already
// have all the updates we need:
//
struct NotActive; ///< the quiescent state. No active scrubbing.
+struct Session; ///< either reserving or actively scrubbing
struct ReservingReplicas; ///< securing scrub resources from replicas' OSDs
struct ActiveScrubbing; ///< the active state for a Primary. A sub-machine.
struct ReplicaIdle; ///< Initial reserved replica state
[[nodiscard]] bool is_reserving() const;
[[nodiscard]] bool is_accepting_updates() const;
+
+// ///////////////// aux declarations & functions //////////////////////// //
+
+
private:
/**
* scheduled_event_state_t
*
* Heap allocated, ref-counted state shared between scheduled event callback
* and timer_event_token_t. Ensures that callback and timer_event_token_t
- * can be safetly destroyed in either order while still allowing for
+ * can be safely destroyed in either order while still allowing for
* cancellation.
*/
struct scheduled_event_state_t {
~scheduled_event_state_t() {
/* For the moment, this assert encodes an assumption that we always
* retain the token until the event either fires or is canceled.
- * If a user needs/wants to relaxt that requirement, this assert can
+ * If a user needs/wants to relax that requirement, this assert can
* be removed */
assert(!cb_token);
}
* schedule_timer_event_after
*
* Schedules event EventT{Args...} to be delivered duration in the future.
- * The implementation implicitely drops the event on interval change. The
+ * The implementation implicitly drops the event on interval change. The
* returned timer_event_token_t can be used to cancel the event prior to
* its delivery -- it should generally be embedded as a member in the state
* intended to handle the event. See the comment on timer_event_token_t
}
};
+
+// ///////////////// the states //////////////////////// //
+
+
/**
* The Scrubber's base (quiescent) state.
* Scrubbing is triggered by one of the following events:
sc::result react(const AfterRepairScrub&);
};
-struct ReservingReplicas : sc::state<ReservingReplicas, ScrubMachine>,
+
+/**
+ * Session
+ *
+ * The parent state of the two main "active" states: ReservingReplicas (the
+ * initial sub-state) and ActiveScrubbing.
+ * 'Session' is the owner of all the resources that are allocated for a
+ * scrub session performed as a Primary.
+ *
+ * Exit from this state is either following an interval change, or with
+ * 'FullReset' (that would cover all other completion/termination paths).
+ * Note that when the session is terminated following an interval change, no
+ * reservations are released: we know that the replicas are resetting
+ * their reservations, too.
+ */
+struct Session : sc::state<Session, ScrubMachine, ReservingReplicas>, NamedSimply {
+ explicit Session(my_context ctx);
+ ~Session();
+
+ using reactions = mpl::list<sc::transition<FullReset, NotActive>>;
+ /// \todo handle interval change (only 'FullReset' is handled for now)
+};
+
+struct ReservingReplicas : sc::state<ReservingReplicas, Session>,
NamedSimply {
explicit ReservingReplicas(my_context ctx);
~ReservingReplicas();
- using reactions = mpl::list<sc::custom_reaction<FullReset>,
+ using reactions = mpl::list<
// all replicas granted our resources request
sc::transition<RemotesReserved, ActiveScrubbing>,
sc::custom_reaction<ReservationTimeout>,
ceph::coarse_real_clock::now();
ScrubMachine::timer_event_token_t m_timeout_token;
- /// if true - we must 'clear_reserving_now()' upon exit
- bool m_holding_isreserving_flag{false};
-
- sc::result react(const FullReset&);
-
sc::result react(const ReservationTimeout&);
/// at least one replica denied us the scrub resources we've requested
struct WaitDigestUpdate;
struct ActiveScrubbing
- : sc::state<ActiveScrubbing, ScrubMachine, PendingTimer>, NamedSimply {
+ : sc::state<ActiveScrubbing, Session, PendingTimer>, NamedSimply {
explicit ActiveScrubbing(my_context ctx);
~ActiveScrubbing();
- using reactions = mpl::list<sc::custom_reaction<InternalError>,
- sc::custom_reaction<FullReset>>;
+ using reactions = mpl::list<sc::custom_reaction<InternalError>>; // FullReset: handled by 'Session'
- sc::result react(const FullReset&);
sc::result react(const InternalError&);
};