/*
* Note: we are expected to be in the initial internal state (Idle) when
- * receiving any registration request. Our other internal states, the
- * active ones, have their own handler for this event, and will treat it
+ * receiving any registration request. ReplicaActiveOp, our other internal
+ * state, has its own handler for this event, and will treat it
* as an abort request.
*
* Process:
* implementation note: sc::result objects cannot be copied or moved. Thus,
* we've resorted to returning a code indicating the next action.
*/
-ReplicaReactCode ReplicaActive::on_reserve_request(
- const ReplicaReserveReq& ev,
- bool async_request)
+sc::result ReplicaActive::react(const ReplicaReserveReq& ev)
{
DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
- const auto m = ev.m_op->get_req<MOSDScrubReserve>();
+ const auto& m = *(ev.m_op->get_req<MOSDScrubReserve>());
+
+ // should we handle the request asynchronously, using the reserver?
+ const auto async_disabled = scrbr->get_pg_cct()->_conf.get_val<bool>(
+ "osd_scrub_disable_reservation_queuing");
+ const bool async_request = !async_disabled && m.wait_for_resources;
dout(10) << fmt::format(
- "ReplicaActive::on_reserve_req() request:{} async_request:{} "
- "reservation_nonce:{}",
- ev, async_request, m->reservation_nonce)
+ "ReplicaActive::react(const ReplicaReserveReq&): async "
+ "request?:{} disabled?:{} -> async? {}",
+ m.wait_for_resources, async_disabled, async_request)
<< dendl;
- ceph_assert(!reservation_granted);
- ceph_assert(!pending_reservation_nonce);
- ReplicaReactCode next_action{ReplicaReactCode::discard};
- AsyncScrubResData request_details{
- pg_id, ev.m_from, ev.m_op->sent_epoch, m->reservation_nonce};
+ if (m_reservation_status != reservation_status_t::unreserved) {
+ // we are not expected to be in this state when a new request arrives.
+ // Exit-then-reenter ReplicaActive, causing the existing reservation - be
+ // it granted or pending - to be cleared. This maneuver also aborts
+ // any on-going chunk request.
+ dout(1) << fmt::format(
+ "{}: unexpected request (granted:{} request:{})", __func__,
+ reservation_granted, m)
+ << dendl;
+ post_event(ev);
+ return transit<ReplicaActive>();
+ }
+
auto& reserver = m_osds->get_scrub_reserver();
if (async_request) {
// the request is to be handled asynchronously
- dout(20) << fmt::format(
- "{}: async request: {} details:{}", __func__, ev,
- request_details)
+ AsyncScrubResData request_details{
+ pg_id, ev.m_from, ev.m_op->sent_epoch, m.reservation_nonce};
+ dout(15) << fmt::format(
+ "ReplicaActive::react(const ReplicaReserveReq& ev): async "
+ "request: {} details:{}",
+ ev, request_details)
<< dendl;
- pending_reservation_nonce = m->reservation_nonce;
+
+ pending_reservation_nonce = m.reservation_nonce;
const auto reservation_cb = new RtReservationCB(m_pg, request_details);
- reserver.request_reservation(pg_id, reservation_cb, 0, nullptr);
- next_action = ReplicaReactCode::goto_waiting_reservation;
+ reserver.request_reservation(pg_id, reservation_cb, /*prio=*/0, nullptr);
+ m_reservation_status = reservation_status_t::requested_or_granted;
} else {
// an immediate yes/no is required
reservation_granted = reserver.request_reservation_or_fail(pg_id);
if (reservation_granted) {
dout(10) << fmt::format("{}: reserved? yes", __func__) << dendl;
+ m_reservation_status = reservation_status_t::requested_or_granted;
reply = new MOSDScrubReserve(
spg_t(pg_id.pgid, m_pg->get_primary().shard), ev.m_op->sent_epoch,
- MOSDScrubReserve::GRANT, m_pg->pg_whoami, m->reservation_nonce);
- next_action = ReplicaReactCode::goto_replica_reserved;
+ MOSDScrubReserve::GRANT, m_pg->pg_whoami, m.reservation_nonce);
} else {
dout(10) << fmt::format("{}: reserved? no", __func__) << dendl;
reply = new MOSDScrubReserve(
spg_t(pg_id.pgid, m_pg->get_primary().shard), ev.m_op->sent_epoch,
- MOSDScrubReserve::REJECT, m_pg->pg_whoami, m->reservation_nonce);
- // the event is discarded
- next_action = ReplicaReactCode::discard;
+ MOSDScrubReserve::REJECT, m_pg->pg_whoami, m.reservation_nonce);
}
m_osds->send_message_osd_cluster(
reply, ev.m_op->get_req()->get_connection());
}
-
- return next_action;
+ return discard_event();
}
-bool ReplicaActive::granted_by_reserver(const AsyncScrubResData& reservation)
+
+sc::result ReplicaActive::react(const ReserverGranted& ev)
{
DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
- dout(10) << fmt::format("{}: reservation granted: {}", __func__, reservation)
- << dendl;
+ const AsyncScrubResData& reservation = ev.value;
+ dout(10)
+ << fmt::format(
+ "ReplicaActive::react(const ReserverGranted&). Reservation:{}",
+ reservation)
+ << dendl;
- /// verify that the granted reservation is the one we were waiting for
+ /**
+ * discard (and log) unexpected 'reservation granted' messages
+ * from the async reserver. 'Unexpected' here - either not carrying the
+ * ID of our last request from the reserver, or arriving when there is
+ * no 'open request' made to the reserver.
+ * As canceled reservations may still be triggered, this is not
+ * necessarily a bug.
+ */
if (reservation.nonce != pending_reservation_nonce) {
dout(5) << fmt::format(
- "{}: reservation_nonce mismatch: {} != {}", __func__, reservation.nonce,
- pending_reservation_nonce) << dendl;
- return false;
+ "ReplicaActive::react(const ReserverGranted&): "
+ "reservation_nonce mismatch: {} != {}",
+ reservation.nonce, pending_reservation_nonce)
+ << dendl;
+ return discard_event();
}
reservation_granted = true;
MOSDScrubReserve::GRANT, m_pg->pg_whoami, pending_reservation_nonce);
m_pg->send_cluster_message(
m_pg->get_primary().osd, grant_msg, reservation.request_epoch, false);
- return true;
+ return discard_event();
}
+
void ReplicaActive::on_release(const ReplicaRelease& ev)
{
DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
}
}
-void ReplicaActive::ignore_unhandled_grant(const ReserverGranted&)
-{
- dout(10) << "ReplicaActive::react(const ReserverGranted&): ignored"
- << dendl;
-}
-
// ---------------- ReplicaActive/ReplicaIdle ---------------------------
<< dendl;
}
-sc::result ReplicaUnreserved::react(const ReplicaReserveReq& ev)
-{
- DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
- dout(10) << "ReplicaUnreserved::react(const ReplicaReserveReq&)" << dendl;
-
- const auto& m = *(ev.m_op->get_req<MOSDScrubReserve>());
- const auto async_disabled = scrbr->get_pg_cct()->_conf.get_val<bool>(
- "osd_scrub_disable_reservation_queuing");
- const bool async_request = !async_disabled && m.wait_for_resources;
- dout(15) << fmt::format(
- "ReplicaUnreserved::react(const ReplicaReserveReq&): "
- "request:{} disabled?:{} -> async? {}", m.wait_for_resources,
- async_disabled, async_request)
- << dendl;
-
- switch (context<ReplicaActive>().on_reserve_request(ev, async_request)) {
- case ReplicaReactCode::discard:
- return discard_event();
- case ReplicaReactCode::goto_waiting_reservation:
- return transit<ReplicaWaitingReservation>();
- case ReplicaReactCode::goto_replica_reserved:
- return transit<ReplicaReserved>();
- default:
- ceph_abort_msg("unexpected return value");
- }
- // can't happen, but some compilers complain:
- return transit<ReplicaReserved>();
-}
sc::result ReplicaUnreserved::react(const StartReplica& ev)
{
return discard_event();
}
-sc::result ReplicaUnreserved::react(const ReserverGranted&)
-{
- DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
- dout(10) << "ReplicaUnreserved::react(const ReserverGranted&)" << dendl;
- // shouldn't happen. Might be a result of a cancelled reservation
- // that was still delivered.
- // must unreserve
- dout(5) << "ReplicaUnreserved::react(const ReserverGranted&): reservation "
- "granted while not being waited for"
- << dendl;
- context<ReplicaActive>().clear_remote_reservation(false);
- return discard_event();
-}
-
// ---------------- ReplicaIdle/ReplicaWaitingReservation ---------------------------
<< dendl;
}
-sc::result ReplicaWaitingReservation::react(const ReserverGranted& ev)
-{
- DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
- dout(10) << fmt::format(
- "ReplicaWaitingReservation::react(const ReserverGranted&): "
- "event:{}",
- ev)
- << dendl;
- if (context<ReplicaActive>().granted_by_reserver(ev.value)) {
- return transit<ReplicaReserved>();
- }
- return discard_event();
-}
-
sc::result ReplicaWaitingReservation::react(const ReplicaRelease& ev)
{
DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
return transit<ReplicaActiveOp>();
}
-sc::result ReplicaWaitingReservation::react(const ReplicaReserveReq& ev)
-{
- DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
- dout(10) << "ReplicaWaitingReservation::react(const ReplicaReserveReq&)"
- << dendl;
- // this shouldn't happen. We will handle it, but will also log an error.
- scrbr->get_clog()->error() << fmt::format(
- "osd.{} pg[{}]: reservation requested while previous is pending",
- scrbr->get_whoami(), scrbr->get_spgid());
- // cancel the existing reservation, and re-request
- context<ReplicaActive>().clear_remote_reservation(true);
- post_event(ev);
- return transit<ReplicaUnreserved>();
-}
-
// ---------------- ReplicaIdle/ReplicaReserved ---------------------------
return transit<ReplicaUnreserved>();
}
-sc::result ReplicaReserved::react(const ReplicaReserveReq& ev)
-{
- DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
- dout(10) << "ReplicaReserved::react(const ReplicaReserveReq&)" << dendl;
- scrbr->get_clog()->error() << fmt::format(
- "osd.{} pg[{}]: reservation requested while still reserved",
- scrbr->get_whoami(), scrbr->get_spgid());
- // This is a bug. We should never receive a new request unless the
- // previous one was cancelled - either by the primary, or on interval
- // change.
- // cancel the existing reservation, and re-request
- context<ReplicaActive>().clear_remote_reservation(true);
- post_event(ev);
- return transit<ReplicaUnreserved>();
-}
-
sc::result ReplicaReserved::react(const StartReplica& ev)
{
DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
return transit<ReplicaActiveOp>();
}
+
+sc::result ReplicaActiveOp::react(const ReplicaReserveReq& ev)
+{
+ DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
+ dout(10) << "ReplicaActiveOp::react(const ReplicaReserveReq&)" << dendl;
+ scrbr->get_clog()->warn() << fmt::format(
+ "osd.{} pg[{}]: reservation requested while processing a chunk",
+ scrbr->get_whoami(), scrbr->get_spgid());
+ // exit-n-renter ReplicaActive (aborting the chunk & freeing the reservation
+ // in the process)
+ post_event(ev);
+ return transit<ReplicaActive>();
+}
+
+
sc::result ReplicaActiveOp::react(const ReplicaRelease& ev)
{
dout(10) << "ReplicaActiveOp::react(const ReplicaRelease&)" << dendl;
namespace sc = ::boost::statechart;
namespace mpl = ::boost::mpl;
+enum class reservation_status_t {
+ unreserved,
+ requested_or_granted ///< i.e. must be released
+};
+
//
// EVENTS
//
struct ReplicaIdle;
-// sc::result cannot be copied or moved, so we need to postpone
-// the creation of such objects to the moment where they are
-// returned from the react() function.
-enum class ReplicaReactCode {
- discard,
- goto_waiting_reservation,
- goto_replica_reserved
-};
-
-struct ReplicaActive : sc::state<
- ReplicaActive,
- ScrubMachine,
- mpl::list<sc::shallow_history<ReplicaIdle>>,
- sc::has_shallow_history>,
+struct ReplicaActive : sc::state<ReplicaActive, ScrubMachine, ReplicaIdle>,
NamedSimply {
explicit ReplicaActive(my_context ctx);
~ReplicaActive();
- /// handle a reservation request from a primary
- ReplicaReactCode on_reserve_request(
- const ReplicaReserveReq&,
- bool async_request);
-
- /**
- * the queued reservation request was granted by the async reserver.
- * Notify the Primary.
- * Returns 'false' if the reservation is not the last one to be received
- * by this replica.
- */
- bool granted_by_reserver(const AsyncScrubResData& resevation);
-
/// handle a 'release' from a primary
void on_release(const ReplicaRelease& ev);
*/
void clear_remote_reservation(bool warn_if_no_reservation);
- /**
- * discard (and log) unhandled 'reservation granted' messages
- * from the async reserver.
- * As canceled reservations may still be triggered, this is not
- * necessarily a bug.
- */
- void ignore_unhandled_grant(const ReserverGranted&);
-
using reactions = mpl::list<
sc::transition<IntervalChanged, NotActive>,
- sc::in_state_reaction<
- ReserverGranted,
- ReplicaActive,
- &ReplicaActive::ignore_unhandled_grant>>;
+ sc::custom_reaction<ReserverGranted>,
+ sc::custom_reaction<ReplicaReserveReq>>;
+
+ /// handle a reservation request from a primary
+ sc::result react(const ReplicaReserveReq& ev);
+
+ /**
+ * the queued reservation request was granted by the async reserver.
+ * Notify the Primary.
+ */
+ sc::result react(const ReserverGranted&);
private:
PG* m_pg;
* Note that in the event that the primary is too old to support asynchronous
* reservation, MOSDScrubReserve::wait_for_resources will be set to false by
* the decoder and we bypass the 2'nd case above.
- * See ReplicaActive::on_reserve_request().
*/
bool reservation_granted{false};
+ reservation_status_t m_reservation_status{reservation_status_t::unreserved};
+
/**
* a reservation request with this nonce is queued at the scrub_reserver,
* and was not yet granted.
explicit ReplicaUnreserved(my_context ctx);
using reactions = mpl::list<
- sc::custom_reaction<ReplicaReserveReq>,
sc::custom_reaction<StartReplica>,
// unexpected (bug-induced) events:
- sc::custom_reaction<ReplicaRelease>,
- sc::custom_reaction<ReserverGranted>>;
+ sc::custom_reaction<ReplicaRelease>>;
- sc::result react(const ReplicaReserveReq& ev);
sc::result react(const StartReplica& ev);
- sc::result react(const ReserverGranted&);
sc::result react(const ReplicaRelease&);
};
using reactions = mpl::list<
// the 'normal' (expected) events:
sc::custom_reaction<ReplicaRelease>,
- sc::custom_reaction<StartReplica>,
- // unexpected (bug-induced) events:
- sc::custom_reaction<ReplicaReserveReq>,
- sc::custom_reaction<ReserverGranted>>;
+ sc::custom_reaction<StartReplica>>;
sc::result react(const ReplicaRelease& ev);
sc::result react(const StartReplica& ev);
- sc::result react(const ReserverGranted&);
- sc::result react(const ReplicaReserveReq& ev);
};
/**
explicit ReplicaReserved(my_context ctx);
using reactions = mpl::list<
- sc::custom_reaction<ReplicaReserveReq>,
sc::custom_reaction<StartReplica>,
sc::custom_reaction<ReplicaRelease>>;
- sc::result react(const ReplicaReserveReq&);
sc::result react(const ReplicaRelease&);
sc::result react(const StartReplica& eq);
};
using reactions = mpl::list<
sc::custom_reaction<StartReplica>,
sc::custom_reaction<ReplicaRelease>,
+ sc::custom_reaction<ReplicaReserveReq>,
sc::transition<FullReset, ReplicaIdle>>;
/**
* we would transition into (which should be ReplicaReserved).
*/
sc::result react(const ReplicaRelease&);
+
+ /**
+ * handling unexpected 'reservation requests' while we are in the middle
+ * of serving a chunk request.
+ * This is a bug. We log it, abort the operation, and re-enter ReplicaActive
+ * to handle the reservation request.
+ */
+ sc::result react(const ReplicaReserveReq& ev);
};
/*