ReplicaActive::~ReplicaActive()
{
- DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
- if (reserved_by_my_primary) {
- dout(10) << "ReplicaActive::~ReplicaActive(): clearing reservation"
- << dendl;
- clear_reservation_by_remote_primary(false);
- }
+ clear_remote_reservation(false);
}
/*
{
DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
const auto m = ev.m_op->get_req<MOSDScrubReserve>();
- const auto msg_nonce = m->reservation_nonce;
dout(10) << fmt::format(
- "ReplicaActive::on_reserve_req() from {} request:{} is "
- "async?{} (reservation_nonce:{})",
- ev.m_from, ev, async_request, msg_nonce)
+ "ReplicaActive::on_reserve_req() request:{} async_request:{} "
+ "reservation_nonce:{}",
+ ev, async_request, m->reservation_nonce)
<< dendl;
- auto& svc = m_osds->get_scrub_services(); // shorthand
-
- if (reserved_by_my_primary) {
- dout(10) << "ReplicaActive::on_reserve_request(): already reserved"
- << dendl;
- // clear the existing reservation. Clears the flag, too
- clear_reservation_by_remote_primary(false);
- }
- Message* reply{nullptr};
+ ceph_assert(!reservation_granted);
+ ceph_assert(!pending_reservation_nonce);
ReplicaReactCode next_action{ReplicaReactCode::discard};
+ AsyncScrubResData request_details{
+ pg_id, ev.m_from, ev.m_op->sent_epoch, m->reservation_nonce};
+ auto& reserver = m_osds->get_scrub_reserver();
if (async_request) {
// the request is to be handled asynchronously
- svc.enqueue_remote_reservation(pg_id.pgid);
+ dout(20) << fmt::format(
+ "{}: async request: {} details:{}", __func__, ev,
+ request_details)
+ << dendl;
+ pending_reservation_nonce = m->reservation_nonce;
+ const auto reservation_cb = new RtReservationCB(m_pg, request_details);
+ reserver.request_reservation(pg_id, reservation_cb, 0, nullptr);
next_action = ReplicaReactCode::goto_waiting_reservation;
} else {
// an immediate yes/no is required
- const auto granted = svc.inc_scrubs_remote(scrbr->get_spgid().pgid);
- if (granted) {
- reserved_by_my_primary = true;
+ Message* reply{nullptr};
+ reservation_granted = reserver.request_reservation_or_fail(pg_id);
+ if (reservation_granted) {
dout(10) << fmt::format("{}: reserved? yes", __func__) << dendl;
reply = new MOSDScrubReserve(
spg_t(pg_id.pgid, m_pg->get_primary().shard), ev.m_op->sent_epoch,
- MOSDScrubReserve::GRANT, m_pg->pg_whoami, msg_nonce);
+ MOSDScrubReserve::GRANT, m_pg->pg_whoami, m->reservation_nonce);
next_action = ReplicaReactCode::goto_replica_reserved;
} else {
dout(10) << fmt::format("{}: reserved? no", __func__) << dendl;
reply = new MOSDScrubReserve(
spg_t(pg_id.pgid, m_pg->get_primary().shard), ev.m_op->sent_epoch,
- MOSDScrubReserve::REJECT, m_pg->pg_whoami, msg_nonce);
+ MOSDScrubReserve::REJECT, m_pg->pg_whoami, m->reservation_nonce);
// the event is discarded
next_action = ReplicaReactCode::discard;
}
- }
- if (reply) {
m_osds->send_message_osd_cluster(
reply, ev.m_op->get_req()->get_connection());
}
+
return next_action;
}
+bool ReplicaActive::granted_by_reserver(const AsyncScrubResData& reservation)
+{
+ DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
+ dout(10) << fmt::format("{}: reservation granted: {}", __func__, reservation)
+ << dendl;
+
+ /// verify that the granted reservation is the one we were waiting for
+ if (reservation.nonce != pending_reservation_nonce) {
+ dout(5) << fmt::format(
+ "{}: reservation_nonce mismatch: {} != {}", __func__, reservation.nonce,
+ pending_reservation_nonce) << dendl;
+ return false;
+ }
+
+ reservation_granted = true;
+ pending_reservation_nonce = 0; // no longer pending
+
+ // notify the primary
+ auto grant_msg = make_message<MOSDScrubReserve>(
+ spg_t(pg_id.pgid, m_pg->get_primary().shard), reservation.request_epoch,
+ MOSDScrubReserve::GRANT, m_pg->pg_whoami, pending_reservation_nonce);
+ m_pg->send_cluster_message(
+ m_pg->get_primary().osd, grant_msg, reservation.request_epoch, false);
+ return true;
+}
+
void ReplicaActive::on_release(const ReplicaRelease& ev)
{
DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
dout(10) << fmt::format("ReplicaActive::on_release() from {}", ev.m_from)
<< dendl;
- clear_reservation_by_remote_primary(true);
+ clear_remote_reservation(true);
}
-void ReplicaActive::clear_reservation_by_remote_primary(bool log_failure)
+void ReplicaActive::clear_remote_reservation(bool warn_if_no_reservation)
{
DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
dout(10) << fmt::format(
- "ReplicaActive::clear_reservation_by_remote_primary(): was "
- "reserved? {}",
- (reserved_by_my_primary ? "yes" : "no"))
+ "ReplicaActive::clear_remote_reservation(): "
+ "pending_reservation_nonce {}, reservation_granted {}",
+ reservation_granted, pending_reservation_nonce)
<< dendl;
- if (reserved_by_my_primary) {
- m_osds->get_scrub_services().dec_scrubs_remote(scrbr->get_spgid().pgid);
- reserved_by_my_primary = false;
- } else if (log_failure) {
- const auto msg = fmt::format(
- "ReplicaActive::clear_reservation_by_remote_primary(): "
- "not reserved!");
+ if (reservation_granted || pending_reservation_nonce) {
+ m_osds->get_scrub_reserver().cancel_reservation(pg_id);
+ reservation_granted = false;
+ pending_reservation_nonce = 0;
+ } else if (warn_if_no_reservation) {
+ const auto msg =
+ "ReplicaActive::clear_remote_reservation(): "
+ "not reserved!";
dout(5) << msg << dendl;
scrbr->get_clog()->warn() << msg;
}
}
+void ReplicaActive::ignore_unhandled_grant(const ReserverGranted&)
+{
+ dout(10) << "ReplicaActive::react(const ReserverGranted&): ignored"
+ << dendl;
+}
+
// ---------------- ReplicaActive/ReplicaIdle ---------------------------
DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
dout(10) << "ReplicaUnreserved::react(const ReplicaReserveReq&)" << dendl;
- switch (context<ReplicaActive>().on_reserve_request(ev, false)) {
+ bool async_request = true && ev.m_op->get_req<MOSDScrubReserve>()->wait_for_resources /* && a config */;
+ switch (context<ReplicaActive>().on_reserve_request(ev, async_request)) {
case ReplicaReactCode::discard:
return discard_event();
case ReplicaReactCode::goto_waiting_reservation:
{
DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
dout(10) << "ReplicaUnreserved::react(const ReplicaRelease&)" << dendl;
- // shouldn't happen. Possible (faulty) sequence: getting an op
- // command while in ReplicaWaitingReservation (as we would just
- // treat that as a regular op request, but will stop waiting for
- // reservation).
- // must cancel the queued reservation request
+ // this is a bug. We should never receive a release request unless we
+ // are reserved or have a pending reservation.
scrbr->get_clog()->error() << fmt::format(
"osd.{} pg[{}]: reservation released while not reserved",
scrbr->get_whoami(), scrbr->get_spgid());
- context<ReplicaActive>().clear_reservation_by_remote_primary(true);
return discard_event();
}
{
DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
dout(10) << "ReplicaUnreserved::react(const ReserverGranted&)" << dendl;
- // shouldn't happen. Possible (faulty) sequence: getting an op
- // command while in ReplicaWaitingReservation (as we would just
- // treat that as a regular op request, but will stop waiting for
- // reservation).
+ // shouldn't happen. Might be a result of a cancelled reservation
+ // that was still delivered.
// must unreserve
- scrbr->get_clog()->error() << fmt::format(
- "osd.{} pg[{}]: reservation granted while not being waited for",
- scrbr->get_whoami(), scrbr->get_spgid());
- context<ReplicaActive>().clear_reservation_by_remote_primary(true);
+ dout(5) << "ReplicaUnreserved::react(const ReserverGranted&): reservation "
+ "granted while not being waited for"
+ << dendl;
+ context<ReplicaActive>().clear_remote_reservation(false);
return discard_event();
}
<< dendl;
}
-sc::result ReplicaWaitingReservation::react(const ReserverGranted&)
+sc::result ReplicaWaitingReservation::react(const ReserverGranted& ev)
{
DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases
- dout(10) << "ReplicaWaitingReservation::react(const ReserverGranted&)"
+ dout(10) << fmt::format(
+ "ReplicaWaitingReservation::react(const ReserverGranted&): "
+ "event:{}",
+ ev)
<< dendl;
-
- /// \todo complete the handling of the granted reservation
- ceph_abort_msg("not implemented yet");
+ if (context<ReplicaActive>().granted_by_reserver(ev.value)) {
+ return transit<ReplicaReserved>();
+ }
return discard_event();
}
"osd.{} pg[{}]: new chunk request while still waiting for "
"reservation",
scrbr->get_whoami(), scrbr->get_spgid());
+ context<ReplicaActive>().clear_remote_reservation(true);
clear_shallow_history<ReplicaIdle, 0>();
post_event(ReplicaPushesUpd{});
return transit<ReplicaActiveOp>();
"osd.{} pg[{}]: reservation requested while previous is pending",
scrbr->get_whoami(), scrbr->get_spgid());
// cancel the existing reservation, and re-request
- context<ReplicaActive>().clear_reservation_by_remote_primary(true);
+ context<ReplicaActive>().clear_remote_reservation(true);
post_event(ev);
return transit<ReplicaUnreserved>();
}
scrbr->get_clog()->error() << fmt::format(
"osd.{} pg[{}]: reservation requested while still reserved",
scrbr->get_whoami(), scrbr->get_spgid());
+ // This is a bug. We should never receive a new request unless the
+ // previous one was cancelled - either by the primary, or on interval
+ // change.
// cancel the existing reservation, and re-request
- context<ReplicaActive>().clear_reservation_by_remote_primary(true);
+ context<ReplicaActive>().clear_remote_reservation(true);
post_event(ev);
return transit<ReplicaUnreserved>();
}
#include <boost/statechart/transition.hpp>
#include "common/fmt_common.h"
+#include "include/Context.h"
#include "common/version.h"
#include "messages/MOSDOp.h"
#include "messages/MOSDRepScrub.h"
#include "messages/MOSDRepScrubMap.h"
-#include "messages/MOSDScrubReserve.h"
-
-#include "include/Context.h"
#include "osd/scrubber_common.h"
#include "scrub_machine_lstnr.h"
/// the async-reserver granted our reservation request
-OP_EV(ReserverGranted);
+VALUE_EVENT(ReserverGranted, AsyncScrubResData);
#define MEV(E) \
struct E : sc::event<E> { \
* * ReplicaWaitUpdates
* * ReplicaBuildingMap
*/
+/*
+ * AsyncReserver for scrub 'remote' reservations
+ * -----------------------------------------------
+ *
+ * Unless disabled by 'osd_scrub_disable_reservation_queuing' (*), scrub
+ * reservation requests are handled by an async reserver: they are queued,
+ * until the number of concurrent scrubs is below the configured limit.
+
+ * (*) Note: the 'osd_scrub_disable_reservation_queuing' option is a temporary
+ * debug measure, and will be removed without deprecation in a future release.
+ *
+ * On the replica side, all reservations are treated as having the same priority.
+ * Note that 'high priority' scrubs, e.g. user-initiated scrubs, do not perform
+ * reservations on replicas at all.
+ *
+ * A queued scrub reservation request is cancelled by any of the following events:
+ *
+ * - a new interval: in this case, we do not expect to see a cancellation request
+ * from the primary, and we can simply remove the request from the queue;
+ *
+ * - a cancellation request from the primary: probably a result of timing out on
+ * the reservation process. Here, we can simply remove the request from the queue.
+ *
+ * - a new reservation request for the same PG: this is a bug. We had missed the
+ * previous cancellation request, which could never happen.
+ * We cancel the previous request, and replace
+ * it with the new one. We would also issue an error log message.
+ *
+ * Primary/Replica with differing versions:
+ *
+ * The updated version of MOSDScrubReserve contains a new 'wait_for_resources'
+ * field. For legacy Primary OSDs, this field is decoded as 'false', and the
+ * replica responds immediately, with grant/rejection.
+*/
+
struct ReplicaIdle;
const ReplicaReserveReq&,
bool async_request);
+ /**
+ * the queued reservation request was granted by the async reserver.
+ * Notify the Primary.
+ * Returns 'false' if the reservation is not the last one to be received
+ * by this replica.
+ */
+ bool granted_by_reserver(const AsyncScrubResData& resevation);
+
/// handle a 'release' from a primary
void on_release(const ReplicaRelease& ev);
- /// cancel the reserver request.
- /// The 'failure' re 'log_failure' is logged if we are not reserved to
- /// begin with.
- void clear_reservation_by_remote_primary(bool log_failure);
+ /**
+ * cancel a granted or pending reservation
+ *
+ * warn_if_no_reservation is set to true if the call is in response to a
+ * cancellation from the primary. In that event, we *must* find a
+ * a granted or pending reservation and failing to do so warrants
+ * a warning to clog as it is a bug.
+ */
+ void clear_remote_reservation(bool warn_if_no_reservation);
+
+ /**
+ * discard (and log) unhandled 'reservation granted' messages
+ * from the async reserver.
+ * As canceled reservations may still be triggered, this is not
+ * necessarily a bug.
+ */
+ void ignore_unhandled_grant(const ReserverGranted&);
- using reactions = mpl::list<sc::transition<IntervalChanged, NotActive>>;
+ using reactions = mpl::list<
+ sc::transition<IntervalChanged, NotActive>,
+ sc::in_state_reaction<
+ ReserverGranted,
+ ReplicaActive,
+ &ReplicaActive::ignore_unhandled_grant>>;
private:
- bool reserved_by_my_primary{false};
-
- // shortcuts:
PG* m_pg;
OSDService* m_osds;
+
+ // --- remote reservation machinery
+
+ /*
+ * 'reservation_granted' is set to 'true' when we have grant confirmation
+ * to the primary, and the reservation has not yet been canceled (either
+ * by the primary or following an interval change).
+ *
+ * Note the interaction with 'pending_reservation_nonce': the combination
+ * of these two variables is used to track the state of the reservation
+ * with the scrub_reserver. The possible combinations:
+ * - pending_reservation_nonce == 0 && !reservation_granted -- no reservation
+ * was granted, and none is pending;
+ * - pending_reservation_nonce != 0 && !reservation_granted -- we have a
+ * pending cb in the AsyncReserver for a request with nonce
+ * 'pending_reservation_nonce'
+ * - pending_reservation_nonce == 0 && reservation_granted -- we have sent
+ * a response to the primary granting the reservation
+ * (invariant: !((pending_reservation_nonce != 0) && reservation_granted)
+ *
+ * Note that in the event that the primary is too old to support asynchronous
+ * reservation, MOSDScrubReserve::wait_for_resources will be set to false by
+ * the decoder and we bypass the 2'nd case above.
+ * See ReplicaActive::on_reserve_request().
+ */
+ bool reservation_granted{false};
+
+ /**
+ * a reservation request with this nonce is queued at the scrub_reserver,
+ * and was not yet granted.
+ */
+ MOSDScrubReserve::reservation_nonce_t pending_reservation_nonce{0};
+
+ // clang-format off
+ struct RtReservationCB : public Context {
+ PGRef pg;
+ AsyncScrubResData res_data;
+
+ explicit RtReservationCB(PGRef pg, AsyncScrubResData request_details)
+ : pg{pg}
+ , res_data{request_details}
+ {}
+
+ void finish(int) override {
+ pg->lock();
+ pg->m_scrubber->send_granted_by_reserver(res_data);
+ pg->unlock();
+ }
+ };
+ // clang-format on
};
using reactions = mpl::list<
sc::custom_reaction<ReplicaReserveReq>,
+ sc::custom_reaction<StartReplica>,
+ // unexpected (bug-induced) events:
sc::custom_reaction<ReplicaRelease>,
- sc::custom_reaction<ReserverGranted>,
- sc::custom_reaction<StartReplica>>;
+ sc::custom_reaction<ReserverGranted>>;
sc::result react(const ReplicaReserveReq& ev);
sc::result react(const StartReplica& ev);