From 2463c6463d1ed38a2e15a0960ed1530a47851489 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Sun, 22 Oct 2017 22:37:36 -0500 Subject: [PATCH] osd/PG: allow preemption of remote backfill reservations If we have granted a remote backfill reservation, and a higher priority request comes in, send a REVOKE message back to the primary and drop the reservation (allowing the higher-priority reservation to be GRANTed). We can only do this if the primary is running new code because it must understand the REVOKE message. Signed-off-by: Sage Weil --- src/messages/MBackfillReserve.h | 6 +++- src/osd/OSD.cc | 6 ++++ src/osd/PG.cc | 53 ++++++++++++++++++++++++++++++++- src/osd/PG.h | 10 +++++-- 4 files changed, 71 insertions(+), 4 deletions(-) diff --git a/src/messages/MBackfillReserve.h b/src/messages/MBackfillReserve.h index 0d4814c2f672a..19b5531fb5410 100644 --- a/src/messages/MBackfillReserve.h +++ b/src/messages/MBackfillReserve.h @@ -29,6 +29,7 @@ public: REJECT = 2, // replica->primary: sorry, try again later (*) RELEASE = 3, // primary->replcia: release the slot i reserved before TOOFULL = 4, // replica->primary: too full, stop backfilling + REVOKE = 5, // replica->primary: i'm taking back the slot i gave you // (*) NOTE: prior to luminous, REJECT was overloaded to also mean release }; uint32_t type; @@ -66,6 +67,9 @@ public: case TOOFULL: out << "TOOFULL "; break; + case REVOKE: + out << "REVOKE "; + break; } out << " pgid: " << pgid << ", query_epoch: " << query_epoch; if (type == REQUEST) out << ", prio: " << priority; @@ -87,7 +91,7 @@ public: header.compat_version = 3; ::encode(pgid.pgid, payload); ::encode(query_epoch, payload); - ::encode((type == RELEASE || type == TOOFULL) ? + ::encode((type == RELEASE || type == TOOFULL || type == REVOKE) ? REJECT : type, payload); ::encode(priority, payload); ::encode(pgid.shard, payload); diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 4faf8649e6f0f..a5d17df494e69 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -8569,6 +8569,12 @@ void OSD::handle_pg_backfill_reserve(OpRequestRef op) m->query_epoch, m->query_epoch, PG::RemoteReservationRevokedTooFull())); + } else if (m->type == MBackfillReserve::REVOKE) { + evt = PG::CephPeeringEvtRef( + new PG::CephPeeringEvt( + m->query_epoch, + m->query_epoch, + PG::RemoteReservationRevoked())); } else { ceph_abort(); } diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 68151a9f3dc97..1c9eca70aba1e 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -6419,6 +6419,34 @@ PG::RecoveryState::Backfilling::react(const RemoteReservationRevokedTooFull &) return transit(); } +boost::statechart::result +PG::RecoveryState::Backfilling::react(const RemoteReservationRevoked &) +{ + PG *pg = context< RecoveryMachine >().pg; + pg->osd->local_reserver.cancel_reservation(pg->info.pgid); + pg->state_set(PG_STATE_BACKFILL_WAIT); + + for (set::iterator it = pg->backfill_targets.begin(); + it != pg->backfill_targets.end(); + ++it) { + assert(*it != pg->pg_whoami); + ConnectionRef con = pg->osd->get_con_osd_cluster( + it->osd, pg->get_osdmap()->get_epoch()); + if (con) { + pg->osd->send_message_osd_cluster( + new MBackfillReserve( + MBackfillReserve::RELEASE, + spg_t(pg->info.pgid.pgid, it->shard), + pg->get_osdmap()->get_epoch()), + con.get()); + } + } + + pg->waiting_on_backfill.clear(); + + return transit(); +} + void PG::RecoveryState::Backfilling::exit() { context< RecoveryMachine >().log_exit(state_name, enter_time); @@ -6681,11 +6709,20 @@ PG::RecoveryState::RepNotRecovering::react(const RequestBackfillPrio &evt) << ss.str() << dendl; post_event(RejectRemoteReservation()); } else { + Context *preempt = nullptr; + if (HAVE_FEATURE(pg->upacting_features, SERVER_MIMIC)) { + // older peers will interpret preemption as TOOFULL + preempt = new QueuePeeringEvt( + pg, pg->get_osdmap()->get_epoch(), + RemoteBackfillPreempted()); + } pg->osd->remote_reserver.request_reservation( pg->info.pgid, new QueuePeeringEvt( pg, pg->get_osdmap()->get_epoch(), - RemoteBackfillReserved()), evt.priority); + RemoteBackfillReserved()), + evt.priority, + preempt); } return transit(); } @@ -6796,6 +6833,20 @@ PG::RecoveryState::RepRecovering::react(const BackfillTooFull &) return discard_event(); } +boost::statechart::result +PG::RecoveryState::RepRecovering::react(const RemoteBackfillPreempted &) +{ + PG *pg = context< RecoveryMachine >().pg; + pg->osd->send_message_osd_cluster( + pg->primary.osd, + new MBackfillReserve( + MBackfillReserve::REVOKE, + spg_t(pg->info.pgid.pgid, pg->primary.shard), + pg->get_osdmap()->get_epoch()), + pg->get_osdmap()->get_epoch()); + return discard_event(); +} + void PG::RecoveryState::RepRecovering::exit() { context< RecoveryMachine >().log_exit(state_name, enter_time); diff --git a/src/osd/PG.h b/src/osd/PG.h index 005090929cd99..97d43186d93db 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -1854,10 +1854,12 @@ protected: public: TrivialEvent(RemoteReservationRejected) TrivialEvent(RemoteReservationRevokedTooFull) + TrivialEvent(RemoteReservationRevoked) TrivialEvent(RemoteReservationCanceled) TrivialEvent(RequestBackfill) TrivialEvent(RecoveryDone) protected: + TrivialEvent(RemoteBackfillPreempted) TrivialEvent(BackfillTooFull) TrivialEvent(RecoveryTooFull) @@ -2178,7 +2180,8 @@ protected: boost::statechart::custom_reaction< DeferBackfill >, boost::statechart::custom_reaction< UnfoundBackfill >, boost::statechart::custom_reaction< RemoteReservationRejected >, - boost::statechart::custom_reaction< RemoteReservationRevokedTooFull> + boost::statechart::custom_reaction< RemoteReservationRevokedTooFull>, + boost::statechart::custom_reaction< RemoteReservationRevoked> > reactions; explicit Backfilling(my_context ctx); boost::statechart::result react(const RemoteReservationRejected& evt) { @@ -2187,6 +2190,7 @@ protected: return discard_event(); } boost::statechart::result react(const RemoteReservationRevokedTooFull& evt); + boost::statechart::result react(const RemoteReservationRevoked& evt); boost::statechart::result react(const DeferBackfill& evt); boost::statechart::result react(const UnfoundBackfill& evt); void exit(); @@ -2286,10 +2290,12 @@ protected: // for compat with old peers boost::statechart::transition< RemoteReservationRejected, RepNotRecovering >, boost::statechart::transition< RemoteReservationCanceled, RepNotRecovering >, - boost::statechart::custom_reaction< BackfillTooFull > + boost::statechart::custom_reaction< BackfillTooFull >, + boost::statechart::custom_reaction< RemoteBackfillPreempted > > reactions; explicit RepRecovering(my_context ctx); boost::statechart::result react(const BackfillTooFull &evt); + boost::statechart::result react(const RemoteBackfillPreempted &evt); void exit(); }; -- 2.39.5