From 643715be74d0e6ec412eb7a3f239622a64dc0d8b Mon Sep 17 00:00:00 2001 From: Ronen Friedman Date: Fri, 20 Aug 2021 15:33:25 +0000 Subject: [PATCH] osd/scrub: remove the fast track (w/o resources acquisition) for repair scrubs Previously, after-repair scrubs were started without waiting for either local or remote OSDs' scrub resources. The tagging of scrub sessions by the replicas is based on monitoring replica-request and replica-release messages from the primary. Scrub-map requests arriving without any reservations interfere with this mechanism. The benefits of this fast-track were limited at best, and do not justify the complexity of a solution that accommodates both the bypass and the tagging. Signed-off-by: Ronen Friedman (cherry picked from commit 7011d73ef243b1999a574c786523e4f3fa865f9d) --- src/osd/PG.cc | 2 +- src/osd/PG.h | 1 + src/osd/scrub_machine.h | 7 ++++++- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/osd/PG.cc b/src/osd/PG.cc index b3fb71c4f6253..6121ca1cfc38d 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -2564,7 +2564,7 @@ std::pair PG::do_delete_work( epoch_t e = get_osdmap()->get_epoch(); PGRef pgref(this); auto delete_requeue_callback = new LambdaContext([this, pgref, e](int r) { - dout(20) << __func__ << " wake up at " + dout(20) << "do_delete_work() [cb] wake up at " << ceph_clock_now() << ", re-queuing delete" << dendl; std::scoped_lock locker{*this}; diff --git a/src/osd/PG.h b/src/osd/PG.h index 544bb9481bcb1..214785bdeb90a 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -390,6 +390,7 @@ public: /** * a special version of PG::scrub(), which: * - is initiated after repair, and + * (not true anymore:) * - is not required to allocate local/remote OSD scrub resources */ void recovery_scrub(epoch_t queued, ThreadPool::TPHandle& handle) diff --git a/src/osd/scrub_machine.h b/src/osd/scrub_machine.h index f3033881b12bf..3ba1e92309e5c 100644 --- a/src/osd/scrub_machine.h +++ b/src/osd/scrub_machine.h @@ -143,6 +143,11 @@ class ScrubMachine : public sc::state_machine { * not required to reserve resources. * - (for a replica) 'StartReplica' or 'StartReplicaNoWait', triggered by an incoming * MOSDRepScrub message. + * + * note (20.8.21): originally, AfterRepairScrub was triggering a scrub without waiting + * for replica resources to be acquired. But once replicas started using the + * resource-request to identify and tag the scrub session, this bypass cannot be + * supported anymore. */ struct NotActive : sc::state { explicit NotActive(my_context ctx); @@ -150,7 +155,7 @@ struct NotActive : sc::state { using reactions = mpl::list, // a scrubbing that was initiated at recovery completion, // and requires no resource reservations: - sc::transition, + sc::transition, sc::transition, sc::transition>; }; -- 2.39.5