From 7011d73ef243b1999a574c786523e4f3fa865f9d Mon Sep 17 00:00:00 2001 From: Ronen Friedman Date: Fri, 20 Aug 2021 15:33:25 +0000 Subject: [PATCH] osd/scrub: remove the fast track (w/o resources acquisition) for repair scrubs Previously, after-repair scrubs were started without waiting for either local or remote OSDs' scrub resources. The tagging of scrub sessions by the replicas is based on monitoring replica-request and replica-release messages from the primary. Scrub-map requests arriving without any reservations interfere with this mechanism. The benefits of this fast-track were limited at best, and do not justify the complexity of a solution that accommodates both the bypass and the tagging. Signed-off-by: Ronen Friedman --- src/osd/PG.cc | 2 +- src/osd/PG.h | 1 + src/osd/scrub_machine.h | 7 ++++++- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 40d58561c35..644a6a9be9a 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -2558,7 +2558,7 @@ std::pair PG::do_delete_work( epoch_t e = get_osdmap()->get_epoch(); PGRef pgref(this); auto delete_requeue_callback = new LambdaContext([this, pgref, e](int r) { - dout(20) << __func__ << " wake up at " + dout(20) << "do_delete_work() [cb] wake up at " << ceph_clock_now() << ", re-queuing delete" << dendl; std::scoped_lock locker{*this}; diff --git a/src/osd/PG.h b/src/osd/PG.h index 4f765d9f2d8..6afa900ed6c 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -384,6 +384,7 @@ public: /** * a special version of PG::scrub(), which: * - is initiated after repair, and + * (not true anymore:) * - is not required to allocate local/remote OSD scrub resources */ void recovery_scrub(epoch_t queued, ThreadPool::TPHandle& handle) diff --git a/src/osd/scrub_machine.h b/src/osd/scrub_machine.h index dd44f106152..998bc5fe9c4 100644 --- a/src/osd/scrub_machine.h +++ b/src/osd/scrub_machine.h @@ -144,6 +144,11 @@ class ScrubMachine : public sc::state_machine { * not required to reserve resources. * - (for a replica) 'StartReplica' or 'StartReplicaNoWait', triggered by an incoming * MOSDRepScrub message. + * + * note (20.8.21): originally, AfterRepairScrub was triggering a scrub without waiting + * for replica resources to be acquired. But once replicas started using the + * resource-request to identify and tag the scrub session, this bypass cannot be + * supported anymore. */ struct NotActive : sc::state { explicit NotActive(my_context ctx); @@ -151,7 +156,7 @@ struct NotActive : sc::state { using reactions = mpl::list, // a scrubbing that was initiated at recovery completion, // and requires no resource reservations: - sc::transition, + sc::transition, sc::transition, sc::transition>; }; -- 2.47.3