From: Ronen Friedman Date: Fri, 20 Aug 2021 15:33:25 +0000 (+0000) Subject: osd/scrub: remove the fast track (w/o resources acquisition) for repair scrubs X-Git-Tag: v16.2.8~40^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=643715be74d0e6ec412eb7a3f239622a64dc0d8b;p=ceph.git osd/scrub: remove the fast track (w/o resources acquisition) for repair scrubs Previously, after-repair scrubs were started without waiting for either local or remote OSDs' scrub resources. The tagging of scrub sessions by the replicas is based on monitoring replica-request and replica-release messages from the primary. Scrub-map requests arriving without any reservations interfere with this mechanism. The benefits of this fast-track were limited at best, and do not justify the complexity of a solution that accommodates both the bypass and the tagging. Signed-off-by: Ronen Friedman (cherry picked from commit 7011d73ef243b1999a574c786523e4f3fa865f9d) --- diff --git a/src/osd/PG.cc b/src/osd/PG.cc index b3fb71c4f6253..6121ca1cfc38d 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -2564,7 +2564,7 @@ std::pair<ghobject_t, bool> PG::do_delete_work( epoch_t e = get_osdmap()->get_epoch(); PGRef pgref(this); auto delete_requeue_callback = new LambdaContext([this, pgref, e](int r) { - dout(20) << __func__ << " wake up at " + dout(20) << "do_delete_work() [cb] wake up at " << ceph_clock_now() << ", re-queuing delete" << dendl; std::scoped_lock locker{*this}; diff --git a/src/osd/PG.h b/src/osd/PG.h index 544bb9481bcb1..214785bdeb90a 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -390,6 +390,7 @@ public: /** * a special version of PG::scrub(), which: * - is initiated after repair, and + * (not true anymore:) * - is not required to allocate local/remote OSD scrub resources */ void recovery_scrub(epoch_t queued, ThreadPool::TPHandle& handle) diff --git a/src/osd/scrub_machine.h b/src/osd/scrub_machine.h index f3033881b12bf..3ba1e92309e5c 100644 --- a/src/osd/scrub_machine.h +++ 
b/src/osd/scrub_machine.h @@ -143,6 +143,11 @@ class ScrubMachine : public sc::state_machine<ScrubMachine, NotActive> { * not required to reserve resources. * - (for a replica) 'StartReplica' or 'StartReplicaNoWait', triggered by an incoming * MOSDRepScrub message. + * + * note (20.8.21): originally, AfterRepairScrub was triggering a scrub without waiting + * for replica resources to be acquired. But once replicas started using the + * resource-request to identify and tag the scrub session, this bypass cannot be + * supported anymore. */ struct NotActive : sc::state<NotActive, ScrubMachine> { explicit NotActive(my_context ctx); @@ -150,7 +155,7 @@ struct NotActive : sc::state<NotActive, ScrubMachine> { using reactions = mpl::list<sc::transition<StartScrub, ReservingReplicas>, // a scrubbing that was initiated at recovery completion, // and requires no resource reservations: - sc::transition<AfterRepairScrub, ActiveScrubbing>, + sc::transition<AfterRepairScrub, ReservingReplicas>, sc::transition<StartReplica, ReplicaWaitUpdates>, sc::transition<StartReplicaNoWait, ActiveReplica>>; };