git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd/scrub: remove the fast track (w/o resources acquisition) for repair scrubs 45374/head
author: Ronen Friedman <rfriedma@redhat.com>
Fri, 20 Aug 2021 15:33:25 +0000 (15:33 +0000)
committer: Ronen Friedman <rfriedma@redhat.com>
Wed, 16 Mar 2022 08:45:26 +0000 (08:45 +0000)
Previously, after-repair scrubs were started without waiting for either local
or remote OSDs' scrub resources.
The tagging of scrub sessions by the replicas is based on monitoring replica-request
and replica-release messages from the primary. Scrub-map requests arriving without
any reservations interfere with this mechanism.
The benefits of this fast-track were limited at best, and do not justify the complexity
of a solution that accommodates both the bypass and the tagging.

Signed-off-by: Ronen Friedman <rfriedma@redhat.com>
(cherry picked from commit 7011d73ef243b1999a574c786523e4f3fa865f9d)

src/osd/PG.cc
src/osd/PG.h
src/osd/scrub_machine.h

index b3fb71c4f6253b181f30c112d61e1e23ad1a227f..6121ca1cfc38de7b220e6291984eedec9809da23 100644 (file)
@@ -2564,7 +2564,7 @@ std::pair<ghobject_t, bool> PG::do_delete_work(
       epoch_t e = get_osdmap()->get_epoch();
       PGRef pgref(this);
       auto delete_requeue_callback = new LambdaContext([this, pgref, e](int r) {
-        dout(20) << __func__ << " wake up at "
+        dout(20) << "do_delete_work() [cb] wake up at "
                  << ceph_clock_now()
                 << ", re-queuing delete" << dendl;
         std::scoped_lock locker{*this};
index 544bb9481bcb10a7e9ada921871161f41164ed2b..214785bdeb90a91775d58c8753e9e9ad92421c46 100644 (file)
@@ -390,6 +390,7 @@ public:
   /**
    *  a special version of PG::scrub(), which:
    *  - is initiated after repair, and
+   * (not true anymore:)
    *  - is not required to allocate local/remote OSD scrub resources
    */
   void recovery_scrub(epoch_t queued, ThreadPool::TPHandle& handle)
index f3033881b12bf8e81f30bcb384d129d77d9adea1..3ba1e92309e5c971f4781185f2b45d1deac4275c 100644 (file)
@@ -143,6 +143,11 @@ class ScrubMachine : public sc::state_machine<ScrubMachine, NotActive> {
  *    not required to reserve resources.
  *  - (for a replica) 'StartReplica' or 'StartReplicaNoWait', triggered by an incoming
  *    MOSDRepScrub message.
+ *
+ *  note (20.8.21): originally, AfterRepairScrub was triggering a scrub without waiting
+ *   for replica resources to be acquired. But once replicas started using the
+ *   resource-request to identify and tag the scrub session, this bypass cannot be
+ *   supported anymore.
  */
 struct NotActive : sc::state<NotActive, ScrubMachine> {
   explicit NotActive(my_context ctx);
@@ -150,7 +155,7 @@ struct NotActive : sc::state<NotActive, ScrubMachine> {
   using reactions = mpl::list<sc::transition<StartScrub, ReservingReplicas>,
                              // a scrubbing that was initiated at recovery completion,
                              // and requires no resource reservations:
-                             sc::transition<AfterRepairScrub, ActiveScrubbing>,
+                             sc::transition<AfterRepairScrub, ReservingReplicas>,
                              sc::transition<StartReplica, ReplicaWaitUpdates>,
                              sc::transition<StartReplicaNoWait, ActiveReplica>>;
 };