osd/scrub: handle reservation completion within the Scrubber FSM
author     Ronen Friedman <rfriedma@redhat.com>
           Sat, 14 Oct 2023 12:36:06 +0000 (07:36 -0500)
committer  Ronen Friedman <rfriedma@redhat.com>
           Sat, 14 Oct 2023 18:49:01 +0000 (21:49 +0300)
with special handling for the 0-replica case.

Signed-off-by: Ronen Friedman <rfriedma@redhat.com>
src/osd/OSD.cc
src/osd/OSD.h
src/osd/PG.h
src/osd/scheduler/OpSchedulerItem.cc
src/osd/scheduler/OpSchedulerItem.h
src/osd/scrubber/pg_scrubber.cc
src/osd/scrubber/pg_scrubber.h
src/osd/scrubber/scrub_machine.cc
src/osd/scrubber/scrub_reservations.cc
src/osd/scrubber/scrub_reservations.h
src/osd/scrubber_common.h

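For orientation, a condensed sketch of the new ReservingReplicas entry logic (paraphrased from the scrub_machine.cc hunk further below; logging and the surrounding state-machine boilerplate are omitted):

    // sketch only - see the scrub_machine.cc diff below for the actual change
    ReservingReplicas::ReservingReplicas(my_context ctx) : my_base(ctx)
    {
      // constructing ReplicaReservations sends the first reservation request,
      // if there is any replica to reserve
      context<Session>().m_reservations.emplace(*scrbr);

      if (context<Session>().m_reservations->get_last_sent()) {
        // a request is in flight: guard it with the reservation timeout
        auto timeout = scrbr->get_pg_cct()->_conf.get_val<milliseconds>(
            "osd_scrub_reservation_timeout");
        if (timeout.count() > 0) {
          m_timeout_token =
              machine.schedule_timer_event_after<ReservationTimeout>(timeout);
        }
      } else {
        // the 0-replica case: nothing to reserve. A state constructor cannot
        // transit directly, so the completion event is posted instead.
        post_event(RemotesReserved{});
      }
    }

With completion handled inside the FSM, the detour through the OSD queue (PGScrubResourcesOK, queue_for_scrub_granted() and send_remotes_reserved()) is no longer needed, and the diffs below remove it.
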
diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc
index 88a0bc037564bc2b0ef07d581fd1d26aa1c17de3..fa938c08278132e18e2105343ab7da25702c3cfc 100644
--- a/src/osd/OSD.cc
+++ b/src/osd/OSD.cc
@@ -1799,12 +1799,6 @@ void OSDService::queue_for_rep_scrub_resched(PG* pg,
                                           act_token);
 }
 
-void OSDService::queue_for_scrub_granted(PG* pg, Scrub::scrub_prio_t with_priority)
-{
-  // Resulting scrub event: 'RemotesReserved'
-  queue_scrub_event_msg<PGScrubResourcesOK>(pg, with_priority);
-}
-
 void OSDService::queue_for_scrub_resched(PG* pg, Scrub::scrub_prio_t with_priority)
 {
   // Resulting scrub event: 'InternalSchedScrub'
diff --git a/src/osd/OSD.h b/src/osd/OSD.h
index d86443351cf28084ff27e2a6b2088a0e114e65f8..38f9a6ca8475a41a563242a967bb92d6cdb9e81e 100644
--- a/src/osd/OSD.h
+++ b/src/osd/OSD.h
@@ -504,9 +504,6 @@ public:
 
   void queue_scrub_after_repair(PG* pg, Scrub::scrub_prio_t with_priority);
 
-  /// queue the message (-> event) that all replicas have reserved scrub resources for us
-  void queue_for_scrub_granted(PG* pg, Scrub::scrub_prio_t with_priority);
-
   /// Signals either (a) the end of a sleep period, or (b) a recheck of the availability
   /// of the primary map being created by the backend.
   void queue_for_scrub_resched(PG* pg, Scrub::scrub_prio_t with_priority);
diff --git a/src/osd/PG.h b/src/osd/PG.h
index fe335b85e0003e5073dbf2003ecb2e8b00c567b9..70c1d12b2105ced2bc7ea016e228f075a222740a 100644
--- a/src/osd/PG.h
+++ b/src/osd/PG.h
@@ -449,11 +449,6 @@ public:
                        "SchedReplica");
   }
 
-  void scrub_send_resources_granted(epoch_t queued, ThreadPool::TPHandle& handle)
-  {
-    forward_scrub_event(&ScrubPgIF::send_remotes_reserved, queued, "RemotesReserved");
-  }
-
   void scrub_send_scrub_resched(epoch_t queued, ThreadPool::TPHandle& handle)
   {
     forward_scrub_event(&ScrubPgIF::send_scrub_resched, queued, "InternalSchedScrub");
diff --git a/src/osd/scheduler/OpSchedulerItem.cc b/src/osd/scheduler/OpSchedulerItem.cc
index 0641aafdc1c94310caa2c627c92cffa185a8ea08..750fc2a4f58ef650d93f6967f82b526e6e223895 100644
--- a/src/osd/scheduler/OpSchedulerItem.cc
+++ b/src/osd/scheduler/OpSchedulerItem.cc
@@ -77,15 +77,6 @@ void PGScrubResched::run(OSD* osd,
   pg->unlock();
 }
 
-void PGScrubResourcesOK::run(OSD* osd,
-                            OSDShard* sdata,
-                            PGRef& pg,
-                            ThreadPool::TPHandle& handle)
-{
-  pg->scrub_send_resources_granted(epoch_queued, handle);
-  pg->unlock();
-}
-
 void PGScrubPushesUpdate::run(OSD* osd,
                              OSDShard* sdata,
                              PGRef& pg,
diff --git a/src/osd/scheduler/OpSchedulerItem.h b/src/osd/scheduler/OpSchedulerItem.h
index 2803169a9bf973eccc75ee3c3c6a8b2c7fd408d2..7fb7125a14169b1bf64f71f7a98d7dda7824b8d4 100644
--- a/src/osd/scheduler/OpSchedulerItem.h
+++ b/src/osd/scheduler/OpSchedulerItem.h
@@ -373,17 +373,6 @@ class PGScrubResched : public PGScrubItem {
   void run(OSD* osd, OSDShard* sdata, PGRef& pg, ThreadPool::TPHandle& handle) final;
 };
 
-/**
- *  all replicas have granted our scrub resources request
- */
-class PGScrubResourcesOK : public PGScrubItem {
- public:
-  PGScrubResourcesOK(spg_t pg, epoch_t epoch_queued)
-      : PGScrubItem{pg, epoch_queued, "PGScrubResourcesOK"}
-  {}
-  void run(OSD* osd, OSDShard* sdata, PGRef& pg, ThreadPool::TPHandle& handle) final;
-};
-
 /**
  *  called when a repair process completes, to initiate scrubbing. No local/remote
  *  resources are allocated.
diff --git a/src/osd/scrubber/pg_scrubber.cc b/src/osd/scrubber/pg_scrubber.cc
index 98290a06ac4591e1683c9c81f2f1a7784f07aae2..b090ec113ee3de81bd36af7d6fe9b46f19b387c4 100644
--- a/src/osd/scrubber/pg_scrubber.cc
+++ b/src/osd/scrubber/pg_scrubber.cc
@@ -359,17 +359,6 @@ void PgScrubber::send_replica_pushes_upd(epoch_t epoch_queued)
   dout(10) << "scrubber event --<< " << __func__ << dendl;
 }
 
-void PgScrubber::send_remotes_reserved(epoch_t epoch_queued)
-{
-  dout(10) << "scrubber event -->> " << __func__ << " epoch: " << epoch_queued
-          << dendl;
-  // note: scrub is not active yet
-  if (check_interval(epoch_queued)) {
-    m_fsm->process_event(RemotesReserved{});
-  }
-  dout(10) << "scrubber event --<< " << __func__ << dendl;
-}
-
 void PgScrubber::send_chunk_free(epoch_t epoch_queued)
 {
   dout(10) << "scrubber event -->> " << __func__ << " epoch: " << epoch_queued
diff --git a/src/osd/scrubber/pg_scrubber.h b/src/osd/scrubber/pg_scrubber.h
index 0c8fa8c34fb790cafbfd93ea9689448ac7530d67..97bf7da8f22a173fd9c1591cd9083ecf0c36f667 100644
--- a/src/osd/scrubber/pg_scrubber.h
+++ b/src/osd/scrubber/pg_scrubber.h
@@ -445,8 +445,6 @@ class PgScrubber : public ScrubPgIF,
 
   void send_preempted_replica() final;
 
-  void send_remotes_reserved(epoch_t epoch_queued) final;
-
   /**
    *  does the PG have newer updates than what we (the scrubber) know?
    */
diff --git a/src/osd/scrubber/scrub_machine.cc b/src/osd/scrubber/scrub_machine.cc
index cc257a47f0e314e254a026493c92b5ae7859d4da..40b43b6e07702fedd75d04d5c39ab3daf88d3eea 100644
--- a/src/osd/scrubber/scrub_machine.cc
+++ b/src/osd/scrubber/scrub_machine.cc
@@ -163,13 +163,22 @@ ReservingReplicas::ReservingReplicas(my_context ctx)
   // initiate the reservation process
   context<Session>().m_reservations.emplace(*scrbr);
 
-  auto timeout = scrbr->get_pg_cct()->_conf.get_val<milliseconds>(
-      "osd_scrub_reservation_timeout");
-  if (timeout.count() > 0) {
-    // Start a timer to handle case where the replicas take a long time to
-    // ack the reservation.  See ReservationTimeout handler below.
-    m_timeout_token = machine.schedule_timer_event_after<ReservationTimeout>(
-      timeout);
+  if (context<Session>().m_reservations->get_last_sent()) {
+    // the first reservation request was sent
+
+    auto timeout = scrbr->get_pg_cct()->_conf.get_val<milliseconds>(
+       "osd_scrub_reservation_timeout");
+    if (timeout.count() > 0) {
+      // Start a timer to handle case where the replicas take a long time to
+      // ack the reservation.  See ReservationTimeout handler below.
+      m_timeout_token =
+         machine.schedule_timer_event_after<ReservationTimeout>(timeout);
+    }
+  } else {
+    // no replicas to reserve
+    dout(10) << "no replicas to reserve" << dendl;
+    // can't transit directly from here
+    post_event(RemotesReserved{});
   }
 }
 
@@ -186,7 +195,11 @@ sc::result ReservingReplicas::react(const ReplicaGrant& ev)
   DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
   dout(10) << "ReservingReplicas::react(const ReplicaGrant&)" << dendl;
 
-  context<Session>().m_reservations->handle_reserve_grant(ev.m_op, ev.m_from);
+  if (context<Session>().m_reservations->handle_reserve_grant(
+         ev.m_op, ev.m_from)) {
+    // we are done with the reservation process
+    return transit<ActiveScrubbing>();
+  }
   return discard_event();
 }
 
diff --git a/src/osd/scrubber/scrub_reservations.cc b/src/osd/scrubber/scrub_reservations.cc
index 011ace2a28363b4d9d3015f5b19000995b6f6cd2..4d187a2a644f1b0b2e7bccdaf7903a6e704d97f2 100644
--- a/src/osd/scrubber/scrub_reservations.cc
+++ b/src/osd/scrubber/scrub_reservations.cc
@@ -84,7 +84,7 @@ ReplicaReservations::~ReplicaReservations()
   release_all();
 }
 
-void ReplicaReservations::handle_reserve_grant(OpRequestRef op, pg_shard_t from)
+bool ReplicaReservations::handle_reserve_grant(OpRequestRef op, pg_shard_t from)
 {
   // verify that the grant is from the peer we expected. If not?
   // for now - abort the OSD. \todo reconsider the reaction.
@@ -94,7 +94,7 @@ void ReplicaReservations::handle_reserve_grant(OpRequestRef op, pg_shard_t from)
                   get_last_sent().value_or(pg_shard_t{}))
            << dendl;
     ceph_assert(from == get_last_sent());
-    return;
+    return false;
   }
 
   auto elapsed = clock::now() - m_last_request_sent_at;
@@ -115,31 +115,31 @@ void ReplicaReservations::handle_reserve_grant(OpRequestRef op, pg_shard_t from)
                  active_requests_cnt(), m_sorted_secondaries.size(),
                  duration_cast<milliseconds>(elapsed).count())
           << dendl;
-  send_next_reservation_or_complete();
+  return send_next_reservation_or_complete();
 }
 
-void ReplicaReservations::send_next_reservation_or_complete()
+bool ReplicaReservations::send_next_reservation_or_complete()
 {
   if (m_next_to_request == m_sorted_secondaries.cend()) {
     // granted by all replicas
     dout(10) << "remote reservation complete" << dendl;
-    m_osds->queue_for_scrub_granted(m_pg, scrub_prio_t::low_priority);
-
-  } else {
-    // send the next reservation request
-    const auto peer = *m_next_to_request;
-    const auto epoch = m_pg->get_osdmap_epoch();
-    auto m = make_message<MOSDScrubReserve>(
-       spg_t{m_pgid, peer.shard}, epoch, MOSDScrubReserve::REQUEST,
-       m_pg->pg_whoami);
-    m_pg->send_cluster_message(peer.osd, m, epoch, false);
-    m_last_request_sent_at = clock::now();
-    dout(10) << fmt::format(
-                   "reserving {} (the {} of {} replicas)", *m_next_to_request,
-                   active_requests_cnt()+1, m_sorted_secondaries.size())
-            << dendl;
-    m_next_to_request++;
+    return true;  // done
   }
+
+  // send the next reservation request
+  const auto peer = *m_next_to_request;
+  const auto epoch = m_pg->get_osdmap_epoch();
+  auto m = make_message<MOSDScrubReserve>(
+      spg_t{m_pgid, peer.shard}, epoch, MOSDScrubReserve::REQUEST,
+      m_pg->pg_whoami);
+  m_pg->send_cluster_message(peer.osd, m, epoch, false);
+  m_last_request_sent_at = clock::now();
+  dout(10) << fmt::format(
+                 "reserving {} (the {} of {} replicas)", *m_next_to_request,
+                 active_requests_cnt() + 1, m_sorted_secondaries.size())
+          << dendl;
+  m_next_to_request++;
+  return false;
 }
 
 void ReplicaReservations::verify_rejections_source(
diff --git a/src/osd/scrubber/scrub_reservations.h b/src/osd/scrubber/scrub_reservations.h
index 634e7e580027fed687b2df853663b7c961445f17..a603c70735631e8729688980066a16c7fb98297d 100644
--- a/src/osd/scrubber/scrub_reservations.h
+++ b/src/osd/scrubber/scrub_reservations.h
@@ -79,8 +79,11 @@ class ReplicaReservations {
    * the replica we are expecting a reply from) is noted, and triggers
    * one of two: either sending a reservation request to the next replica,
    * or notifying the scrubber that we have reserved them all.
+   *
+   * \returns true if there are no more replicas to send reservation requests
+   * to (i.e., the scrubber should proceed to the next phase), false otherwise.
    */
-  void handle_reserve_grant(OpRequestRef op, pg_shard_t from);
+  bool handle_reserve_grant(OpRequestRef op, pg_shard_t from);
 
   /**
    * Verify that the sender of the received rejection is the replica we
@@ -105,6 +108,9 @@ class ReplicaReservations {
    */
   void discard_remote_reservations();
 
+  /// the only replica we are expecting a reply from
+  std::optional<pg_shard_t> get_last_sent() const;
+
   // note: 'public', as accessed via the 'standard' dout_prefix() macro
   std::ostream& gen_prefix(std::ostream& out, std::string fn) const;
 
@@ -112,17 +118,14 @@ class ReplicaReservations {
   /// send 'release' messages to all replicas we have managed to reserve
   void release_all();
 
-  /// the only replica we are expecting a reply from
-  std::optional<pg_shard_t> get_last_sent() const;
-
   /// The number of requests that have been sent (and not rejected) so far.
   size_t active_requests_cnt() const;
 
   /**
-   * Either send a reservation request to the next replica, or notify the
-   * scrubber that we have reserved all the replicas.
+   * Send a reservation request to the next replica.
+   * - if there are no more replicas to send requests to, return true
    */
-  void send_next_reservation_or_complete();
+  bool send_next_reservation_or_complete();
 };
 
 } // namespace Scrub
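
Taken together with the scrub_machine.cc change above, the boolean now returned by handle_reserve_grant() is what drives the state transition. A minimal sketch of the consuming side (condensed from the ReservingReplicas::react(const ReplicaGrant&) hunk above):

    // sketch only - condensed from the scrub_machine.cc diff in this commit
    sc::result ReservingReplicas::react(const ReplicaGrant& ev)
    {
      if (context<Session>().m_reservations->handle_reserve_grant(
              ev.m_op, ev.m_from)) {
        // the last replica has granted its reservation
        return transit<ActiveScrubbing>();
      }
      // either the grant was not the one expected, or a request to the next
      // replica was just sent - stay in ReservingReplicas and keep waiting
      return discard_event();
    }
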
diff --git a/src/osd/scrubber_common.h b/src/osd/scrubber_common.h
index 61be0bd5a62c4121693edb841f59a81c6a6891ac..745ea2388b67b43170c1d1e4bb091e20c9f396b8 100644
--- a/src/osd/scrubber_common.h
+++ b/src/osd/scrubber_common.h
@@ -356,12 +356,6 @@ struct ScrubPgIF {
    */
   virtual void clear_pgscrub_state() = 0;
 
-  /**
-   *  triggers the 'RemotesReserved' (all replicas granted scrub resources)
-   *  state-machine event
-   */
-  virtual void send_remotes_reserved(epoch_t epoch_queued) = 0;
-
   virtual void cleanup_store(ObjectStore::Transaction* t) = 0;
 
   virtual bool get_store_errors(const scrub_ls_arg_t& arg,