git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd/scrub: remove detection & handling of reservation timeouts
author: Ronen Friedman <rfriedma@redhat.com>
Fri, 22 Mar 2024 13:05:34 +0000 (08:05 -0500)
committer: Ronen Friedman <rfriedma@redhat.com>
Wed, 24 Apr 2024 11:33:27 +0000 (06:33 -0500)
as no timeout can be set for reserver-based (queued)
reservation requests.

Fixes: https://tracker.ceph.com/issues/65044
Signed-off-by: Ronen Friedman <rfriedma@redhat.com>
(cherry picked from commit e8db183ce4f1c6db34129cc1c8c57912643d1ace)
Signed-off-by: Ronen Friedman <rfriedma@redhat.com>
src/osd/scrubber/scrub_machine.cc
src/osd/scrubber/scrub_machine.h

index ce4196e1ebbb13a832d3499a7d14b4e8275450d1..541cb5df94e1b8fa8415e118ea0ef22ce81815f5 100644 (file)
@@ -234,18 +234,7 @@ ReservingReplicas::ReservingReplicas(my_context ctx)
       *scrbr, context<PrimaryActive>().last_request_sent_nonce,
       *session.m_perf_set);
 
-  if (session.m_reservations->get_last_sent()) {
-    // the 1'st reservation request was sent
-
-    auto timeout = scrbr->get_pg_cct()->_conf.get_val<milliseconds>(
-       "osd_scrub_reservation_timeout");
-    if (timeout.count() > 0) {
-      // Start a timer to handle case where the replicas take a long time to
-      // ack the reservation.  See ReservationTimeout handler below.
-      m_timeout_token =
-         machine.schedule_timer_event_after<ReservationTimeout>(timeout);
-    }
-  } else {
+  if (!session.m_reservations->get_last_sent()) {
     // no replicas to reserve
     dout(10) << "no replicas to reserve" << dendl;
     // can't transit directly from here
@@ -301,24 +290,6 @@ sc::result ReservingReplicas::react(const ReplicaReject& ev)
   return transit<PrimaryIdle>();
 }
 
-sc::result ReservingReplicas::react(const ReservationTimeout&)
-{
-  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
-  auto& session = context<Session>();
-  dout(10) << "ReservingReplicas::react(const ReservationTimeout&)" << dendl;
-  session.m_reservations->log_failure_and_duration(scrbcnt_resrv_timed_out);
-
-  const auto msg = fmt::format(
-      "osd.{} PgScrubber: {} timeout on reserving replicas (since {})",
-      scrbr->get_whoami(), scrbr->get_spgid(), entered_at);
-  dout(1) << msg << dendl;
-  scrbr->get_clog()->warn() << msg;
-
-  // cause the scrubber to stop the scrub session, marking 'reservation
-  // failure' as the cause (affecting future scheduling)
-  scrbr->flag_reservations_failure();
-  return transit<PrimaryIdle>();
-}
 
 // ----------------------- ActiveScrubbing -----------------------------------
 
index 254e7861ed9568eae79a086f2a863ece7662d30b..a9902c8df81438c476e51ffc61a1b04864a62750 100644 (file)
@@ -156,9 +156,6 @@ VALUE_EVENT(ReserverGranted, AsyncScrubResData);
 /// all replicas have granted our reserve request
 MEV(RemotesReserved)
 
-/// reservations have timed out
-MEV(ReservationTimeout)
-
 /// initiate a new scrubbing session (relevant if we are a Primary)
 MEV(StartScrub)
 
@@ -565,25 +562,21 @@ struct Session : sc::state<Session, PrimaryActive, ReservingReplicas>,
   ScrubTimePoint m_session_started_at{ScrubClock::now()};
 };
 
-struct ReservingReplicas : sc::state<ReservingReplicas, Session>,
-                          NamedSimply {
+struct ReservingReplicas : sc::state<ReservingReplicas, Session>, NamedSimply {
   explicit ReservingReplicas(my_context ctx);
   ~ReservingReplicas();
-  using reactions = mpl::list<sc::custom_reaction<ReplicaGrant>,
-                             sc::custom_reaction<ReplicaReject>,
-                             sc::transition<RemotesReserved, ActiveScrubbing>,
-                             sc::custom_reaction<ReservationTimeout>>;
+  using reactions = mpl::list<
+      sc::custom_reaction<ReplicaGrant>,
+      sc::custom_reaction<ReplicaReject>,
+      sc::transition<RemotesReserved, ActiveScrubbing>>;
 
   ScrubTimePoint entered_at = ScrubClock::now();
-  ScrubMachine::timer_event_token_t m_timeout_token;
 
   /// a "raw" event carrying a peer's grant response
   sc::result react(const ReplicaGrant&);
 
   /// a "raw" event carrying a peer's denial response
   sc::result react(const ReplicaReject&);
-
-  sc::result react(const ReservationTimeout&);
 };