git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd/scrub: allow longer waits for replicas to respond 63940/head
author: Ronen Friedman <rfriedma@redhat.com>
Sun, 15 Jun 2025 19:40:59 +0000 (14:40 -0500)
committer: Ronen Friedman <rfriedma@redhat.com>
Sun, 15 Jun 2025 19:49:30 +0000 (14:49 -0500)
Increase the two timeouts associated with replica
responses to scrub requests.

This addresses the case where a cluster event causes
some form of re-peering (e.g. an OSD in the active set
goes down) and a request times out before the new
interval is established. This scenario does not
lead to any real data loss or crashes, but it does
result in log warnings (and failed tests).

Fixes: https://tracker.ceph.com/issues/68698
Signed-off-by: Ronen Friedman <rfriedma@redhat.com>
src/common/options/osd.yaml.in
src/osd/scrubber/scrub_machine.cc

index b3e2a96396dea26a66fcad96637d3dc0b8e7805c..ccb64d853901051387527a3732cc3be4a312bdfa 100644 (file)
@@ -600,7 +600,7 @@ options:
   desc: Duration before issuing a cluster-log warning
   long_desc: Waiting too long for a replica to respond (after at least half of the
     replicas have responded).
-  default: 2200
+  default: 22000
   min: 500
   see_also:
   - osd_scrub_reservation_timeout
@@ -613,7 +613,7 @@ options:
   desc: Duration before aborting the scrub session
   long_desc: Waiting too long for some replicas to respond to
     scrub reservation requests.
-  default: 5000
+  default: 50000
   min: 2000
   see_also:
   - osd_scrub_slow_reservation_response
index 33c1d97a840480efbeb0405271ca02cc8efff53b..66f7a7b02461f7c4ffe680c1eaa6b99954df1584 100644 (file)
@@ -151,7 +151,7 @@ sc::result ReservingReplicas::react(const ReservationTimeout&)
   scrbr->get_clog()->warn()
     << "osd." << scrbr->get_whoami()
     << " PgScrubber: " << scrbr->get_spgid()
-    << " timeout on reserving replicsa (since " << entered_at
+    << " timeout on reserving replicas (since " << entered_at
     << ")";
 
   scrbr->on_replica_reservation_timeout();