From fe6dc0994c17ee2771c7246b1a1734ae91f7c5b5 Mon Sep 17 00:00:00 2001 From: Ronen Friedman Date: Sun, 15 Jun 2025 14:40:59 -0500 Subject: [PATCH] osd/scrub: allow longer waits for replicas to respond Increase the two timeouts associated with replica responses to scrub requests. This fixes the case where a cluster event causes some form of repeering (e.g. an OSD in the active set goes down) and a request times out before the new interval is established. This scenario does not lead to any real data loss or crashes, but it does result in log warnings (and failed tests). Fixes: https://tracker.ceph.com/issues/68698 Signed-off-by: Ronen Friedman --- src/common/options/osd.yaml.in | 4 ++-- src/osd/scrubber/scrub_machine.cc | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/common/options/osd.yaml.in b/src/common/options/osd.yaml.in index b3e2a96396dea..ccb64d8539010 100644 --- a/src/common/options/osd.yaml.in +++ b/src/common/options/osd.yaml.in @@ -600,7 +600,7 @@ options: desc: Duration before issuing a cluster-log warning long_desc: Waiting too long for a replica to respond (after at least half of the replicas have responded). - default: 2200 + default: 22000 min: 500 see_also: - osd_scrub_reservation_timeout @@ -613,7 +613,7 @@ options: desc: Duration before aborting the scrub session long_desc: Waiting too long for some replicas to respond to scrub reservation requests. - default: 5000 + default: 50000 min: 2000 see_also: - osd_scrub_slow_reservation_response diff --git a/src/osd/scrubber/scrub_machine.cc b/src/osd/scrubber/scrub_machine.cc index 33c1d97a84048..66f7a7b02461f 100644 --- a/src/osd/scrubber/scrub_machine.cc +++ b/src/osd/scrubber/scrub_machine.cc @@ -151,7 +151,7 @@ sc::result ReservingReplicas::react(const ReservationTimeout&) scrbr->get_clog()->warn() << "osd." 
<< scrbr->get_whoami() << " PgScrubber: " << scrbr->get_spgid() - << " timeout on reserving replicsa (since " << entered_at + << " timeout on reserving replicas (since " << entered_at << ")"; scrbr->on_replica_reservation_timeout(); -- 2.39.5