From 996d9064c6b484f238e6676377087c438d2b9a01 Mon Sep 17 00:00:00 2001 From: Ronen Friedman Date: Mon, 20 Jun 2022 12:47:57 +0000 Subject: [PATCH] scrub/osd: disable blocked-scrub warnings during some tests As some Teuthology tests seem to block objects for many minutes, we must not issue the "scrub is blocked for too long" warning (the warning causes those tests to fail). A new configuration parameter now controls the grace period before the warning is issued. Some tests were modified to set this configuration parameter to a large value. Signed-off-by: Ronen Friedman --- .../crimson-rados/basic/tasks/rados_api_tests.yaml | 1 + qa/tasks/thrashosds-health.yaml | 1 + src/common/options/osd.yaml.in | 8 ++++++++ src/osd/scrubber/pg_scrubber.cc | 12 +++++++++++- 4 files changed, 21 insertions(+), 1 deletion(-) diff --git a/qa/suites/crimson-rados/basic/tasks/rados_api_tests.yaml b/qa/suites/crimson-rados/basic/tasks/rados_api_tests.yaml index 328019f9fcf..ad8c921425b 100644 --- a/qa/suites/crimson-rados/basic/tasks/rados_api_tests.yaml +++ b/qa/suites/crimson-rados/basic/tasks/rados_api_tests.yaml @@ -19,6 +19,7 @@ overrides: osd: osd class load list: "*" osd class default list: "*" + osd blocked scrub grace period: 3600 tasks: - workunit: clients: diff --git a/qa/tasks/thrashosds-health.yaml b/qa/tasks/thrashosds-health.yaml index 1b2560d4eba..2989cc30708 100644 --- a/qa/tasks/thrashosds-health.yaml +++ b/qa/tasks/thrashosds-health.yaml @@ -3,6 +3,7 @@ overrides: conf: osd: osd max markdown count: 1000 + osd blocked scrub grace period: 3600 log-ignorelist: - overall HEALTH_ - \(OSDMAP_FLAGS\) diff --git a/src/common/options/osd.yaml.in b/src/common/options/osd.yaml.in index e170364a248..ece86684ef8 100644 --- a/src/common/options/osd.yaml.in +++ b/src/common/options/osd.yaml.in @@ -447,6 +447,14 @@ options: see_also: - osd_deep_scrub_large_omap_object_key_threshold with_legacy: true +# when scrubbing blocks on a locked object +- name: osd_blocked_scrub_grace_period + 
type: int + level: advanced + desc: Time (seconds) before issuing a cluster-log warning + long_desc: Waiting too long for an object in the scrubbed chunk to be unlocked. + default: 120 + with_legacy: true # where rados plugins are stored - name: osd_class_dir type: str diff --git a/src/osd/scrubber/pg_scrubber.cc b/src/osd/scrubber/pg_scrubber.cc index fd408083448..114e287aae4 100644 --- a/src/osd/scrubber/pg_scrubber.cc +++ b/src/osd/scrubber/pg_scrubber.cc @@ -775,7 +775,17 @@ bool PgScrubber::range_intersects_scrub(const hobject_t& start, Scrub::BlockedRangeWarning PgScrubber::acquire_blocked_alarm() { - ceph::timespan grace_period{m_debug_blockrange?4s:120s}; + int grace = get_pg_cct()->_conf->osd_blocked_scrub_grace_period; + if (grace == 0) { + // we will not be sending any alarms re the blocked object + dout(20) + << __func__ + << ": blocked-alarm disabled ('osd_blocked_scrub_grace_period' set to 0)" + << dendl; + return nullptr; + } + ceph::timespan grace_period{m_debug_blockrange ? 4s : seconds{grace}}; + dout(30) << __func__ << ": timeout:" << grace_period.count() << dendl; return std::make_unique(m_osds, grace_period, *this, -- 2.39.5