From: Ronen Friedman Date: Fri, 8 Aug 2025 13:03:16 +0000 (-0500) Subject: osd/scrub: do not limit operator-initiated repairs X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=refs%2Fpull%2F64915%2Fhead;p=ceph.git osd/scrub: do not limit operator-initiated repairs 'auto-repair' scrubs are limited to a maximum of 'scrub_auto_repair_num_errors' damaged objects. However, operator-initiated repairs should not be limited by that number. Alas, a bug in a previous commit (97de817) modified the code in such a way that it applied the 'scrub_auto_repair_num_errors' limit to all repairs, including operator-initiated ones. This commit fixes that. Fixes: https://tracker.ceph.com/issues/72438 Note: the fix is similar to 'Tentacle' & 'main' fixes (PR#64860 & PR#64849), but - as the surrounding code was changed, this is not a backport. Signed-off-by: Ronen Friedman --- diff --git a/src/osd/scrubber/pg_scrubber.cc b/src/osd/scrubber/pg_scrubber.cc index cb259f8d7bd1..bea4634fc73c 100644 --- a/src/osd/scrubber/pg_scrubber.cc +++ b/src/osd/scrubber/pg_scrubber.cc @@ -1791,11 +1791,16 @@ void PgScrubber::scrub_finish() // if the repair request comes from auto-repair and there is a large // number of objects known to be damaged, we cancel the auto-repair - if (m_is_repair && m_flags.auto_repair && + if (m_is_repair && m_flags.auto_repair && !m_flags.required && m_be->authoritative_peers_count() > static_cast<int>(m_pg->cct->_conf->osd_scrub_auto_repair_num_errors)) { - dout(10) << __func__ << " undoing the repair" << dendl; + dout(5) << fmt::format( + "{}: undoing the repair. Damaged objects count ({}) is " + "above configured limit ({})", + __func__, m_be->authoritative_peers_count(), + m_pg->cct->_conf->osd_scrub_auto_repair_num_errors) + << dendl; state_clear(PG_STATE_REPAIR); // not expected to be set, anyway m_is_repair = false; update_op_mode_text();