git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
osd/scrub: do not limit operator-initiated repairs 64915/head
author: Ronen Friedman <rfriedma@redhat.com>
Fri, 8 Aug 2025 13:03:16 +0000 (08:03 -0500)
committer: Ronen Friedman <rfriedma@redhat.com>
Fri, 8 Aug 2025 13:03:16 +0000 (08:03 -0500)
'auto-repair' scrubs are limited to a maximum of
'osd_scrub_auto_repair_num_errors' damaged objects.
However, operator-initiated repairs should not be limited
by that number. Alas, a previous commit (97de817)
introduced a bug that applied the
'osd_scrub_auto_repair_num_errors' limit to all repairs,
including operator-initiated ones. This commit fixes that.

Fixes: https://tracker.ceph.com/issues/72438
Note: the fix is similar to the 'Tentacle' & 'main' fixes
(PR#64860 & PR#64849) but, as the surrounding code
differs in this branch, this is not a backport.

Signed-off-by: Ronen Friedman <rfriedma@redhat.com>
src/osd/scrubber/pg_scrubber.cc

index cb259f8d7bd13326f5cdf5b6118bf94efbd84f30..bea4634fc73c4ba0798f34936fb61bc72c643f93 100644 (file)
@@ -1791,11 +1791,16 @@ void PgScrubber::scrub_finish()
 
   // if the repair request comes from auto-repair and there is a large
   // number of objects known to be damaged, we cancel the auto-repair
-  if (m_is_repair && m_flags.auto_repair &&
+  if (m_is_repair && m_flags.auto_repair && !m_flags.required &&
       m_be->authoritative_peers_count() >
        static_cast<int>(m_pg->cct->_conf->osd_scrub_auto_repair_num_errors)) {
 
-    dout(10) << __func__ << " undoing the repair" << dendl;
+    dout(5) << fmt::format(
+               "{}: undoing the repair. Damaged objects count ({}) is "
+               "above configured limit ({})",
+               __func__, m_be->authoritative_peers_count(),
+               m_pg->cct->_conf->osd_scrub_auto_repair_num_errors)
+      << dendl;
     state_clear(PG_STATE_REPAIR);  // not expected to be set, anyway
     m_is_repair = false;
     update_op_mode_text();