]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd/scrub: no auto-repair in 'after_repair' type of scrubs 63525/head
author Ronen Friedman <rfriedma@redhat.com>
Tue, 27 May 2025 15:13:09 +0000 (10:13 -0500)
committer Ronen Friedman <rfriedma@redhat.com>
Tue, 27 May 2025 15:13:09 +0000 (10:13 -0500)
The deep scrubs that are initiated after a full "peering" repair are
not supposed to auto-repair any errors - just report them.

This behavior detail was inadvertently changed recently, and is fixed
here.

Fixes: https://tracker.ceph.com/issues/71463
Signed-off-by: Ronen Friedman <rfriedma@redhat.com>
src/osd/scrubber/pg_scrubber.cc
src/osd/scrubber/scrub_job.cc
src/osd/scrubber/scrub_job.h
src/osd/scrubber/scrub_queue_entry.h

index e314a2238cfb1a4c44ebdf9f48d758c57c2a7348..9dfc429070bbba4a3f457913fa121f0934db0045 100644 (file)
@@ -1606,7 +1606,9 @@ void PgScrubber::set_op_parameters(ScrubPGPreconds pg_cond)
     state_set(PG_STATE_DEEP_SCRUB);
   }
 
-  m_flags.auto_repair = m_is_deep && pg_cond.can_autorepair;
+  m_flags.auto_repair =
+      m_is_deep && pg_cond.can_autorepair &&
+      ScrubJob::is_autorepair_allowed(m_active_target->urgency());
 
   // m_is_repair is set for all repair cases - for operator-requested
   // repairs, for deep-scrubs initiated automatically after a shallow scrub
@@ -1845,7 +1847,10 @@ void PgScrubber::scrub_finish()
        static_cast<int>(m_pg->cct->_conf->osd_scrub_auto_repair_num_errors)) {
     ceph_assert(!m_is_deep);
     do_auto_scrub = true;
-    dout(15) << __func__ << " Try to auto repair after scrub errors" << dendl;
+    dout(10) << fmt::format("{}: will initiate a deep scrub to fix {} errors",
+                            __func__,
+                            m_be->authoritative_peers_count())
+             << dendl;
   }
 
   m_flags.deep_scrub_on_error = false;
index a5e870e3853f217a23ba0d7176824cba3b720fb9..7de1ade57e69cce731505fbc1d8d0e683ecdcdfb 100644 (file)
@@ -411,6 +411,12 @@ bool ScrubJob::has_high_queue_priority(urgency_t urgency)
 
 bool ScrubJob::is_repair_implied(urgency_t urgency)
 {
-  return urgency == urgency_t::after_repair ||
+  return urgency == urgency_t::repairing || urgency == urgency_t::must_repair;
+}
+
+bool ScrubJob::is_autorepair_allowed(urgency_t urgency)
+{
+  // note: 'after-repair' scrubs are not allowed to auto-repair
+  return urgency == urgency_t::periodic_regular ||
         urgency == urgency_t::repairing || urgency == urgency_t::must_repair;
 }
index 062f747605c936cbd4c90b8d759bb1359b55eb42..c170c3eb881a55faae20c72290edc91ba6bee1f0 100644 (file)
@@ -371,6 +371,8 @@ class ScrubJob {
   static bool has_high_queue_priority(urgency_t urgency);
 
   static bool is_repair_implied(urgency_t urgency);
+
+  static bool is_autorepair_allowed(urgency_t urgency);
 };
 }  // namespace Scrub
 
index 88af31a0ad1a968aa38911403302787b7f205283..edaff94524a0dac0bf72f158f0b33f86c3a4e405 100644 (file)
@@ -34,8 +34,7 @@ namespace Scrub {
  *
  * 'after_repair' - triggered immediately after a recovery process
  *   ('m_after_repair_scrub_required' was set).
- *   This type of scrub is always deep.
- *   (note: this urgency level is not implemented in this commit)
+ *   This type of scrub is always deep, and never auto-repairs.
  *
  * 'repairing' - the target is currently being deep-scrubbed with the repair
  *   flag set. Triggered by a previous shallow scrub that ended with errors.