git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
osd/scrub: added the scrub-abort command
author Ronen Friedman <rfriedma@redhat.com>
Sun, 7 Dec 2025 14:34:05 +0000 (08:34 -0600)
committer Ronen Friedman <rfriedma@redhat.com>
Tue, 20 Jan 2026 16:40:03 +0000 (16:40 +0000)
and its handling in the PgScrubber.

Fixes: https://tracker.ceph.com/issues/74133
Signed-off-by: Ronen Friedman <rfriedma@redhat.com>
src/osd/OSD.cc
src/osd/PrimaryLogPG.cc
src/osd/scrubber/pg_scrubber.cc
src/osd/scrubber/pg_scrubber.h
src/osd/scrubber_common.h

index 5e8ef8927cd12d61c25c4e853f6559217dd9d248..4de408d146f0438bdcd84b16354a341ed64533e6 100644 (file)
@@ -2781,6 +2781,7 @@ void OSD::asok_command(
       prefix == "list_unfound" ||
       prefix == "scrub" ||
       prefix == "deep-scrub" ||
+      prefix == "scrub-abort" ||
       prefix == "schedule-scrub" ||      ///< dev/tests only!
       prefix == "schedule-deep-scrub"    ///< dev/tests only!
     ) {
@@ -4545,6 +4546,12 @@ void OSD::final_init()
     asok_hook,
     "Trigger a deep scrub");
   ceph_assert(r == 0);
+  r = admin_socket->register_command(
+    "scrub-abort "
+    "name=pgid,type=CephPgid,req=false",
+    asok_hook,
+    "Abort an ongoing scrub. Cancel any operator-initiated scrub");
+  ceph_assert(r == 0);
   // debug/test commands (faking the timestamps)
   r = admin_socket->register_command(
     "schedule-scrub "
index b0f12f6925c5e930e618050ec6ea8b71d23de9a3..883a32a77e6e433c1140b95ff9381d48c9095abd 100644 (file)
@@ -1202,6 +1202,16 @@ void PrimaryLogPG::do_command(
     outbl.append(ss.str());
   }
 
+  else if (prefix == "scrub-abort") {
+    if (is_primary()) {
+      m_scrubber->on_operator_abort_scrub(f.get());
+    } else {
+      ss << "Not primary";
+      ret = -EPERM;
+      outbl.append(ss.str());
+    }
+  }
+
   // the test/debug commands that schedule a scrub by modifying timestamps
   else if (prefix == "schedule-scrub" || prefix == "schedule-deep-scrub") {
     if (is_primary()) {
index 1bd1ffc309feb1fef5dc91f65e510871859e3cc6..0d58e1546f55337a3f0da9a2a6c59e779c34ba1b 100644 (file)
@@ -771,6 +771,85 @@ void PgScrubber::on_operator_forced_scrub(
 }
 
 
+/**
+ * Handles the operator's 'scrub-abort' request. The outcome is reported
+ * via 'f': the 'applicable' & 'active' flags, inside a "result" section.
+ * - if the PG is being scrubbed - just send the operator-abort event to
+ *   the FSM. That would stop the ongoing scrub session, and remove the
+ *   (possible) operator-requested priority from both PG targets.
+ * - otherwise - strip that priority from both targets manually, and requeue.
+ */
+void PgScrubber::on_operator_abort_scrub(ceph::Formatter* f)
+{
+  Formatter::ObjectSection asok_resp_section{*f, "result"sv};
+  if (!is_primary() || !m_scrub_job) {
+    dout(10) << fmt::format(
+                   "{}: pg[{}]: not Primary or no scrub-job", __func__,
+                   m_pg_id.pgid)
+            << dendl;
+    f->dump_bool("applicable", false);
+    f->dump_bool("active", false);
+    return;
+  }
+
+  dout(5) << fmt::format(
+                 "{}: pg[{}]: job on entry: {}", __func__, m_pg_id.pgid,
+                 *m_scrub_job)
+          << dendl;
+  ceph_assert(m_pg->is_locked());
+  if (is_scrub_active()) {
+    m_fsm->process_event(OperatorAbort{});
+    f->dump_bool("applicable", true);
+    f->dump_bool("active", true);
+
+  } else if (!m_scrub_job->is_registered()) {
+    const auto err_text = fmt::format(
+        "{}: pg[{}] is not registered for scrubbing", __func__, m_pg_id.pgid);
+    dout(5) << err_text << dendl;
+    f->dump_bool("applicable", false);
+    f->dump_bool("active", false);
+    f->dump_string("error", err_text);
+
+  } else {
+    // registered, but not scrubbing now. Remove any operator-requested
+    // priority from both targets (reported as applicable, not active).
+
+    if (m_scrub_job->is_queued()) {
+      // one or both targets are in the OSD queue. Dequeue before updating.
+      m_osds->get_scrub_services().remove_from_osd_queue(m_pg_id);
+      m_scrub_job->clear_both_targets_queued();
+      dout(20) << fmt::format(
+                      "{}: pg[{}] dequeuing for an update", __func__,
+                      m_pg_id.pgid)
+               << dendl;
+    }
+
+    // if any of the targets was set to operator-initiated urgency -
+    // remove that designation, and recompute both via update_targets().
+    const auto scrub_time_now = ceph_clock_now();
+    const bool adj_shallow = downgrade_on_operator_abort(
+        m_scrub_job->get_target(scrub_level_t::shallow), scrub_time_now);
+    // note: must not short-circuit! the deep target is handled regardless.
+    const bool adj_deep = downgrade_on_operator_abort(
+        m_scrub_job->get_target(scrub_level_t::deep), scrub_time_now);
+    if (adj_shallow || adj_deep) {
+      update_targets(scrub_time_now);
+      dout(10) << fmt::format("{}: adjusted job: {}", __func__, *m_scrub_job)
+               << dendl;
+    }
+    m_osds->get_scrub_services().enqueue_scrub_job(*m_scrub_job);
+    m_scrub_job->set_both_targets_queued();
+    f->dump_bool("applicable", true);
+    f->dump_bool("active", false);
+  }
+  dout(5) << fmt::format(
+                 "{}: pg[{}] job at exit: {}", __func__, m_pg_id.pgid,
+                 *m_scrub_job)
+          << dendl;
+  m_pg->publish_stats_to_osd();
+}
+
+
 // ----------------------------------------------------------------------------
 
 bool PgScrubber::has_pg_marked_new_updates() const
index 1f5ec95e841e5ada1f98e6ff5d8dfc323423d122..abf0a726ee4f8c37ea126dc2964c01e30f339cfb 100644 (file)
@@ -340,6 +340,9 @@ class PgScrubber : public ScrubPgIF,
       scrub_level_t scrub_level,
       scrub_type_t scrub_type) final;
 
+  void on_operator_abort_scrub(
+      ceph::Formatter* f) final;
+
   /**
    * let the scrubber know that a recovery operation has completed.
    * This might trigger an 'after repair' scrub.
index b57eec5d81c58281d8805b163002d99826890f2c..844c6943588e23ae736e809a49208f07d6e415dd 100644 (file)
@@ -482,6 +482,10 @@ struct ScrubPgIF {
     ceph::Formatter* f,
     scrub_level_t scrub_level) = 0;
 
+  /// abort an ongoing scrub, cancel any operator-requested one; reports via 'f'
+  virtual void on_operator_abort_scrub(
+    ceph::Formatter* f) = 0;
+
   virtual void dump_scrubber(ceph::Formatter* f) const = 0;
 
   /**