From: Ronen Friedman Date: Sun, 7 Dec 2025 14:34:05 +0000 (-0600) Subject: osd/scrub: added the scrub-abort command X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=0884eec3cae7ac7d81e0dce6b12526262c12047e;p=ceph.git osd/scrub: added the scrub-abort command and its handling in the PgScrubber. Fixes: https://tracker.ceph.com/issues/74133 Signed-off-by: Ronen Friedman --- diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 5e8ef8927cd..4de408d146f 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -2781,6 +2781,7 @@ void OSD::asok_command( prefix == "list_unfound" || prefix == "scrub" || prefix == "deep-scrub" || + prefix == "scrub-abort" || prefix == "schedule-scrub" || ///< dev/tests only! prefix == "schedule-deep-scrub" ///< dev/tests only! ) { @@ -4545,6 +4546,12 @@ void OSD::final_init() asok_hook, "Trigger a deep scrub"); ceph_assert(r == 0); + r = admin_socket->register_command( + "scrub-abort " + "name=pgid,type=CephPgid,req=false", + asok_hook, + "Abort an ongoing scrub. 
Cancel any operator-initiated scrub"); + ceph_assert(r == 0); // debug/test commands (faking the timestamps) r = admin_socket->register_command( "schedule-scrub " diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc index b0f12f6925c..883a32a77e6 100644 --- a/src/osd/PrimaryLogPG.cc +++ b/src/osd/PrimaryLogPG.cc @@ -1202,6 +1202,16 @@ void PrimaryLogPG::do_command( outbl.append(ss.str()); } + else if (prefix == "scrub-abort") { + if (is_primary()) { + m_scrubber->on_operator_abort_scrub(f.get()); + } else { + ss << "Not primary"; + ret = -EPERM; + outbl.append(ss.str()); + } + } + // the test/debug commands that schedule a scrub by modifying timestamps else if (prefix == "schedule-scrub" || prefix == "schedule-deep-scrub") { if (is_primary()) { diff --git a/src/osd/scrubber/pg_scrubber.cc b/src/osd/scrubber/pg_scrubber.cc index 1bd1ffc309f..0d58e1546f5 100644 --- a/src/osd/scrubber/pg_scrubber.cc +++ b/src/osd/scrubber/pg_scrubber.cc @@ -771,6 +771,85 @@ void PgScrubber::on_operator_forced_scrub( } +/** + * Operation: + * - if the PG is being scrubbed - just send the operator-abort event to + * the FSM. That would stop the ongoing scrub session, and remove the + * (possible) operator-requested priority from both PG targets (shallow + * and deep). + * - otherwise - manually manipulate the two urgencies. 
+ *
+ * The outcome is reported through the formatter 'f':
+ * - 'applicable' is false if this is not the Primary, if there is no
+ *   scrub job, or if the job is not registered for scrubbing; it is true
+ *   otherwise.
+ * - 'active' is true only if a scrub was active when the command arrived.
+ * - 'error' (a string) is added only in the "not registered" case.
+ *
+ * 'f' is dereferenced unconditionally, so it must not be null.
+ * The PG is asserted to be locked once past the "not Primary / no
+ * scrub-job" early return.
+ */
+void PgScrubber::on_operator_abort_scrub(ceph::Formatter* f)
+{
+  // presumably wraps all the dumps below in a "result" object for as long
+  // as this local is alive - confirm against Formatter::ObjectSection
+  Formatter::ObjectSection asok_resp_section{*f, "result"sv};
+  if (!is_primary() || !m_scrub_job) {
+    dout(10) << fmt::format(
+                    "{}: pg[{}]: not Primary or no scrub-job", __func__,
+                    m_pg_id.pgid)
+             << dendl;
+    f->dump_bool("applicable", false);
+    f->dump_bool("active", false);
+    // note: this path skips the 'job at exit' log and the stats
+    // publication performed at the end of the function
+    return;
+  }
+
+  dout(5) << fmt::format(
+                 "{}: pg[{}]: job on entry: {}", __func__, m_pg_id.pgid,
+                 *m_scrub_job)
+          << dendl;
+  ceph_assert(m_pg->is_locked());
+  if (is_scrub_active()) {
+    // a scrub is in progress: hand the abort request to the scrubber FSM
+    m_fsm->process_event(OperatorAbort{});
+    f->dump_bool("applicable", true);
+    f->dump_bool("active", true);
+
+  } else if (!m_scrub_job->is_registered()) {
+    // nothing to abort and nothing to reschedule: report the error both in
+    // the log and in the command response
+    const auto err_text = fmt::format(
+        "{}: pg[{}] is not registered for scrubbing", __func__, m_pg_id.pgid);
+    dout(5) << err_text << dendl;
+    f->dump_bool("applicable", false);
+    f->dump_bool("active", false);
+    f->dump_string("error", err_text);
+
+  } else {
+    // not scrubbing now. Remove any operator-requested priority from
+    // both targets.
+
+    if (m_scrub_job->is_queued()) {
+      // one or both of the targets are in the queue. Remove them.
+      m_osds->get_scrub_services().remove_from_osd_queue(m_pg_id);
+      m_scrub_job->clear_both_targets_queued();
+      dout(20) << fmt::format(
+                      "{}: pg[{}] dequeuing for an update", __func__,
+                      m_pg_id.pgid)
+               << dendl;
+    }
+
+    // if any of the targets was set to operator-initiated urgency -
+    // remove that designation, and reschedule both.
+    const auto scrub_time_now = ceph_clock_now();
+    const bool adj_shallow = downgrade_on_operator_abort(
+        m_scrub_job->get_target(scrub_level_t::shallow), scrub_time_now);
+    // note: must not short-circuit!
+    // (the helper is called for each target in its own statement, so both
+    // calls always run; only the two results are OR-ed below)
+    const bool adj_deep = downgrade_on_operator_abort(
+        m_scrub_job->get_target(scrub_level_t::deep), scrub_time_now);
+    if (adj_shallow || adj_deep) {
+      update_targets(scrub_time_now);
+      dout(10) << fmt::format("{}: adjusted job: {}", __func__, *m_scrub_job)
+               << dendl;
+    }
+    // the job is (re-)queued unconditionally: regardless of whether it was
+    // queued on entry, and of whether any target was adjusted
+    m_osds->get_scrub_services().enqueue_scrub_job(*m_scrub_job);
+    m_scrub_job->set_both_targets_queued();
+    f->dump_bool("applicable", true);
+    f->dump_bool("active", false);
+  }
+  // reached from all three branches above
+  dout(5) << fmt::format(
+                 "{}: pg[{}] job at exit: {}", __func__, m_pg_id.pgid,
+                 *m_scrub_job)
+          << dendl;
+  m_pg->publish_stats_to_osd();
+}
+
+
 // ----------------------------------------------------------------------------
 
 bool PgScrubber::has_pg_marked_new_updates() const
diff --git a/src/osd/scrubber/pg_scrubber.h b/src/osd/scrubber/pg_scrubber.h
index 1f5ec95e841..abf0a726ee4 100644
--- a/src/osd/scrubber/pg_scrubber.h
+++ b/src/osd/scrubber/pg_scrubber.h
@@ -340,6 +340,9 @@ class PgScrubber : public ScrubPgIF,
       scrub_level_t scrub_level,
       scrub_type_t scrub_type) final;
 
+  void on_operator_abort_scrub(
+      ceph::Formatter* f) final;
+
   /**
    * let the scrubber know that a recovery operation has completed.
    * This might trigger an 'after repair' scrub.
diff --git a/src/osd/scrubber_common.h b/src/osd/scrubber_common.h
index b57eec5d81c..844c6943588 100644
--- a/src/osd/scrubber_common.h
+++ b/src/osd/scrubber_common.h
@@ -482,6 +482,10 @@ struct ScrubPgIF {
       ceph::Formatter* f,
       scrub_level_t scrub_level) = 0;
 
+  /// abort an ongoing scrub, and cancel any pending operator scrub request
+  virtual void on_operator_abort_scrub(
+      ceph::Formatter* f) = 0;
+
   virtual void dump_scrubber(ceph::Formatter* f) const = 0;
 
   /**