From: Ronen Friedman Date: Tue, 17 May 2022 16:13:59 +0000 (+0000) Subject: osd/scrub: restart snap trimming after a failed scrub X-Git-Tag: v17.2.1~17^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=refs%2Fpull%2F46418%2Fhead;p=ceph.git osd/scrub: restart snap trimming after a failed scrub A followup to PR#45640. In PR#45640 snap trimming was restarted (if blocked) after all successful scrubs, and after most scrub failures. Still, a few failure scenarios did not handle snaptrim restart correctly. The current PR cleans up and fixes the interaction between scrub initiation/termination (for whatever cause) and snap trimming. Signed-off-by: Ronen Friedman (cherry picked from commit 290e744a9b6c64f3da805056625b963f0eedaf33) --- diff --git a/src/osd/PG.cc b/src/osd/PG.cc index d3345319acbc..d77cef0d7b5c 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -441,7 +441,6 @@ void PG::queue_scrub_after_repair() m_scrubber->set_op_parameters(m_planned_scrub); dout(15) << __func__ << ": queueing" << dendl; - m_scrubber->set_queued_or_active(); osd->queue_scrub_after_repair(this, Scrub::scrub_prio_t::high_priority); } @@ -1371,7 +1370,6 @@ Scrub::schedule_result_t PG::sched_scrub() m_scrubber->set_op_parameters(m_planned_scrub); dout(10) << __func__ << ": queueing" << dendl; - m_scrubber->set_queued_or_active(); osd->queue_for_scrub(this, Scrub::scrub_prio_t::low_priority); return Scrub::schedule_result_t::scrub_initiated; } diff --git a/src/osd/scrubber/pg_scrubber.cc b/src/osd/scrubber/pg_scrubber.cc index 5b4ed18b8f49..25e4a83d9e8b 100644 --- a/src/osd/scrubber/pg_scrubber.cc +++ b/src/osd/scrubber/pg_scrubber.cc @@ -188,9 +188,7 @@ void PgScrubber::initiate_regular_scrub(epoch_t epoch_queued) m_fsm->process_event(StartScrub{}); dout(10) << "scrubber event --<< StartScrub" << dendl; } else { - clear_queued_or_active(); - // and just in case snap trimming was blocked by the aborted scrub - m_pg->snap_trimmer_scrub_complete(); + clear_queued_or_active(); // also restarts snap trimming } } @@ -204,9 +202,7 @@ void PgScrubber::initiate_scrub_after_repair(epoch_t epoch_queued) m_fsm->process_event(AfterRepairScrub{}); dout(10) << "scrubber event --<< AfterRepairScrub" << dendl; } else { - clear_queued_or_active(); - // and just in case snap trimming was blocked by the aborted scrub - m_pg->snap_trimmer_scrub_complete(); + clear_queued_or_active(); // also restarts snap trimming } } @@ -1335,6 +1331,8 @@ void PgScrubber::set_op_parameters(requested_scrub_t& request) { dout(10) << __func__ << " input: " << request << dendl; + set_queued_or_active(); // we are fully committed now. + // write down the epoch of starting a new scrub. Will be used // to discard stale messages from previous aborted scrubs. m_epoch_start = m_pg->get_osdmap_epoch(); @@ -1715,7 +1713,11 @@ void PgScrubber::set_queued_or_active() void PgScrubber::clear_queued_or_active() { - m_queued_or_active = false; + if (m_queued_or_active) { + m_queued_or_active = false; + // and just in case snap trimming was blocked by the aborted scrub + m_pg->snap_trimmer_scrub_complete(); + } } bool PgScrubber::is_queued_or_active() const @@ -1921,9 +1923,6 @@ void PgScrubber::scrub_finish() if (m_pg->is_active() && m_pg->is_primary()) { m_pg->recovery_state.share_pg_info(); } - - // we may have blocked the snap trimmer - m_pg->snap_trimmer_scrub_complete(); } void PgScrubber::on_digest_updates() diff --git a/src/osd/scrubber/pg_scrubber.h b/src/osd/scrubber/pg_scrubber.h index e223ae213ab7..eeba65bab796 100644 --- a/src/osd/scrubber/pg_scrubber.h +++ b/src/osd/scrubber/pg_scrubber.h @@ -429,6 +429,7 @@ class PgScrubber : public ScrubPgIF, public ScrubMachineListener { [[nodiscard]] bool was_epoch_changed() const final; void set_queued_or_active() final; + /// Clears `m_queued_or_active` and restarts snaptrimming void clear_queued_or_active() final; void mark_local_map_ready() final; diff --git a/src/osd/scrubber_common.h b/src/osd/scrubber_common.h index 2b4d370be11d..078eb60afb98 100644 --- a/src/osd/scrubber_common.h +++ b/src/osd/scrubber_common.h @@ -191,6 +191,8 @@ struct ScrubPgIF { /** * Manipulate the 'scrubbing request has been queued, or - we are * actually scrubbing' Scrubber's flag + * + * clear_queued_or_active() will also restart any blocked snaptrimming. */ virtual void set_queued_or_active() = 0; virtual void clear_queued_or_active() = 0;