From 290e744a9b6c64f3da805056625b963f0eedaf33 Mon Sep 17 00:00:00 2001 From: Ronen Friedman Date: Tue, 17 May 2022 16:13:59 +0000 Subject: [PATCH] osd/scrub: restart snap trimming after a failed scrub A followup to PR#45640. In PR#45640 snap trimming was restarted (if blocked) after all successful scrubs, and after most scrub failures. Still, a few failure scenarios did not handle snaptrim restart correctly. The current PR cleans up and fixes the interaction between scrub initiation/termination (for whatever cause) and snap trimming. Signed-off-by: Ronen Friedman --- src/osd/PG.cc | 2 -- src/osd/scrubber/pg_scrubber.cc | 19 +++++++++---------- src/osd/scrubber/pg_scrubber.h | 1 + src/osd/scrubber_common.h | 2 ++ 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 04b4a1e12069..ee7a4b31562f 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -441,7 +441,6 @@ void PG::queue_scrub_after_repair() m_scrubber->set_op_parameters(m_planned_scrub); dout(15) << __func__ << ": queueing" << dendl; - m_scrubber->set_queued_or_active(); osd->queue_scrub_after_repair(this, Scrub::scrub_prio_t::high_priority); } @@ -1371,7 +1370,6 @@ Scrub::schedule_result_t PG::sched_scrub() m_scrubber->set_op_parameters(m_planned_scrub); dout(10) << __func__ << ": queueing" << dendl; - m_scrubber->set_queued_or_active(); osd->queue_for_scrub(this, Scrub::scrub_prio_t::low_priority); return Scrub::schedule_result_t::scrub_initiated; } diff --git a/src/osd/scrubber/pg_scrubber.cc b/src/osd/scrubber/pg_scrubber.cc index 8379208094b6..f424d3e52d60 100644 --- a/src/osd/scrubber/pg_scrubber.cc +++ b/src/osd/scrubber/pg_scrubber.cc @@ -192,9 +192,7 @@ void PgScrubber::initiate_regular_scrub(epoch_t epoch_queued) m_fsm->process_event(StartScrub{}); dout(10) << "scrubber event --<< StartScrub" << dendl; } else { - clear_queued_or_active(); - // and just in case snap trimming was blocked by the aborted scrub - m_pg->snap_trimmer_scrub_complete(); + clear_queued_or_active(); // also restarts snap trimming } } @@ -210,9 +208,7 @@ void PgScrubber::initiate_scrub_after_repair(epoch_t epoch_queued) m_fsm->process_event(AfterRepairScrub{}); dout(10) << "scrubber event --<< AfterRepairScrub" << dendl; } else { - clear_queued_or_active(); - // and just in case snap trimming was blocked by the aborted scrub - m_pg->snap_trimmer_scrub_complete(); + clear_queued_or_active(); // also restarts snap trimming } } @@ -1421,6 +1417,8 @@ void PgScrubber::set_op_parameters(requested_scrub_t& request) { dout(10) << __func__ << " input: " << request << dendl; + set_queued_or_active(); // we are fully committed now. + // write down the epoch of starting a new scrub. Will be used // to discard stale messages from previous aborted scrubs. m_epoch_start = m_pg->get_osdmap_epoch(); @@ -1720,7 +1718,11 @@ void PgScrubber::set_queued_or_active() void PgScrubber::clear_queued_or_active() { - m_queued_or_active = false; + if (m_queued_or_active) { + m_queued_or_active = false; + // and just in case snap trimming was blocked by the aborted scrub + m_pg->snap_trimmer_scrub_complete(); + } } bool PgScrubber::is_queued_or_active() const @@ -1934,9 +1936,6 @@ void PgScrubber::scrub_finish() if (m_pg->is_active() && m_pg->is_primary()) { m_pg->recovery_state.share_pg_info(); } - - // we may have blocked the snap trimmer - m_pg->snap_trimmer_scrub_complete(); } void PgScrubber::on_digest_updates() diff --git a/src/osd/scrubber/pg_scrubber.h b/src/osd/scrubber/pg_scrubber.h index 0ed13f352d74..fc6efa2dd1c5 100644 --- a/src/osd/scrubber/pg_scrubber.h +++ b/src/osd/scrubber/pg_scrubber.h @@ -524,6 +524,7 @@ class PgScrubber : public ScrubPgIF, [[nodiscard]] bool was_epoch_changed() const final; void set_queued_or_active() final; + /// Clears `m_queued_or_active` and restarts snaptrimming void clear_queued_or_active() final; void mark_local_map_ready() final; diff --git a/src/osd/scrubber_common.h b/src/osd/scrubber_common.h index 7cdf714db421..4a63cdf18161 100644 --- a/src/osd/scrubber_common.h +++ b/src/osd/scrubber_common.h @@ -236,6 +236,8 @@ struct ScrubPgIF { /** * Manipulate the 'scrubbing request has been queued, or - we are * actually scrubbing' Scrubber's flag + * + * clear_queued_or_active() will also restart any blocked snaptrimming. */ virtual void set_queued_or_active() = 0; virtual void clear_queued_or_active() = 0; -- 2.47.3