From 569c07f979d9f0846f803565dfc35ed0f9acaba8 Mon Sep 17 00:00:00 2001 From: Ronen Friedman Date: Sat, 23 Dec 2023 10:18:41 -0600 Subject: [PATCH] osd/scrub: manage queue registration lifetime in the FSM As the state of 'being registered in the OSDs scrub queue' corresponds to the PrimaryActive FSM state. Signed-off-by: Ronen Friedman --- src/osd/PG.cc | 1 - src/osd/PrimaryLogPG.cc | 3 +-- src/osd/scrubber/pg_scrubber.cc | 30 +++++++++----------------- src/osd/scrubber/pg_scrubber.h | 2 +- src/osd/scrubber/scrub_machine.cc | 2 ++ src/osd/scrubber/scrub_machine_lstnr.h | 9 ++++++++ src/osd/scrubber_common.h | 8 ------- 7 files changed, 23 insertions(+), 32 deletions(-) diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 3138b8c32f9..eb7a332f6c2 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -1847,7 +1847,6 @@ void PG::on_activate(interval_set snaps) snap_trimq = snaps; release_pg_backoffs(); projected_last_update = info.last_update; - m_scrubber->on_pg_activate(m_planned_scrub); } void PG::on_replica_activate() diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc index 8120d76e7ea..d57ab432d4a 100644 --- a/src/osd/PrimaryLogPG.cc +++ b/src/osd/PrimaryLogPG.cc @@ -12879,8 +12879,7 @@ void PrimaryLogPG::on_shutdown() osd->clear_queued_recovery(this); } - m_scrubber->scrub_clear_state(); - m_scrubber->rm_from_osd_scrubbing(); + m_scrubber->on_new_interval(); vector tids; cancel_copy_ops(false, &tids); diff --git a/src/osd/scrubber/pg_scrubber.cc b/src/osd/scrubber/pg_scrubber.cc index dd279f720ad..ab9b86e5b35 100644 --- a/src/osd/scrubber/pg_scrubber.cc +++ b/src/osd/scrubber/pg_scrubber.cc @@ -460,14 +460,12 @@ void PgScrubber::on_new_interval() (is_primary() ? "Primary" : "Replica/other"), is_scrub_active(), is_queued_or_active()) << dendl; - m_fsm->process_event(IntervalChanged{}); // the following asserts were added due to a past bug, where PG flags were // left set in some scenarios. 
ceph_assert(!is_queued_or_active()); ceph_assert(!state_test(PG_STATE_SCRUBBING)); ceph_assert(!state_test(PG_STATE_DEEP_SCRUB)); - rm_from_osd_scrubbing(); } bool PgScrubber::is_scrub_registered() const @@ -493,35 +491,31 @@ void PgScrubber::rm_from_osd_scrubbing() } } -void PgScrubber::on_pg_activate(const requested_scrub_t& request_flags) +/* + * Note: referring to m_planned_scrub here is temporary, as this set of + * scheduling flags will be removed in a followup PR. + */ +void PgScrubber::schedule_scrub_with_osd() { ceph_assert(is_primary()); - if (!m_scrub_job) { - // we won't have a chance to see more logs from this function, thus: - dout(2) << fmt::format( - "{}: flags:<{}> {}.Reg-state:{:.7}. No scrub-job", __func__, - request_flags, (is_primary() ? "Primary" : "Replica/other"), - registration_state()) - << dendl; - return; - } + ceph_assert(m_scrub_job); - ceph_assert(!is_queued_or_active()); auto pre_state = m_scrub_job->state_desc(); auto pre_reg = registration_state(); auto suggested = m_osds->get_scrub_services().determine_scrub_time( - request_flags, m_pg->info, m_pg->get_pgpool().info.opts); + m_planned_scrub, m_pg->info, m_pg->get_pgpool().info.opts); m_osds->get_scrub_services().register_with_osd(m_scrub_job, suggested); dout(10) << fmt::format( "{}: {} <{:.5}>&<{:.10}> --> <{:.5}>&<{:.14}>", - __func__, request_flags, + __func__, m_planned_scrub, (is_primary() ? 
"Primary" : "Replica/other"), pre_reg, pre_state, registration_state(), m_scrub_job->state_desc()) << dendl; } + void PgScrubber::on_primary_active_clean() { dout(10) << fmt::format( @@ -2177,11 +2171,7 @@ void PgScrubber::handle_query_state(ceph::Formatter* f) PgScrubber::~PgScrubber() { m_fsm->process_event(IntervalChanged{}); - if (m_scrub_job) { - // make sure the OSD won't try to scrub this one just now - rm_from_osd_scrubbing(); - m_scrub_job.reset(); - } + m_scrub_job.reset(); } PgScrubber::PgScrubber(PG* pg) diff --git a/src/osd/scrubber/pg_scrubber.h b/src/osd/scrubber/pg_scrubber.h index af667f73214..8360b4c038f 100644 --- a/src/osd/scrubber/pg_scrubber.h +++ b/src/osd/scrubber/pg_scrubber.h @@ -252,7 +252,7 @@ class PgScrubber : public ScrubPgIF, void rm_from_osd_scrubbing() final; - void on_pg_activate(const requested_scrub_t& request_flags) final; + void schedule_scrub_with_osd() final; scrub_level_t scrub_requested( scrub_level_t scrub_level, diff --git a/src/osd/scrubber/scrub_machine.cc b/src/osd/scrubber/scrub_machine.cc index 1928eed7d0c..26054bf3f76 100644 --- a/src/osd/scrubber/scrub_machine.cc +++ b/src/osd/scrubber/scrub_machine.cc @@ -123,6 +123,8 @@ PrimaryActive::PrimaryActive(my_context ctx) { DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases dout(10) << "-- state -->> PrimaryActive" << dendl; + // insert this PG into the OSD scrub queue. Calculate initial schedule + scrbr->schedule_scrub_with_osd(); } PrimaryActive::~PrimaryActive() diff --git a/src/osd/scrubber/scrub_machine_lstnr.h b/src/osd/scrubber/scrub_machine_lstnr.h index 8b110fe6271..086802ee813 100644 --- a/src/osd/scrubber/scrub_machine_lstnr.h +++ b/src/osd/scrubber/scrub_machine_lstnr.h @@ -107,6 +107,15 @@ struct ScrubMachineListener { [[nodiscard]] virtual bool is_primary() const = 0; + /// dequeue this PG from the OSD's scrub-queue + virtual void rm_from_osd_scrubbing() = 0; + + /** + * the FSM has entered the PrimaryActive state. 
That happens when + * the PG, peered as a Primary, reaches the 'active' state. + */ + virtual void schedule_scrub_with_osd() = 0; + virtual void select_range_n_notify() = 0; /// walk the log to find the latest update that affects our chunk diff --git a/src/osd/scrubber_common.h b/src/osd/scrubber_common.h index fbbef578ae6..b41a9b4bba8 100644 --- a/src/osd/scrubber_common.h +++ b/src/osd/scrubber_common.h @@ -409,12 +409,6 @@ struct ScrubPgIF { */ virtual bool reserve_local() = 0; - /** - * if activated as a Primary - register the scrub job with the OSD - * scrub queue - */ - virtual void on_pg_activate(const requested_scrub_t& request_flags) = 0; - /** * Recalculate the required scrub time. * @@ -431,8 +425,6 @@ struct ScrubPgIF { */ virtual void handle_scrub_reserve_msgs(OpRequestRef op) = 0; - virtual void rm_from_osd_scrubbing() = 0; - virtual scrub_level_t scrub_requested( scrub_level_t scrub_level, scrub_type_t scrub_type, -- 2.39.5