From 754157c4289395677a3edf5e6eb81e14a7c0cf1e Mon Sep 17 00:00:00 2001 From: Ronen Friedman Date: Wed, 1 Feb 2023 09:22:00 +0200 Subject: [PATCH] osd/scrub: split on_pg_activate from on_new_interval Separate and clarify handling of interval termination, pg activation, and configuration change. A primary PG now registers with its OSD for scrubbing only on activation: on_pg_activate() called from PG::on_activate(). When the interval ends, the scrubber is notified via on_interval_change, which is responsible for cleaning up any active or replica state associated with scrub. Configuration changes are still handled by update_scrub_job(). Signed-off-by: Ronen Friedman Signed-off-by: Samuel Just --- src/crimson/osd/pg.h | 4 -- src/osd/PG.cc | 24 ++++------- src/osd/PG.h | 2 - src/osd/PeeringState.cc | 2 - src/osd/PeeringState.h | 3 -- src/osd/scrubber/pg_scrubber.cc | 71 ++++++++++++++++++--------------- src/osd/scrubber/pg_scrubber.h | 6 +-- src/osd/scrubber_common.h | 13 +++--- 8 files changed, 55 insertions(+), 70 deletions(-) diff --git a/src/crimson/osd/pg.h b/src/crimson/osd/pg.h index 57397991537..c6a806a53a3 100644 --- a/src/crimson/osd/pg.h +++ b/src/crimson/osd/pg.h @@ -163,10 +163,6 @@ public: // Not needed yet -- mainly for scrub scheduling } - /// Notify PG that Primary/Replica status has changed (to update scrub registration) - void on_primary_status_change(bool was_primary, bool now_primary) final { - } - /// Need to reschedule next scrub. Assuming no change in role void reschedule_scrub() final { } diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 211d8a3d46f..3e8bdc7f215 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -1687,7 +1687,11 @@ std::optional PG::validate_scrub_mode() const void PG::on_info_history_change() { ceph_assert(m_scrubber); - m_scrubber->on_primary_change(__func__, m_planned_scrub); + dout(20) << fmt::format( + "{} for a {}", __func__, + (is_primary() ? "Primary" : "non-primary")) + << dendl; + reschedule_scrub(); } void PG::reschedule_scrub() @@ -1704,15 +1708,6 @@ void PG::reschedule_scrub() } } -void PG::on_primary_status_change(bool was_primary, bool now_primary) -{ - // make sure we have a working scrubber when becoming a primary - if (was_primary != now_primary) { - ceph_assert(m_scrubber); - m_scrubber->on_primary_change(__func__, m_planned_scrub); - } -} - void PG::scrub_requested(scrub_level_t scrub_level, scrub_type_t scrub_type) { ceph_assert(m_scrubber); @@ -1740,13 +1735,7 @@ void PG::on_new_interval() { projected_last_update = eversion_t(); cancel_recovery(); - - ceph_assert(m_scrubber); - // log some scrub data before we react to the interval - dout(20) << __func__ << (is_scrub_queued_or_active() ? " scrubbing " : " ") - << "flags: " << m_planned_scrub << dendl; - - m_scrubber->on_primary_change(__func__, m_planned_scrub); + m_scrubber->on_new_interval(); } epoch_t PG::cluster_osdmap_trim_lower_bound() { @@ -1833,6 +1822,7 @@ void PG::on_activate(interval_set snaps) snap_trimq = snaps; release_pg_backoffs(); projected_last_update = info.last_update; + m_scrubber->on_pg_activate(m_planned_scrub); } void PG::on_active_exit() diff --git a/src/osd/PG.h b/src/osd/PG.h index 42b485e24bb..187760ba539 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -540,8 +540,6 @@ public: void on_info_history_change() override; - void on_primary_status_change(bool was_primary, bool now_primary) override; - void reschedule_scrub() override; void scrub_requested(scrub_level_t scrub_level, scrub_type_t scrub_type) override; diff --git a/src/osd/PeeringState.cc b/src/osd/PeeringState.cc index f3485ca6c6e..a9583af5abc 100644 --- a/src/osd/PeeringState.cc +++ b/src/osd/PeeringState.cc @@ -711,8 +711,6 @@ void PeeringState::start_peering_interval( // did primary change? if (was_old_primary != is_primary()) { state_clear(PG_STATE_CLEAN); - // queue/dequeue the scrubber - pl->on_primary_status_change(was_old_primary, is_primary()); } pl->on_role_change(); diff --git a/src/osd/PeeringState.h b/src/osd/PeeringState.h index a3aa04503f5..a70962072c5 100644 --- a/src/osd/PeeringState.h +++ b/src/osd/PeeringState.h @@ -282,9 +282,6 @@ public: /// Notify that info/history changed (generally to update scrub registration) virtual void on_info_history_change() = 0; - /// Notify PG that Primary/Replica status has changed (to update scrub registration) - virtual void on_primary_status_change(bool was_primary, bool now_primary) = 0; - /// Need to reschedule next scrub. Assuming no change in role virtual void reschedule_scrub() = 0; diff --git a/src/osd/scrubber/pg_scrubber.cc b/src/osd/scrubber/pg_scrubber.cc index 1ff95d19cfe..19288758cd7 100644 --- a/src/osd/scrubber/pg_scrubber.cc +++ b/src/osd/scrubber/pg_scrubber.cc @@ -467,6 +467,27 @@ unsigned int PgScrubber::scrub_requeue_priority( // ///////////////////////////////////////////////////////////////////// // // scrub-op registration handling +/* on_new_interval + * + * Responsible for restting any scrub state and releasing any resources. + * Any inflight events will be ignored via check_interval/should_drop_message + * or canceled. + */ +void PgScrubber::on_new_interval() +{ + dout(10) << fmt::format( + "{}: current role:{} active?{} q/a:{}", __func__, + (is_primary() ? "Primary" : "Replica/other"), + is_scrub_active(), is_queued_or_active()) + << dendl; + + m_fsm->process_event(FullReset{}); + // we may be the primary + if (is_queued_or_active()) { + clear_pgscrub_state(); + } + rm_from_osd_scrubbing(); +} bool PgScrubber::is_scrub_registered() const { @@ -491,49 +512,33 @@ void PgScrubber::rm_from_osd_scrubbing() } } -void PgScrubber::on_primary_change( - std::string_view caller, - const requested_scrub_t& request_flags) +void PgScrubber::on_pg_activate(const requested_scrub_t& request_flags) { + ceph_assert(is_primary()); if (!m_scrub_job) { // we won't have a chance to see more logs from this function, thus: - dout(10) << fmt::format( - "{}: (from {}& w/{}) {}.Reg-state:{:.7}. No scrub-job", - __func__, caller, request_flags, - (is_primary() ? "Primary" : "Replica/other"), - registration_state()) - << dendl; + dout(2) << fmt::format( + "{}: flags:<{}> {}.Reg-state:{:.7}. No scrub-job", __func__, + request_flags, (is_primary() ? "Primary" : "Replica/other"), + registration_state()) + << dendl; return; } + ceph_assert(!is_queued_or_active()); auto pre_state = m_scrub_job->state_desc(); auto pre_reg = registration_state(); - if (is_primary()) { - auto suggested = m_osds->get_scrub_services().determine_scrub_time( - request_flags, m_pg->info, m_pg->get_pgpool().info.opts); - m_osds->get_scrub_services().register_with_osd(m_scrub_job, suggested); - } else { - m_osds->get_scrub_services().remove_from_osd_queue(m_scrub_job); - } - // is there an interval change we should respond to? - if (is_primary() && is_scrub_active()) { - if (m_interval_start < m_pg->get_same_interval_since()) { - dout(10) << fmt::format( - "{}: interval changed ({} -> {}). Aborting active scrub.", - __func__, m_interval_start, m_pg->get_same_interval_since()) - << dendl; - scrub_clear_state(); - } - } + auto suggested = m_osds->get_scrub_services().determine_scrub_time( + request_flags, m_pg->info, m_pg->get_pgpool().info.opts); + m_osds->get_scrub_services().register_with_osd(m_scrub_job, suggested); - dout(10) - << fmt::format( - "{} (from {} {}): {}. <{:.5}>&<{:.10}> --> <{:.5}>&<{:.14}>", - __func__, caller, request_flags, - (is_primary() ? "Primary" : "Replica/other"), pre_reg, pre_state, - registration_state(), m_scrub_job->state_desc()) - << dendl; + dout(10) << fmt::format( + "{}: {} <{:.5}>&<{:.10}> --> <{:.5}>&<{:.14}>", + __func__, request_flags, + (is_primary() ? "Primary" : "Replica/other"), pre_reg, + pre_state, registration_state(), m_scrub_job->state_desc()) + << dendl; } /* diff --git a/src/osd/scrubber/pg_scrubber.h b/src/osd/scrubber/pg_scrubber.h index c09cdc1d823..f67987a4b44 100644 --- a/src/osd/scrubber/pg_scrubber.h +++ b/src/osd/scrubber/pg_scrubber.h @@ -387,9 +387,7 @@ class PgScrubber : public ScrubPgIF, void rm_from_osd_scrubbing() final; - void on_primary_change( - std::string_view caller, - const requested_scrub_t& request_flags) final; + void on_pg_activate(const requested_scrub_t& request_flags) final; void scrub_requested( scrub_level_t scrub_level, @@ -439,6 +437,8 @@ class PgScrubber : public ScrubPgIF, /// handle a message carrying a replica map void map_from_replica(OpRequestRef op) final; + void on_new_interval() final; + void scrub_clear_state() final; bool is_queued_or_active() const final; diff --git a/src/osd/scrubber_common.h b/src/osd/scrubber_common.h index a534fd131bf..e1bd8c8eb0f 100644 --- a/src/osd/scrubber_common.h +++ b/src/osd/scrubber_common.h @@ -280,6 +280,10 @@ struct ScrubPgIF { virtual void set_op_parameters(const requested_scrub_t&) = 0; + /// stop any active scrubbing (on interval end) and unregister from + /// the OSD scrub queue + virtual void on_new_interval() = 0; + virtual void scrub_clear_state() = 0; virtual void handle_query_state(ceph::Formatter* f) = 0; @@ -380,13 +384,10 @@ struct ScrubPgIF { virtual bool reserve_local() = 0; /** - * Register/de-register with the OSD scrub queue - * - * Following our status as Primary or replica. + * if activated as a Primary - register the scrub job with the OSD + * scrub queue */ - virtual void on_primary_change( - std::string_view caller, - const requested_scrub_t& request_flags) = 0; + virtual void on_pg_activate(const requested_scrub_t& request_flags) = 0; /** * Recalculate the required scrub time. -- 2.39.5