]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd/scrub: split on_pg_activate from on_new_interval
authorRonen Friedman <rfriedma@redhat.com>
Wed, 1 Feb 2023 07:22:00 +0000 (09:22 +0200)
committerSamuel Just <sjust@redhat.com>
Wed, 12 Apr 2023 03:39:19 +0000 (20:39 -0700)
Separate and clarify handling of interval termination, pg activation,
and configuration change.

A primary PG now registers with its OSD for scrubbing only on
activation: on_pg_activate() called from PG::on_activate().

When the interval ends, the scrubber is notified via on_interval_change,
which is responsible for cleaning up any active or replica state
associated with scrub.

Configuration changes are still handled by update_scrub_job().

Signed-off-by: Ronen Friedman <rfriedma@redhat.com>
Signed-off-by: Samuel Just <sjust@redhat.com>
src/crimson/osd/pg.h
src/osd/PG.cc
src/osd/PG.h
src/osd/PeeringState.cc
src/osd/PeeringState.h
src/osd/scrubber/pg_scrubber.cc
src/osd/scrubber/pg_scrubber.h
src/osd/scrubber_common.h

index 573979915379707a2c4c4041e863bf3931d6f3e7..c6a806a53a358b84a113e38cf03b1376401835f7 100644 (file)
@@ -163,10 +163,6 @@ public:
     // Not needed yet -- mainly for scrub scheduling
   }
 
-  /// Notify PG that Primary/Replica status has changed (to update scrub registration)
-  void on_primary_status_change(bool was_primary, bool now_primary) final {
-  }
-
   /// Need to reschedule next scrub. Assuming no change in role
   void reschedule_scrub() final {
   }
index 211d8a3d46f1d076e731935823d4bab8a471ab69..3e8bdc7f21518372ec8f9d2c8b98b2a5c7bd02c9 100644 (file)
@@ -1687,7 +1687,11 @@ std::optional<requested_scrub_t> PG::validate_scrub_mode() const
 void PG::on_info_history_change()
 {
   ceph_assert(m_scrubber);
-  m_scrubber->on_primary_change(__func__, m_planned_scrub);
+  dout(20) << fmt::format(
+                 "{} for a {}", __func__,
+                 (is_primary() ? "Primary" : "non-primary"))
+          << dendl;
+  reschedule_scrub();
 }
 
 void PG::reschedule_scrub()
@@ -1704,15 +1708,6 @@ void PG::reschedule_scrub()
   }
 }
 
-void PG::on_primary_status_change(bool was_primary, bool now_primary)
-{
-  // make sure we have a working scrubber when becoming a primary
-  if (was_primary != now_primary) {
-    ceph_assert(m_scrubber);
-    m_scrubber->on_primary_change(__func__, m_planned_scrub);
-  }
-}
-
 void PG::scrub_requested(scrub_level_t scrub_level, scrub_type_t scrub_type)
 {
   ceph_assert(m_scrubber);
@@ -1740,13 +1735,7 @@ void PG::on_new_interval()
 {
   projected_last_update = eversion_t();
   cancel_recovery();
-
-  ceph_assert(m_scrubber);
-  // log some scrub data before we react to the interval
-  dout(20) << __func__ << (is_scrub_queued_or_active() ? " scrubbing " : " ")
-           << "flags: " << m_planned_scrub << dendl;
-
-  m_scrubber->on_primary_change(__func__, m_planned_scrub);
+  m_scrubber->on_new_interval();
 }
 
 epoch_t PG::cluster_osdmap_trim_lower_bound() {
@@ -1833,6 +1822,7 @@ void PG::on_activate(interval_set<snapid_t> snaps)
   snap_trimq = snaps;
   release_pg_backoffs();
   projected_last_update = info.last_update;
+  m_scrubber->on_pg_activate(m_planned_scrub);
 }
 
 void PG::on_active_exit()
index 42b485e24bb2aed5d02ab63cb5025e2d48251832..187760ba539276afea3736c8c789ef6b8beaf149 100644 (file)
@@ -540,8 +540,6 @@ public:
 
   void on_info_history_change() override;
 
-  void on_primary_status_change(bool was_primary, bool now_primary) override;
-
   void reschedule_scrub() override;
 
   void scrub_requested(scrub_level_t scrub_level, scrub_type_t scrub_type) override;
index f3485ca6c6e49c22914d2c8c5c3095a6240d8020..a9583af5abc52ebf423f0032b6356cead09fcc92 100644 (file)
@@ -711,8 +711,6 @@ void PeeringState::start_peering_interval(
     // did primary change?
     if (was_old_primary != is_primary()) {
       state_clear(PG_STATE_CLEAN);
-      // queue/dequeue the scrubber
-      pl->on_primary_status_change(was_old_primary, is_primary());
     }
 
     pl->on_role_change();
index a3aa04503f52fc3fcd67c5f20326308d1b705e72..a70962072c5ca328117f25ca4d0bcd68868e2bdb 100644 (file)
@@ -282,9 +282,6 @@ public:
     /// Notify that info/history changed (generally to update scrub registration)
     virtual void on_info_history_change() = 0;
 
-    /// Notify PG that Primary/Replica status has changed (to update scrub registration)
-    virtual void on_primary_status_change(bool was_primary, bool now_primary) = 0;
-
     /// Need to reschedule next scrub. Assuming no change in role
     virtual void reschedule_scrub() = 0;
 
index 1ff95d19cfea64ad87998fa276610686c5011175..19288758cd73dd8b1499fc16c526f7c2e778f29d 100644 (file)
@@ -467,6 +467,27 @@ unsigned int PgScrubber::scrub_requeue_priority(
 // ///////////////////////////////////////////////////////////////////// //
 // scrub-op registration handling
 
+/* on_new_interval
+ *
+ * Responsible for restting any scrub state and releasing any resources.
+ * Any inflight events will be ignored via check_interval/should_drop_message
+ * or canceled.
+ */
+void PgScrubber::on_new_interval()
+{
+  dout(10) << fmt::format(
+                 "{}: current role:{} active?{} q/a:{}", __func__,
+                 (is_primary() ? "Primary" : "Replica/other"),
+                 is_scrub_active(), is_queued_or_active())
+          << dendl;
+
+  m_fsm->process_event(FullReset{});
+  // we may be the primary
+  if (is_queued_or_active()) {
+    clear_pgscrub_state();
+  }
+  rm_from_osd_scrubbing();
+}
 
 bool PgScrubber::is_scrub_registered() const
 {
@@ -491,49 +512,33 @@ void PgScrubber::rm_from_osd_scrubbing()
   }
 }
 
-void PgScrubber::on_primary_change(
-  std::string_view caller,
-  const requested_scrub_t& request_flags)
+void PgScrubber::on_pg_activate(const requested_scrub_t& request_flags)
 {
+  ceph_assert(is_primary());
   if (!m_scrub_job) {
     // we won't have a chance to see more logs from this function, thus:
-    dout(10) << fmt::format(
-                 "{}: (from {}& w/{}) {}.Reg-state:{:.7}. No scrub-job",
-                 __func__, caller, request_flags,
-                 (is_primary() ? "Primary" : "Replica/other"),
-                 registration_state())
-            << dendl;
+    dout(2) << fmt::format(
+                  "{}: flags:<{}> {}.Reg-state:{:.7}. No scrub-job", __func__,
+                  request_flags, (is_primary() ? "Primary" : "Replica/other"),
+                  registration_state())
+           << dendl;
     return;
   }
 
+  ceph_assert(!is_queued_or_active());
   auto pre_state = m_scrub_job->state_desc();
   auto pre_reg = registration_state();
-  if (is_primary()) {
-    auto suggested = m_osds->get_scrub_services().determine_scrub_time(
-      request_flags, m_pg->info, m_pg->get_pgpool().info.opts);
-    m_osds->get_scrub_services().register_with_osd(m_scrub_job, suggested);
-  } else {
-    m_osds->get_scrub_services().remove_from_osd_queue(m_scrub_job);
-  }
 
-  // is there an interval change we should respond to?
-  if (is_primary() && is_scrub_active()) {
-    if (m_interval_start < m_pg->get_same_interval_since()) {
-      dout(10) << fmt::format(
-                   "{}: interval changed ({} -> {}). Aborting active scrub.",
-                   __func__, m_interval_start, m_pg->get_same_interval_since())
-              << dendl;
-      scrub_clear_state();
-    }
-  }
+  auto suggested = m_osds->get_scrub_services().determine_scrub_time(
+      request_flags, m_pg->info, m_pg->get_pgpool().info.opts);
+  m_osds->get_scrub_services().register_with_osd(m_scrub_job, suggested);
 
-  dout(10)
-    << fmt::format(
-        "{} (from {} {}): {}. <{:.5}>&<{:.10}> --> <{:.5}>&<{:.14}>",
-        __func__, caller, request_flags,
-        (is_primary() ? "Primary" : "Replica/other"), pre_reg, pre_state,
-        registration_state(), m_scrub_job->state_desc())
-    << dendl;
+  dout(10) << fmt::format(
+                 "{}: <flags:{}> {} <{:.5}>&<{:.10}> --> <{:.5}>&<{:.14}>",
+                 __func__, request_flags,
+                 (is_primary() ? "Primary" : "Replica/other"), pre_reg,
+                 pre_state, registration_state(), m_scrub_job->state_desc())
+          << dendl;
 }
 
 /*
index c09cdc1d8239b4306dbc81706bed2acb6e172430..f67987a4b44e29f4dabaf80596dfc70077e4495e 100644 (file)
@@ -387,9 +387,7 @@ class PgScrubber : public ScrubPgIF,
 
   void rm_from_osd_scrubbing() final;
 
-  void on_primary_change(
-    std::string_view caller,
-    const requested_scrub_t& request_flags) final;
+  void on_pg_activate(const requested_scrub_t& request_flags) final;
 
   void scrub_requested(
       scrub_level_t scrub_level,
@@ -439,6 +437,8 @@ class PgScrubber : public ScrubPgIF,
   /// handle a message carrying a replica map
   void map_from_replica(OpRequestRef op) final;
 
+  void on_new_interval() final;
+
   void scrub_clear_state() final;
 
   bool is_queued_or_active() const final;
index a534fd131bf15b44a67a800e9a2eae32eed85bf8..e1bd8c8eb0f6cf34218386f834c538248b5a7a97 100644 (file)
@@ -280,6 +280,10 @@ struct ScrubPgIF {
 
   virtual void set_op_parameters(const requested_scrub_t&) = 0;
 
+  /// stop any active scrubbing (on interval end) and unregister from
+  /// the OSD scrub queue
+  virtual void on_new_interval() = 0;
+
   virtual void scrub_clear_state() = 0;
 
   virtual void handle_query_state(ceph::Formatter* f) = 0;
@@ -380,13 +384,10 @@ struct ScrubPgIF {
   virtual bool reserve_local() = 0;
 
   /**
-   * Register/de-register with the OSD scrub queue
-   *
-   * Following our status as Primary or replica.
+   * if activated as a Primary - register the scrub job with the OSD
+   * scrub queue
    */
-  virtual void on_primary_change(
-    std::string_view caller,
-    const requested_scrub_t& request_flags) = 0;
+  virtual void on_pg_activate(const requested_scrub_t& request_flags) = 0;
 
   /**
    * Recalculate the required scrub time.