osd/scrub: avoid "over clearing" queued_or_active flag

author Ronen Friedman <rfriedma@redhat.com>

Thu, 28 Dec 2023 19:41:19 +0000 (13:41 -0600)

committer Ronen Friedman <rfriedma@redhat.com>

Fri, 5 Jan 2024 15:00:10 +0000 (09:00 -0600)
author Ronen Friedman <rfriedma@redhat.com>
Thu, 28 Dec 2023 19:41:19 +0000 (13:41 -0600)
committer Ronen Friedman <rfriedma@redhat.com>
Fri, 5 Jan 2024 15:00:10 +0000 (09:00 -0600)
diff --git a/src/osd/scrubber/pg_scrubber.cc b/src/osd/scrubber/pg_scrubber.cc

index ab9b86e5b35c7ac3875a68f4a8c8bbd60c35551b..f7e027a84f075a71c6c72cac2d855d904c4e50ab 100644 (file)
--- a/src/osd/scrubber/pg_scrubber.cc
+++ b/src/osd/scrubber/pg_scrubber.cc
@@ -208,17 +208,18 @@ bool PgScrubber::should_abort() const
  
  void PgScrubber::initiate_regular_scrub(epoch_t epoch_queued)
  {
-  dout(15) << __func__ << " epoch: " << epoch_queued << dendl;
+  dout(10) << fmt::format(
+                 "{}: epoch:{} is PrimaryIdle:{}", __func__, epoch_queued,
+                 m_fsm->is_primary_idle())
+          << dendl;
+
    // we may have lost our Primary status while the message languished in the
    // queue
    if (check_interval(epoch_queued)) {
      dout(10) << "scrubber event -->> StartScrub epoch: " << epoch_queued
              << dendl;
-    reset_epoch(epoch_queued);
      m_fsm->process_event(StartScrub{});
      dout(10) << "scrubber event --<< StartScrub" << dendl;
-  } else {
-    clear_queued_or_active();  // also restarts snap trimming
    }
  }
  
@@ -229,17 +230,17 @@ void PgScrubber::advance_token()
  
  void PgScrubber::initiate_scrub_after_repair(epoch_t epoch_queued)
  {
-  dout(15) << __func__ << " epoch: " << epoch_queued << dendl;
+  dout(10) << fmt::format(
+                 "{}: epoch:{} is PrimaryIdle:{}", __func__, epoch_queued,
+                 m_fsm->is_primary_idle())
+          << dendl;
    // we may have lost our Primary status while the message languished in the
    // queue
    if (check_interval(epoch_queued)) {
      dout(10) << "scrubber event -->> AfterRepairScrub epoch: " << epoch_queued
              << dendl;
-    reset_epoch(epoch_queued);
      m_fsm->process_event(AfterRepairScrub{});
      dout(10) << "scrubber event --<< AfterRepairScrub" << dendl;
-  } else {
-    clear_queued_or_active();  // also restarts snap trimming
    }
  }
  
@@ -403,13 +404,13 @@ bool PgScrubber::is_reserving() const
    return m_fsm->is_reserving();
  }
  
-void PgScrubber::reset_epoch(epoch_t epoch_queued)
+void PgScrubber::reset_epoch()
  {
    dout(10) << __func__ << " state deep? " << state_test(PG_STATE_DEEP_SCRUB)
            << dendl;
    m_fsm->assert_not_in_session();
  
-  m_epoch_start = epoch_queued;
+  m_epoch_start = m_pg->get_same_interval_since();
    ceph_assert(m_is_deep == state_test(PG_STATE_DEEP_SCRUB));
    update_op_mode_text();
  }
diff --git a/src/osd/scrubber/pg_scrubber.h b/src/osd/scrubber/pg_scrubber.h

index 8360b4c038f77b6a3ba43716e07a2302a1b11c1b..adf50f97d92a79aaddcbe431f5a90b9f0eb02fd0 100644 (file)
--- a/src/osd/scrubber/pg_scrubber.h
+++ b/src/osd/scrubber/pg_scrubber.h
@@ -556,7 +556,7 @@ class PgScrubber : public ScrubPgIF,
     *  - the epoch when started;
     *  - the depth of the scrub requested (from the PG_STATE variable)
     */
-  void reset_epoch(epoch_t epoch_queued);
+  void reset_epoch() final;
  
    void run_callbacks();
  
diff --git a/src/osd/scrubber/scrub_machine.cc b/src/osd/scrubber/scrub_machine.cc

index 26054bf3f7642407ed311b86c044eb9027ff71ab..9aa5842c490850c8dbd3c84efca16f70c9f50c6f 100644 (file)
--- a/src/osd/scrubber/scrub_machine.cc
+++ b/src/osd/scrubber/scrub_machine.cc
@@ -57,6 +57,11 @@ bool ScrubMachine::is_reserving() const
    return state_cast<const ReservingReplicas*>();
  }
  
+bool ScrubMachine::is_primary_idle() const
+{
+  return state_cast<const PrimaryIdle*>();
+}
+
  bool ScrubMachine::is_accepting_updates() const
  {
    DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
@@ -150,6 +155,7 @@ sc::result PrimaryIdle::react(const StartScrub&)
  {
    dout(10) << "PrimaryIdle::react(const StartScrub&)" << dendl;
    DECLARE_LOCALS;
+  scrbr->reset_epoch();
    return transit<ReservingReplicas>();
  }
  
@@ -157,6 +163,7 @@ sc::result PrimaryIdle::react(const AfterRepairScrub&)
  {
    dout(10) << "PrimaryIdle::react(const AfterRepairScrub&)" << dendl;
    DECLARE_LOCALS;
+  scrbr->reset_epoch();
    return transit<ReservingReplicas>();
  }
  
diff --git a/src/osd/scrubber/scrub_machine.h b/src/osd/scrubber/scrub_machine.h

index fbd8f8b3a35960abd0abb7ecd2d11d08e37d3739..8fa96da1405251bca805419789e6c3ea3ade2dbd 100644 (file)
--- a/src/osd/scrubber/scrub_machine.h
+++ b/src/osd/scrubber/scrub_machine.h
@@ -244,6 +244,7 @@ class ScrubMachine : public sc::state_machine<ScrubMachine, NotActive> {
    void assert_not_in_session() const;
    [[nodiscard]] bool is_reserving() const;
    [[nodiscard]] bool is_accepting_updates() const;
+  [[nodiscard]] bool is_primary_idle() const;
  
    // elapsed time for the currently active scrub.session
    ceph::timespan get_time_scrubbing() const;
diff --git a/src/osd/scrubber/scrub_machine_lstnr.h b/src/osd/scrubber/scrub_machine_lstnr.h

index 086802ee813e77e8ebfb081c1f810b1878db16d6..f2ddd9c87168a5fffae0ae2fff5f581e5435028a 100644 (file)
--- a/src/osd/scrubber/scrub_machine_lstnr.h
+++ b/src/osd/scrubber/scrub_machine_lstnr.h
@@ -212,6 +212,9 @@ struct ScrubMachineListener {
    virtual void set_queued_or_active() = 0;
    virtual void clear_queued_or_active() = 0;
  
+  /// note the epoch when the scrub session started
+  virtual void reset_epoch() = 0;
+
    /**
     * Our scrubbing is blocked, waiting for an excessive length of time for
     * our target chunk to be unlocked. We will set the corresponding flags,
author	Ronen Friedman <rfriedma@redhat.com>
	Thu, 28 Dec 2023 19:41:19 +0000 (13:41 -0600)
committer	Ronen Friedman <rfriedma@redhat.com>
	Fri, 5 Jan 2024 15:00:10 +0000 (09:00 -0600)
src/osd/scrubber/pg_scrubber.cc		patch \| blob \| history
src/osd/scrubber/pg_scrubber.h		patch \| blob \| history
src/osd/scrubber/scrub_machine.cc		patch \| blob \| history
src/osd/scrubber/scrub_machine.h		patch \| blob \| history
src/osd/scrubber/scrub_machine_lstnr.h		patch \| blob \| history