From a9c5839a0ce14f0dbe17a5f0adcbae265e01d3ca Mon Sep 17 00:00:00 2001
From: Ronen Friedman
Date: Mon, 28 Dec 2020 18:49:34 +0200
Subject: [PATCH] osd: scrubber: improved handling of new-interval events

Interval change events now trigger immediate scrub session termination
and resource release.
(based on code-review comments by @athanatos.)

Fixes: https://tracker.ceph.com/issues/48712
Signed-off-by: Ronen Friedman
---
 src/osd/PG.cc             |  26 ++----
 src/osd/PrimaryLogPG.cc   |   2 -
 src/osd/pg_scrubber.cc    | 170 +++++++++-----------------------------
 src/osd/pg_scrubber.h     |  27 ++++--
 src/osd/scrub_machine.cc  |  26 ------
 src/osd/scrub_machine.h   |  19 ++---
 src/osd/scrubber_common.h |  16 ++--
 7 files changed, 82 insertions(+), 204 deletions(-)

diff --git a/src/osd/PG.cc b/src/osd/PG.cc
index 972a5482841a7..2b97a6cc4eec2 100644
--- a/src/osd/PG.cc
+++ b/src/osd/PG.cc
@@ -360,7 +360,7 @@ void PG::clear_primary_state()
   finish_sync_event = 0;  // so that _finish_recovery doesn't go off in another thread
   release_pg_backoffs();
 
-  m_scrubber->unreserve_replicas();
+  m_scrubber->discard_replica_reservations();
   scrub_after_recovery = false;
 
   agent_clear();
@@ -2066,31 +2066,19 @@ void PG::replica_scrub(OpRequestRef op, ThreadPool::TPHandle& handle)
 void PG::scrub(epoch_t epoch_queued, ThreadPool::TPHandle& handle)
 {
   dout(10) << __func__ << " queued at: " << epoch_queued << dendl;
 
-
-  scrub_queued = false;
-  ceph_assert(is_primary());
-  ceph_assert(!m_scrubber->is_scrub_active());
-  // a new scrub
-
-  m_scrubber->reset_epoch(epoch_queued);
-
-  // note: send_start_scrub() will verify 'epoch queued' against our current interval
-  m_scrubber->send_start_scrub(epoch_queued);
+  scrub_queued = false;
+  m_scrubber->initiate_regular_scrub(epoch_queued);
 }
 
 // note: no need to secure OSD resources for a recovery scrub
-void PG::recovery_scrub(epoch_t epoch_queued, ThreadPool::TPHandle& handle)
+void PG::recovery_scrub(epoch_t epoch_queued,
+                        [[maybe_unused]] ThreadPool::TPHandle& handle)
 {
   dout(10) << __func__ << " queued at: " << epoch_queued << dendl;
 
-
-  scrub_queued = false;
-  ceph_assert(is_primary());
-  ceph_assert(!m_scrubber->is_scrub_active());
-  // a new scrub
-  m_scrubber->reset_epoch(epoch_queued);
-  m_scrubber->send_start_after_repair(epoch_queued);
+  scrub_queued = false;
+  m_scrubber->initiate_scrub_after_repair(epoch_queued);
 }
 
 void PG::replica_scrub(epoch_t epoch_queued,
diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc
index 663b8fb011d5b..c0fbdd342a5c4 100644
--- a/src/osd/PrimaryLogPG.cc
+++ b/src/osd/PrimaryLogPG.cc
@@ -12552,8 +12552,6 @@ void PrimaryLogPG::on_change(ObjectStore::Transaction &t)
   requeue_ops(waiting_for_active);
   requeue_ops(waiting_for_readable);
 
-  m_scrubber->clear_scrub_reservations();
-
   vector tids;
   cancel_copy_ops(is_primary(), &tids);
   cancel_flush_ops(is_primary(), &tids);
diff --git a/src/osd/pg_scrubber.cc b/src/osd/pg_scrubber.cc
index 5c9d3b8e4492b..fe57a9f2594c2 100644
--- a/src/osd/pg_scrubber.cc
+++ b/src/osd/pg_scrubber.cc
@@ -72,75 +72,14 @@ ostream& operator<<(ostream& out, const requested_scrub_t& sf)
   return out;
 }
 
-// returns false if the message should be discarded. Handles the notification of interval
-// change, if not done already. called only for active scrub? not sure.
-
-// let's first make this a Primary-only function
+/*
+ * If the incoming message is from a previous interval, it must mean
+ * PrimaryLogPG::on_change() was called when that interval ended. We can safely discard
+ * the stale message.
+ */
 bool PgScrubber::check_interval(epoch_t epoch_to_verify)
 {
-  const auto current_interval = m_pg->get_same_interval_since();
-
-  if (epoch_to_verify < current_interval) {
-    // the event will not be delivered. If we have already noticed and handled
-    // the change of seasons, it will be silently discarded. Otherwise - we
-    // reset the scrubber and its FSM.
-    dout(10) << __func__ << " stale message. epoch: " << epoch_to_verify << " vs. "
-             << current_interval << " (handled: " << m_last_dead_interval << ")" << dendl;
-
-    if (epoch_to_verify > m_last_dead_interval) {
-
-      // we have not seen this interval change yet.
-      // The remote reservations are no longer relevant.
-
-      m_last_dead_interval = current_interval;
-
-      // clear the remote reservations. No need to send messages.
-      if (m_reservations) {
-        m_reservations->discard_all();
-      }
-
-      // stop the scrub and send a reset message to the FSM
-      scrub_clear_state();
-    }
-    return false;
-  }
-
-  return true;
-}
-
-bool PgScrubber::check_interval_replica(epoch_t epoch_to_verify)
-{
-  const auto current_interval = m_pg->get_same_interval_since();
-
-  if (epoch_to_verify < current_interval) {
-    // the event will not be delivered. If we have already noticed and handled
-    // the change of seasons, it will be silently discarded. Otherwise - we
-    // reset the scrubber and its FSM.
-    dout(10) << __func__ << " stale message. epoch: " << epoch_to_verify << " vs. "
-             << current_interval << " (handled: " << m_last_dead_interval << ")" << dendl;
-
-    if (epoch_to_verify > m_last_dead_interval) {
-
-      // we have not seen this interval change yet.
-      // The remote reservations are no longer relevant.
-
-      m_last_dead_interval = current_interval;
-
-      // clear the remote reservations. No need to send messages
-      m_remote_osd_resource.reset();
-
-      // stop the scrub and send a reset message to the FSM
-      // replica_handling_done();
-      send_interval_changed();
-    }
-    return false;
-  }
-
-  // verify that we are reserved by the primary
-  // not true anymore (see rapair scrubs) ceph_assert(m_remote_osd_resource &&
-  // m_remote_osd_resource->is_reserved());
-
-  return true;
+  return epoch_to_verify >= m_pg->get_same_interval_since();
 }
 
 bool PgScrubber::is_message_relevant(epoch_t epoch_to_verify)
@@ -156,12 +95,10 @@ bool PgScrubber::is_message_relevant(epoch_t epoch_to_verify)
     return false;
   }
 
-  // check for reasons to abort this scrub
-  // has a new interval started?
 
   if (!check_interval(epoch_to_verify)) {
-    // if this is a new interval, check_interval() just discarded
-    // remote resources and then killed the scrub
+    // if this is a new interval, on_change() has already terminated that
+    // old scrub.
     return false;
   }
 
@@ -171,9 +108,6 @@ bool PgScrubber::is_message_relevant(epoch_t epoch_to_verify)
 
   return verify_against_abort(epoch_to_verify);
 }
-
-// false if the message was discarded because of an abort flag.
-// Reset everything if the abort was not handled before.
 bool PgScrubber::verify_against_abort(epoch_t epoch_to_verify)
 {
   if (!should_abort()) {
@@ -214,7 +148,7 @@ bool PgScrubber::should_abort() const
   return false;
 }
 
-// sending (processing) state-machine events --------------------------------
+// initiating state-machine events --------------------------------
 
 /*
 * a note re the checks performed before sending scrub-initiating messages:
@@ -226,7 +160,7 @@ bool PgScrubber::should_abort() const
 * The check_interval() catches all major changes to the PG. As for the other conditions
 * we may check (and see is_message_relevant() above):
 *
- * - we are not 'active' yet, so must check against is_active(), andL
+ * - we are not 'active' yet, so must not check against is_active(), and:
 *
 * - the 'abort' flags were just verified (when the triggering message was queued). As
 *   those are only modified in human speeds - they need not be queried again.
@@ -235,25 +169,30 @@ bool PgScrubber::should_abort() const
 * ('StartReplica' & 'StartReplicaNoWait').
 */
 
-
-void PgScrubber::send_start_scrub(epoch_t epoch_queued)
+void PgScrubber::initiate_regular_scrub(epoch_t epoch_queued)
 {
-  dout(10) << "scrubber event -->> " << __func__ << " epoch: " << epoch_queued << dendl;
+  dout(15) << __func__ << " epoch: " << epoch_queued << dendl;
+  // we may have lost our Primary status while the message languished in the queue
   if (check_interval(epoch_queued)) {
+    dout(10) << "scrubber event -->> StartScrub epoch: " << epoch_queued << dendl;
+    reset_epoch(epoch_queued);
     m_fsm->my_states();
     m_fsm->process_event(StartScrub{});
+    dout(10) << "scrubber event --<< StartScrub" << dendl;
   }
-  dout(10) << "scrubber event --<< " << __func__ << dendl;
 }
 
-void PgScrubber::send_start_after_repair(epoch_t epoch_queued)
+void PgScrubber::initiate_scrub_after_repair(epoch_t epoch_queued)
 {
-  dout(10) << "scrubber event -->> " << __func__ << " epoch: " << epoch_queued << dendl;
+  dout(15) << __func__ << " epoch: " << epoch_queued << dendl;
+  // we may have lost our Primary status while the message languished in the queue
   if (check_interval(epoch_queued)) {
+    dout(10) << "scrubber event -->> AfterRepairScrub epoch: " << epoch_queued << dendl;
+    reset_epoch(epoch_queued);
     m_fsm->my_states();
     m_fsm->process_event(AfterRepairScrub{});
+    dout(10) << "scrubber event --<< AfterRepairScrub" << dendl;
   }
-  dout(10) << "scrubber event --<< " << __func__ << dendl;
 }
 
 void PgScrubber::send_scrub_unblock(epoch_t epoch_queued)
@@ -273,6 +212,7 @@ void PgScrubber::send_scrub_resched(epoch_t epoch_queued)
     m_fsm->my_states();
     m_fsm->process_event(InternalSchedScrub{});
   }
+  dout(10) << "scrubber event --<< " << __func__ << dendl;
 }
 
 void PgScrubber::send_start_replica(epoch_t epoch_queued)
@@ -283,9 +223,9 @@ void PgScrubber::send_start_replica(epoch_t epoch_queued)
     dout(1) << "got a replica scrub request while Primary!" << dendl;
     return;
   }
-  if (check_interval_replica(epoch_queued)) {
+  if (check_interval(epoch_queued)) {
     m_fsm->my_states();
-    // buy us some time by not waiting for updates if there are none
+    // save us some time by not waiting for updates if there are none
    // to wait for. Affects the transition from NotActive into either
    // ReplicaWaitUpdates or ActiveReplica.
    if (pending_active_pushes())
@@ -299,7 +239,7 @@ void PgScrubber::send_sched_replica(epoch_t epoch_queued)
 {
   dout(10) << "scrubber event -->> " << __func__ << " epoch: " << epoch_queued << dendl;
-  if (check_interval_replica(epoch_queued)) {
+  if (check_interval(epoch_queued)) {
     m_fsm->my_states();
     m_fsm->process_event(SchedReplica{});  // retest for map availability
   }
@@ -339,15 +279,6 @@ void PgScrubber::digest_update_notification(epoch_t epoch_queued)
   dout(10) << "scrubber event --<< " << __func__ << dendl;
 }
 
-// no checks should be performed here
-void PgScrubber::send_interval_changed()
-{
-  dout(10) << "scrubber event -->> " << __func__ << dendl;
-  m_fsm->my_states();
-  m_fsm->process_event(IntervalChanged{});
-  dout(10) << "scrubber event --<< " << __func__ << dendl;
-}
-
 void PgScrubber::send_replica_maps_ready(epoch_t epoch_queued)
 {
   dout(10) << "scrubber event -->> " << __func__ << " epoch: " << epoch_queued << dendl;
@@ -361,7 +292,7 @@ void PgScrubber::send_replica_maps_ready(epoch_t epoch_queued)
 void PgScrubber::send_replica_pushes_upd(epoch_t epoch_queued)
 {
   dout(10) << "scrubber event -->> " << __func__ << " epoch: " << epoch_queued << dendl;
-  if (check_interval_replica(epoch_queued)) {
+  if (check_interval(epoch_queued)) {
     m_fsm->my_states();
     m_fsm->process_event(ReplicaPushesUpd{});
   }
@@ -852,7 +783,6 @@ void PgScrubber::on_init()
   preemption_data.reset();
   m_pg->publish_stats_to_osd();
   m_interval_start = m_pg->get_history().same_interval_since;
-  // m_epoch_started = m_pg->get_osdmap_epoch();
 
   dout(10) << __func__ << " start same_interval:" << m_interval_start << dendl;
 
@@ -866,7 +796,6 @@ void PgScrubber::on_init()
   }
 
   m_start = m_pg->info.pgid.pgid.get_hobj_start();
-  m_last_dead_interval = get_osdmap_epoch();
   m_active = true;
 }
 
@@ -874,8 +803,6 @@ void PgScrubber::on_replica_init()
 {
   ceph_assert(!m_active);
   m_active = true;
-  m_last_dead_interval = get_osdmap_epoch();  // so that check_interval_replica() won't
-                                              // kill a scrub for stale messages
 }
 
 void PgScrubber::_scan_snaps(ScrubMap& smap)
@@ -1459,6 +1386,14 @@ void PgScrubber::handle_scrub_reserve_release(OpRequestRef op)
   m_remote_osd_resource.reset();
 }
 
+void PgScrubber::discard_replica_reservations()
+{
+  dout(10) << __func__ << dendl;
+  if (m_reservations.has_value()) {
+    m_reservations->discard_all();
+  }
+}
+
 void PgScrubber::clear_scrub_reservations()
 {
   dout(10) << __func__ << dendl;
@@ -1819,12 +1754,14 @@ void PgScrubber::cleanup_on_finish()
   state_clear(PG_STATE_DEEP_SCRUB);
   m_pg->publish_stats_to_osd();
 
-  m_reservations.reset();
-  m_local_osd_resource.reset();
+  clear_scrub_reservations();
+  m_pg->publish_stats_to_osd();
 
   requeue_waiting();
   reset_internal_state();
+  m_flags = scrub_flags_t{};
+
   // type-specific state clear
   _scrub_clear_state();
 }
@@ -1857,6 +1794,7 @@ void PgScrubber::clear_pgscrub_state()
 
   requeue_waiting();
   reset_internal_state();
+  m_flags = scrub_flags_t{};
 
   // type-specific state clear
   _scrub_clear_state();
@@ -1869,32 +1807,8 @@ void PgScrubber::replica_handling_done()
   state_clear(PG_STATE_SCRUBBING);
   state_clear(PG_STATE_DEEP_SCRUB);
 
-  preemption_data.reset();
-  m_maps_status.reset();
-  m_received_maps.clear();
-
-  m_start = hobject_t{};
-  m_end = hobject_t{};
-  m_max_end = hobject_t{};
-  m_subset_last_update = eversion_t{};
-  m_shallow_errors = 0;
-  m_deep_errors = 0;
-  m_fixed_count = 0;
-  m_omap_stats = (const struct omap_stat_t){0};
-
-  run_callbacks();
-  m_inconsistent.clear();
-  m_missing.clear();
-  m_authoritative.clear();
-
-  num_digest_updates_pending = 0;
-  replica_scrubmap = ScrubMap{};
-  replica_scrubmap_pos.reset();
-
-  m_cleaned_meta_map = ScrubMap{};
-  m_needs_sleep = true;
-  m_sleep_started_at = utime_t{};
+  reset_internal_state();
 
-  m_active = false;
   m_pg->publish_stats_to_osd();
 }
@@ -1933,8 +1847,6 @@ void PgScrubber::reset_internal_state()
   m_needs_sleep = true;
   m_sleep_started_at = utime_t{};
 
-  m_flags = scrub_flags_t{};
-
   m_active = false;
 }
 
diff --git a/src/osd/pg_scrubber.h b/src/osd/pg_scrubber.h
index f6536dba6ef0b..e24ad422dfc41 100644
--- a/src/osd/pg_scrubber.h
+++ b/src/osd/pg_scrubber.h
@@ -191,9 +191,9 @@ class PgScrubber : public ScrubPgIF, public ScrubMachineListener {
   /// are we waiting for resource reservation grants form our replicas?
   [[nodiscard]] bool is_reserving() const final;
 
-  void send_start_scrub(epoch_t epoch_queued) final;
+  void initiate_regular_scrub(epoch_t epoch_queued) final;
 
-  void send_start_after_repair(epoch_t epoch_queued) final;
+  void initiate_scrub_after_repair(epoch_t epoch_queued) final;
 
   void send_scrub_resched(epoch_t epoch_queued) final;
 
@@ -213,8 +213,6 @@ class PgScrubber : public ScrubPgIF, public ScrubMachineListener {
 
   void send_replica_pushes_upd(epoch_t epoch_queued) final;
 
-  void reset_epoch(epoch_t epoch_queued) final;
-
   /**
    * we allow some number of preemptions of the scrub, which mean we do
    * not block. Then we start to block. Once we start blocking, we do
@@ -234,6 +232,7 @@ class PgScrubber : public ScrubPgIF, public ScrubMachineListener {
   void handle_scrub_reserve_grant(OpRequestRef op, pg_shard_t from) final;
   void handle_scrub_reserve_reject(OpRequestRef op, pg_shard_t from) final;
   void handle_scrub_reserve_release(OpRequestRef op) final;
+  void discard_replica_reservations() final;
   void clear_scrub_reservations() final;  // PG::clear... fwds to here
   void unreserve_replicas() final;
 
@@ -422,9 +421,14 @@ class PgScrubber : public ScrubPgIF, public ScrubMachineListener {
 
   ScrubMap clean_meta_map();
 
-  void run_callbacks();
+  /**
+   * record some parameters of the initiated scrub:
+   * - the epoch when started;
+   * - the depth of the scrub requested (from the PG_STATE variable)
+   */
+  void reset_epoch(epoch_t epoch_queued);
 
-  void send_interval_changed();
+  void run_callbacks();
 
   // ----- methods used to verify the relevance of incoming events:
 
@@ -457,12 +461,17 @@ class PgScrubber : public ScrubPgIF, public ScrubMachineListener {
   * check the 'no scrub' configuration options.
   */
  [[nodiscard]] bool should_abort() const;
+
+  /**
+   * Check the 'no scrub' configuration flags.
+   *
+   * Reset everything if the abort was not handled before.
+   * @returns false if the message was discarded due to abort flag.
+   */
  [[nodiscard]] bool verify_against_abort(epoch_t epoch_to_verify);
 
-  bool check_interval(epoch_t epoch_to_verify);
-  bool check_interval_replica(epoch_t epoch_to_verify);
+  [[nodiscard]] bool check_interval(epoch_t epoch_to_verify);
 
-  epoch_t m_last_dead_interval{};
  epoch_t m_last_aborted{};  // last time we've noticed a request to abort
 
diff --git a/src/osd/scrub_machine.cc b/src/osd/scrub_machine.cc
index 271e1362fc778..9ae3e873f9575 100644
--- a/src/osd/scrub_machine.cc
+++ b/src/osd/scrub_machine.cc
@@ -75,12 +75,6 @@ NotActive::NotActive(my_context ctx) : my_base(ctx)
   dout(10) << "-- state -->> NotActive" << dendl;
 }
 
-sc::result NotActive::react(const IntervalChanged&)
-{
-  dout(15) << "NotActive::react(const IntervalChanged&)" << dendl;
-  return discard_event();
-}
-
 // ----------------------- ReservingReplicas ---------------------------------
 
 ReservingReplicas::ReservingReplicas(my_context ctx) : my_base(ctx)
@@ -426,16 +420,6 @@ ReplicaWaitUpdates::ReplicaWaitUpdates(my_context ctx) : my_base(ctx)
   scrbr->on_replica_init();
 }
 
-sc::result ReplicaWaitUpdates::react(const IntervalChanged&)
-{
-  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
-  dout(10) << "ReplicaWaitUpdates::react(const IntervalChanged&)" << dendl;
-
-  // note: the master's reservation of us was just discarded by our caller
-  scrbr->replica_handling_done();
-  return transit();
-}
-
 /*
 * Triggered externally, by the entity that had an update re pushes
 */
@@ -517,16 +501,6 @@ sc::result ActiveReplica::react(const SchedReplica&)
   return transit();
 }
 
-sc::result ActiveReplica::react(const IntervalChanged&)
-{
-  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
-  dout(10) << "ActiveReplica::react(const IntervalChanged&) " << dendl;
-
-  scrbr->send_replica_map(PreemptionNoted::no_preemption);
-  scrbr->replica_handling_done();
-  return transit();
-}
-
 /**
 * the event poster is handling the scrubber reset
 */
diff --git a/src/osd/scrub_machine.h b/src/osd/scrub_machine.h
index 7bbb7db37fe18..95035389e9ab6 100644
--- a/src/osd/scrub_machine.h
+++ b/src/osd/scrub_machine.h
@@ -54,8 +54,6 @@ void on_event_discard(std::string_view nm);
 MEV(RemotesReserved)  ///< all replicas have granted our reserve request
 MEV(ReservationFailure)  ///< a reservation request has failed
 
-MEV(IntervalChanged)  ///< ... from what it was when this chunk started
-
 MEV(StartScrub)  ///< initiate a new scrubbing session (relevant if we are a Primary)
 MEV(AfterRepairScrub)  ///< initiate a new scrubbing session. Only triggered at Recovery
                        ///< completion.
@@ -130,15 +128,12 @@ class ScrubMachine : public sc::state_machine {
 struct NotActive : sc::state {
   explicit NotActive(my_context ctx);
 
-  using reactions = mpl::list,
-                    sc::transition,
+  using reactions = mpl::list,
                     // a scrubbing that was initiated at recovery completion,
                     // and requires no resource reservations:
                     sc::transition,
                     sc::transition,
                    sc::transition>;
-
-  sc::result react(const IntervalChanged&);
 };
 
 struct ReservingReplicas : sc::state {
@@ -299,24 +294,20 @@ struct WaitDigestUpdate : sc::state {
 */
 struct ReplicaWaitUpdates : sc::state {
   explicit ReplicaWaitUpdates(my_context ctx);
-  using reactions = mpl::list,
-                    sc::custom_reaction,
-                    sc::custom_reaction>;
+  using reactions =
+    mpl::list, sc::custom_reaction>;
 
   sc::result react(const ReplicaPushesUpd&);
-  sc::result react(const IntervalChanged&);
   sc::result react(const FullReset&);
 };
 
 
 struct ActiveReplica : sc::state {
   explicit ActiveReplica(my_context ctx);
-  using reactions = mpl::list,
-                    sc::custom_reaction,
-                    sc::custom_reaction>;
+  using reactions =
+    mpl::list, sc::custom_reaction>;
 
   sc::result react(const SchedReplica&);
-  sc::result react(const IntervalChanged&);
   sc::result react(const FullReset&);
 };
 
diff --git a/src/osd/scrubber_common.h b/src/osd/scrubber_common.h
index 510c428546948..15a6cdf4dede4 100644
--- a/src/osd/scrubber_common.h
+++ b/src/osd/scrubber_common.h
@@ -116,9 +116,9 @@ struct ScrubPgIF {
 
   // --------------- triggering state-machine events:
 
-  virtual void send_start_scrub(epoch_t epoch_queued) = 0;
+  virtual void initiate_regular_scrub(epoch_t epoch_queued) = 0;
 
-  virtual void send_start_after_repair(epoch_t epoch_queued) = 0;
+  virtual void initiate_scrub_after_repair(epoch_t epoch_queued) = 0;
 
   virtual void send_scrub_resched(epoch_t epoch_queued) = 0;
 
@@ -140,8 +140,6 @@ struct ScrubPgIF {
 
   // --------------------------------------------------
 
-  virtual void reset_epoch(epoch_t epoch_queued) = 0;
-
   [[nodiscard]] virtual bool are_callbacks_pending() const = 0;  // currently only used for an assert
 
@@ -230,9 +228,17 @@ struct ScrubPgIF {
   */
  virtual void unreserve_replicas() = 0;
 
+  /**
+   * "forget" all replica reservations. No messages are sent to the
+   * previously-reserved replicas.
+   *
+   * Used upon interval change. The replicas' state is guaranteed to
+   * be reset separately by the interval-change event.
+   */
+  virtual void discard_replica_reservations() = 0;
+
   /**
   * clear both local and OSD-managed resource reservation flags
-   * (note: no replica un/reservation messages are involved!)
   */
  virtual void clear_scrub_reservations() = 0;
 
-- 
2.39.5
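
---
Editor's note (illustration only; not part of the patch): the refactor converges on two
small patterns -- a guard-then-dispatch shape in the new initiate_*() entry points
(verify the queued epoch against the current interval, reset per-scrub state, then feed
the FSM), and a "forget, don't message" discard of replica reservations on interval
change. The sketch below is a self-contained C++ mock of those two patterns. Every name
in it (Scrubber, Reservations, initiate(), m_interval_since, ...) is a placeholder
invented for the example; none of it is the Ceph code, and the real epoch/interval
bookkeeping is far richer than the plain integers used here.

#include <cstdint>
#include <iostream>
#include <optional>

using epoch_t = uint32_t;

struct Reservations {
  // the real object would track per-replica grants; here it only logs
  void discard_all() { std::cout << "reservations forgotten (no messages sent)\n"; }
};

class Scrubber {
 public:
  explicit Scrubber(epoch_t interval_start) : m_interval_since{interval_start} {}

  // mirrors the simplified check_interval(): a message is stale iff it was
  // queued before the current interval started
  bool check_interval(epoch_t queued) const { return queued >= m_interval_since; }

  // guard-then-dispatch, in the spirit of initiate_regular_scrub()
  void initiate(epoch_t queued) {
    if (!check_interval(queued)) {
      // the interval-change handler is assumed to have already torn the old scrub down
      std::cout << "stale event from epoch " << queued << " discarded\n";
      return;
    }
    m_epoch_start = queued;  // the 'reset_epoch()' step
    std::cout << "dispatching StartScrub for epoch " << queued << "\n";
  }

  // mirrors discard_replica_reservations(): forget the grants, send nothing
  void discard_replica_reservations() {
    if (m_reservations.has_value()) {
      m_reservations->discard_all();
    }
  }

 private:
  epoch_t m_interval_since;
  epoch_t m_epoch_start{0};
  std::optional<Reservations> m_reservations{Reservations{}};
};

int main() {
  Scrubber s{/*interval_start=*/100};
  s.initiate(95);   // queued before the current interval -> silently discarded
  s.initiate(102);  // current interval -> dispatched
  s.discard_replica_reservations();
}

The contrast the second pattern illustrates: unreserve_replicas() is the "polite" path
that informs the replicas, whereas discard_replica_reservations() is used on interval
change, where (per the comment added to scrubber_common.h) each replica's state is
guaranteed to be reset by the interval-change event itself, so no messages are needed.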