osd: scrubber: improved handling of new-interval events

author Ronen Friedman <rfriedma@redhat.com>

Mon, 28 Dec 2020 16:49:34 +0000 (18:49 +0200)

committer Ronen Friedman <rfriedma@redhat.com>

Wed, 13 Jan 2021 13:27:06 +0000 (15:27 +0200)
author Ronen Friedman <rfriedma@redhat.com>
Mon, 28 Dec 2020 16:49:34 +0000 (18:49 +0200)
committer Ronen Friedman <rfriedma@redhat.com>
Wed, 13 Jan 2021 13:27:06 +0000 (15:27 +0200)
diff --git a/src/osd/PG.cc b/src/osd/PG.cc

index 972a5482841a7b8f7ebb93cc86e44e349ef4536f..2b97a6cc4eec2700a567546cc0f4264bc3cfd003 100644 (file)
--- a/src/osd/PG.cc
+++ b/src/osd/PG.cc
@@ -360,7 +360,7 @@ void PG::clear_primary_state()
    finish_sync_event = 0;  // so that _finish_recovery doesn't go off in another thread
    release_pg_backoffs();
  
-  m_scrubber->unreserve_replicas();
+  m_scrubber->discard_replica_reservations();
    scrub_after_recovery = false;
  
    agent_clear();
@@ -2066,31 +2066,19 @@ void PG::replica_scrub(OpRequestRef op, ThreadPool::TPHandle& handle)
  void PG::scrub(epoch_t epoch_queued, ThreadPool::TPHandle& handle)
  {
    dout(10) << __func__ << " queued at: " << epoch_queued << dendl;
-
-  scrub_queued = false;
-  ceph_assert(is_primary());
-  ceph_assert(!m_scrubber->is_scrub_active());
-
    // a new scrub
-
-  m_scrubber->reset_epoch(epoch_queued);
-
-  // note: send_start_scrub() will verify 'epoch queued' against our current interval
-  m_scrubber->send_start_scrub(epoch_queued);
+  scrub_queued = false;
+  m_scrubber->initiate_regular_scrub(epoch_queued);
  }
  
  // note: no need to secure OSD resources for a recovery scrub
-void PG::recovery_scrub(epoch_t epoch_queued, ThreadPool::TPHandle& handle)
+void PG::recovery_scrub(epoch_t epoch_queued,
+                        [[maybe_unused]] ThreadPool::TPHandle& handle)
  {
    dout(10) << __func__ << " queued at: " << epoch_queued << dendl;
-
-  scrub_queued = false;
-  ceph_assert(is_primary());
-  ceph_assert(!m_scrubber->is_scrub_active());
-
    // a new scrub
-  m_scrubber->reset_epoch(epoch_queued);
-  m_scrubber->send_start_after_repair(epoch_queued);
+  scrub_queued = false;
+  m_scrubber->initiate_scrub_after_repair(epoch_queued);
  }
  
  void PG::replica_scrub(epoch_t epoch_queued,
diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc

index 663b8fb011d5b011d8645cb596aaf435a1745b9e..c0fbdd342a5c49ff659d1d12cc35054b82ea2a0f 100644 (file)
--- a/src/osd/PrimaryLogPG.cc
+++ b/src/osd/PrimaryLogPG.cc
@@ -12552,8 +12552,6 @@ void PrimaryLogPG::on_change(ObjectStore::Transaction &t)
    requeue_ops(waiting_for_active);
    requeue_ops(waiting_for_readable);
  
-  m_scrubber->clear_scrub_reservations();
-
    vector<ceph_tid_t> tids;
    cancel_copy_ops(is_primary(), &tids);
    cancel_flush_ops(is_primary(), &tids);
diff --git a/src/osd/pg_scrubber.cc b/src/osd/pg_scrubber.cc

index 5c9d3b8e4492b0e1da1f5e8552bf15b8080a18b5..fe57a9f2594c2ad0bb08aca77bb2d50c49faaba8 100644 (file)
--- a/src/osd/pg_scrubber.cc
+++ b/src/osd/pg_scrubber.cc
@@ -72,75 +72,14 @@ ostream& operator<<(ostream& out, const requested_scrub_t& sf)
    return out;
  }
  
-// returns false if the message should be discarded. Handles the notification of interval
-// change, if not done already. called only for active scrub? not sure.
-
-// let's first make this a Primary-only function
+/*
+ * if the incoming message is from a previous interval, it must mean
+ * PrimaryLogPG::on_change() was called when that interval ended. We can safely discard
+ * the stale message.
+ */
  bool PgScrubber::check_interval(epoch_t epoch_to_verify)
  {
-  const auto current_interval = m_pg->get_same_interval_since();
-
-  if (epoch_to_verify < current_interval) {
-    // the event will not be delivered. If we have already noticed and handled
-    // the change of seasons, it will be silently discarded. Otherwise - we
-    // reset the scrubber and its FSM.
-    dout(10) << __func__ << " stale message. epoch: " << epoch_to_verify << " vs. "
-            << current_interval << " (handled: " << m_last_dead_interval << ")" << dendl;
-
-    if (epoch_to_verify > m_last_dead_interval) {
-
-      // we have not seen this interval change yet.
-      // The remote reservations are no longer relevant.
-
-      m_last_dead_interval = current_interval;
-
-      // clear the remote reservations. No need to send messages.
-      if (m_reservations) {
-       m_reservations->discard_all();
-      }
-
-      // stop the scrub and send a reset message to the FSM
-      scrub_clear_state();
-    }
-    return false;
-  }
-
-  return true;
-}
-
-bool PgScrubber::check_interval_replica(epoch_t epoch_to_verify)
-{
-  const auto current_interval = m_pg->get_same_interval_since();
-
-  if (epoch_to_verify < current_interval) {
-    // the event will not be delivered. If we have already noticed and handled
-    // the change of seasons, it will be silently discarded. Otherwise - we
-    // reset the scrubber and its FSM.
-    dout(10) << __func__ << " stale message. epoch: " << epoch_to_verify << " vs. "
-            << current_interval << " (handled: " << m_last_dead_interval << ")" << dendl;
-
-    if (epoch_to_verify > m_last_dead_interval) {
-
-      // we have not seen this interval change yet.
-      // The remote reservations are no longer relevant.
-
-      m_last_dead_interval = current_interval;
-
-      // clear the remote reservations. No need to send messages
-      m_remote_osd_resource.reset();
-
-      // stop the scrub and send a reset message to the FSM
-      // replica_handling_done();
-      send_interval_changed();
-    }
-    return false;
-  }
-
-  // verify that we are reserved by the primary
-  // not true anymore (see rapair scrubs) ceph_assert(m_remote_osd_resource &&
-  // m_remote_osd_resource->is_reserved());
-
-  return true;
+  return epoch_to_verify >= m_pg->get_same_interval_since();
  }
  
  bool PgScrubber::is_message_relevant(epoch_t epoch_to_verify)
@@ -156,12 +95,10 @@ bool PgScrubber::is_message_relevant(epoch_t epoch_to_verify)
      return false;
    }
  
-  // check for reasons to abort this scrub
-
    // has a new interval started?
    if (!check_interval(epoch_to_verify)) {
-    // if this is a new interval, check_interval() just discarded
-    // remote resources and then killed the scrub
+    // if this is a new interval, on_change() has already terminated that
+    // old scrub.
      return false;
    }
  
@@ -171,9 +108,6 @@ bool PgScrubber::is_message_relevant(epoch_t epoch_to_verify)
    return verify_against_abort(epoch_to_verify);
  }
  
-
-// false if the message was discarded because of an abort flag.
-// Reset everything if the abort was not handled before.
  bool PgScrubber::verify_against_abort(epoch_t epoch_to_verify)
  {
    if (!should_abort()) {
@@ -214,7 +148,7 @@ bool PgScrubber::should_abort() const
    return false;
  }
  
-//   sending (processing) state-machine events --------------------------------
+//   initiating state-machine events --------------------------------
  
  /*
   * a note re the checks performed before sending scrub-initiating messages:
@@ -226,7 +160,7 @@ bool PgScrubber::should_abort() const
   * The check_interval() catches all major changes to the PG. As for the other conditions
   * we may check (and see is_message_relevant() above):
   *
- * - we are not 'active' yet, so must check against is_active(), andL
+ * - we are not 'active' yet, so must not check against is_active(), and:
   *
   * - the 'abort' flags were just verified (when the triggering message was queued). As
   *   those are only modified in human speeds - they need not be queried again.
@@ -235,25 +169,30 @@ bool PgScrubber::should_abort() const
   * ('StartReplica' & 'StartReplicaNoWait').
   */
  
-
-void PgScrubber::send_start_scrub(epoch_t epoch_queued)
+void PgScrubber::initiate_regular_scrub(epoch_t epoch_queued)
  {
-  dout(10) << "scrubber event -->> " << __func__ << " epoch: " << epoch_queued << dendl;
+  dout(15) << __func__ << " epoch: " << epoch_queued << dendl;
+  // we may have lost our Primary status while the message languished in the queue
    if (check_interval(epoch_queued)) {
+    dout(10) << "scrubber event -->> StartScrub epoch: " << epoch_queued << dendl;
+    reset_epoch(epoch_queued);
      m_fsm->my_states();
      m_fsm->process_event(StartScrub{});
+    dout(10) << "scrubber event --<< StartScrub" << dendl;
    }
-  dout(10) << "scrubber event --<< " << __func__ << dendl;
  }
  
-void PgScrubber::send_start_after_repair(epoch_t epoch_queued)
+void PgScrubber::initiate_scrub_after_repair(epoch_t epoch_queued)
  {
-  dout(10) << "scrubber event -->> " << __func__ << " epoch: " << epoch_queued << dendl;
+  dout(15) << __func__ << " epoch: " << epoch_queued << dendl;
+  // we may have lost our Primary status while the message languished in the queue
    if (check_interval(epoch_queued)) {
+    dout(10) << "scrubber event -->> AfterRepairScrub epoch: " << epoch_queued << dendl;
+    reset_epoch(epoch_queued);
      m_fsm->my_states();
      m_fsm->process_event(AfterRepairScrub{});
+    dout(10) << "scrubber event --<< AfterRepairScrub" << dendl;
    }
-  dout(10) << "scrubber event --<< " << __func__ << dendl;
  }
  
  void PgScrubber::send_scrub_unblock(epoch_t epoch_queued)
@@ -273,6 +212,7 @@ void PgScrubber::send_scrub_resched(epoch_t epoch_queued)
      m_fsm->my_states();
      m_fsm->process_event(InternalSchedScrub{});
    }
+  dout(10) << "scrubber event --<< " << __func__ << dendl;
  }
  
  void PgScrubber::send_start_replica(epoch_t epoch_queued)
@@ -283,9 +223,9 @@ void PgScrubber::send_start_replica(epoch_t epoch_queued)
      dout(1) << "got a replica scrub request while Primary!" << dendl;
      return;
    }
-  if (check_interval_replica(epoch_queued)) {
+  if (check_interval(epoch_queued)) {
      m_fsm->my_states();
-    // buy us some time by not waiting for updates if there are none
+    // save us some time by not waiting for updates if there are none
      // to wait for. Affects the transition from NotActive into either
      // ReplicaWaitUpdates or ActiveReplica.
      if (pending_active_pushes())
@@ -299,7 +239,7 @@ void PgScrubber::send_start_replica(epoch_t epoch_queued)
  void PgScrubber::send_sched_replica(epoch_t epoch_queued)
  {
    dout(10) << "scrubber event -->> " << __func__ << " epoch: " << epoch_queued << dendl;
-  if (check_interval_replica(epoch_queued)) {
+  if (check_interval(epoch_queued)) {
      m_fsm->my_states();
      m_fsm->process_event(SchedReplica{});  // retest for map availability
    }
@@ -339,15 +279,6 @@ void PgScrubber::digest_update_notification(epoch_t epoch_queued)
    dout(10) << "scrubber event --<< " << __func__ << dendl;
  }
  
-// no checks should be performed here
-void PgScrubber::send_interval_changed()
-{
-  dout(10) << "scrubber event -->> " << __func__ << dendl;
-  m_fsm->my_states();
-  m_fsm->process_event(IntervalChanged{});
-  dout(10) << "scrubber event --<< " << __func__ << dendl;
-}
-
  void PgScrubber::send_replica_maps_ready(epoch_t epoch_queued)
  {
    dout(10) << "scrubber event -->> " << __func__ << " epoch: " << epoch_queued << dendl;
@@ -361,7 +292,7 @@ void PgScrubber::send_replica_maps_ready(epoch_t epoch_queued)
  void PgScrubber::send_replica_pushes_upd(epoch_t epoch_queued)
  {
    dout(10) << "scrubber event -->> " << __func__ << " epoch: " << epoch_queued << dendl;
-  if (check_interval_replica(epoch_queued)) {
+  if (check_interval(epoch_queued)) {
      m_fsm->my_states();
      m_fsm->process_event(ReplicaPushesUpd{});
    }
@@ -852,7 +783,6 @@ void PgScrubber::on_init()
    preemption_data.reset();
    m_pg->publish_stats_to_osd();
    m_interval_start = m_pg->get_history().same_interval_since;
-  // m_epoch_started = m_pg->get_osdmap_epoch();
  
    dout(10) << __func__ << " start same_interval:" << m_interval_start << dendl;
  
@@ -866,7 +796,6 @@ void PgScrubber::on_init()
    }
  
    m_start = m_pg->info.pgid.pgid.get_hobj_start();
-  m_last_dead_interval = get_osdmap_epoch();
    m_active = true;
  }
  
@@ -874,8 +803,6 @@ void PgScrubber::on_replica_init()
  {
    ceph_assert(!m_active);
    m_active = true;
-  m_last_dead_interval = get_osdmap_epoch();  // so that check_interval_replica() won't
-                                             // kill a scrub for stale messages
  }
  
  void PgScrubber::_scan_snaps(ScrubMap& smap)
@@ -1459,6 +1386,14 @@ void PgScrubber::handle_scrub_reserve_release(OpRequestRef op)
    m_remote_osd_resource.reset();
  }
  
+void PgScrubber::discard_replica_reservations()
+{
+  dout(10) << __func__ << dendl;
+  if (m_reservations.has_value()) {
+    m_reservations->discard_all();
+  }
+}
+
  void PgScrubber::clear_scrub_reservations()
  {
    dout(10) << __func__ << dendl;
@@ -1819,12 +1754,14 @@ void PgScrubber::cleanup_on_finish()
    state_clear(PG_STATE_DEEP_SCRUB);
    m_pg->publish_stats_to_osd();
  
-  m_reservations.reset();
-  m_local_osd_resource.reset();
+  clear_scrub_reservations();
+  m_pg->publish_stats_to_osd();
  
    requeue_waiting();
  
    reset_internal_state();
+  m_flags = scrub_flags_t{};
+
    // type-specific state clear
    _scrub_clear_state();
  }
@@ -1857,6 +1794,7 @@ void PgScrubber::clear_pgscrub_state()
    requeue_waiting();
  
    reset_internal_state();
+  m_flags = scrub_flags_t{};
  
    // type-specific state clear
    _scrub_clear_state();
@@ -1869,32 +1807,8 @@ void PgScrubber::replica_handling_done()
    state_clear(PG_STATE_SCRUBBING);
    state_clear(PG_STATE_DEEP_SCRUB);
  
-  preemption_data.reset();
-  m_maps_status.reset();
-  m_received_maps.clear();
-
-  m_start = hobject_t{};
-  m_end = hobject_t{};
-  m_max_end = hobject_t{};
-  m_subset_last_update = eversion_t{};
-  m_shallow_errors = 0;
-  m_deep_errors = 0;
-  m_fixed_count = 0;
-  m_omap_stats = (const struct omap_stat_t){0};
-
-  run_callbacks();
-  m_inconsistent.clear();
-  m_missing.clear();
-  m_authoritative.clear();
-  num_digest_updates_pending = 0;
-  replica_scrubmap = ScrubMap{};
-  replica_scrubmap_pos.reset();
-
-  m_cleaned_meta_map = ScrubMap{};
-  m_needs_sleep = true;
-  m_sleep_started_at = utime_t{};
+  reset_internal_state();
  
-  m_active = false;
    m_pg->publish_stats_to_osd();
  }
  
@@ -1933,8 +1847,6 @@ void PgScrubber::reset_internal_state()
    m_needs_sleep = true;
    m_sleep_started_at = utime_t{};
  
-  m_flags = scrub_flags_t{};
-
    m_active = false;
  }
  
diff --git a/src/osd/pg_scrubber.h b/src/osd/pg_scrubber.h

index f6536dba6ef0b7bb352969fa3e37dbe05fa3d098..e24ad422dfc417b5a0387e6cb0be1b46d30cbfeb 100644 (file)
--- a/src/osd/pg_scrubber.h
+++ b/src/osd/pg_scrubber.h
@@ -191,9 +191,9 @@ class PgScrubber : public ScrubPgIF, public ScrubMachineListener {
    /// are we waiting for resource reservation grants form our replicas?
    [[nodiscard]] bool is_reserving() const final;
  
-  void send_start_scrub(epoch_t epoch_queued) final;
+  void initiate_regular_scrub(epoch_t epoch_queued) final;
  
-  void send_start_after_repair(epoch_t epoch_queued) final;
+  void initiate_scrub_after_repair(epoch_t epoch_queued) final;
  
    void send_scrub_resched(epoch_t epoch_queued) final;
  
@@ -213,8 +213,6 @@ class PgScrubber : public ScrubPgIF, public ScrubMachineListener {
  
    void send_replica_pushes_upd(epoch_t epoch_queued) final;
  
-  void reset_epoch(epoch_t epoch_queued) final;
-
    /**
     *  we allow some number of preemptions of the scrub, which mean we do
     *  not block.  Then we start to block.  Once we start blocking, we do
@@ -234,6 +232,7 @@ class PgScrubber : public ScrubPgIF, public ScrubMachineListener {
    void handle_scrub_reserve_grant(OpRequestRef op, pg_shard_t from) final;
    void handle_scrub_reserve_reject(OpRequestRef op, pg_shard_t from) final;
    void handle_scrub_reserve_release(OpRequestRef op) final;
+  void discard_replica_reservations() final;
    void clear_scrub_reservations() final;  // PG::clear... fwds to here
    void unreserve_replicas() final;
  
@@ -422,9 +421,14 @@ class PgScrubber : public ScrubPgIF, public ScrubMachineListener {
  
    ScrubMap clean_meta_map();
  
-  void run_callbacks();
+  /**
+   *  mark down some parameters of the initiated scrub:
+   *  - the epoch when started;
+   *  - the depth of the scrub requested (from the PG_STATE variable)
+   */
+  void reset_epoch(epoch_t epoch_queued);
  
-  void send_interval_changed();
+  void run_callbacks();
  
    // -----     methods used to verify the relevance of incoming events:
  
@@ -457,12 +461,17 @@ class PgScrubber : public ScrubPgIF, public ScrubMachineListener {
     * check the 'no scrub' configuration options.
     */
    [[nodiscard]] bool should_abort() const;
+
+  /**
+   * Check the 'no scrub' configuration flags.
+   *
+   * Reset everything if the abort was not handled before.
+   * @returns false if the message was discarded due to abort flag.
+   */
    [[nodiscard]] bool verify_against_abort(epoch_t epoch_to_verify);
  
-  bool check_interval(epoch_t epoch_to_verify);
-  bool check_interval_replica(epoch_t epoch_to_verify);
+  [[nodiscard]] bool check_interval(epoch_t epoch_to_verify);
  
-  epoch_t m_last_dead_interval{};
    epoch_t m_last_aborted{};  // last time we've noticed a request to abort
  
  
diff --git a/src/osd/scrub_machine.cc b/src/osd/scrub_machine.cc

index 271e1362fc778d18981bae5cf9f29c36335605b5..9ae3e873f9575bf22eaf0991f748f2e1f6e64884 100644 (file)
--- a/src/osd/scrub_machine.cc
+++ b/src/osd/scrub_machine.cc
@@ -75,12 +75,6 @@ NotActive::NotActive(my_context ctx) : my_base(ctx)
    dout(10) << "-- state -->> NotActive" << dendl;
  }
  
-sc::result NotActive::react(const IntervalChanged&)
-{
-  dout(15) << "NotActive::react(const IntervalChanged&)" << dendl;
-  return discard_event();
-}
-
  // ----------------------- ReservingReplicas ---------------------------------
  
  ReservingReplicas::ReservingReplicas(my_context ctx) : my_base(ctx)
@@ -426,16 +420,6 @@ ReplicaWaitUpdates::ReplicaWaitUpdates(my_context ctx) : my_base(ctx)
    scrbr->on_replica_init();
  }
  
-sc::result ReplicaWaitUpdates::react(const IntervalChanged&)
-{
-  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
-  dout(10) << "ReplicaWaitUpdates::react(const IntervalChanged&)" << dendl;
-
-  // note: the master's reservation of us was just discarded by our caller
-  scrbr->replica_handling_done();
-  return transit<NotActive>();
-}
-
  /*
   * Triggered externally, by the entity that had an update re pushes
   */
@@ -517,16 +501,6 @@ sc::result ActiveReplica::react(const SchedReplica&)
    return transit<NotActive>();
  }
  
-sc::result ActiveReplica::react(const IntervalChanged&)
-{
-  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
-  dout(10) << "ActiveReplica::react(const IntervalChanged&) " << dendl;
-
-  scrbr->send_replica_map(PreemptionNoted::no_preemption);
-  scrbr->replica_handling_done();
-  return transit<NotActive>();
-}
-
  /**
   * the event poster is handling the scrubber reset
   */
diff --git a/src/osd/scrub_machine.h b/src/osd/scrub_machine.h

index 7bbb7db37fe180422665dbb6a9a7dd94a3a52ebc..95035389e9ab69dba70b739b042ebca89e12597c 100644 (file)
--- a/src/osd/scrub_machine.h
+++ b/src/osd/scrub_machine.h
@@ -54,8 +54,6 @@ void on_event_discard(std::string_view nm);
  MEV(RemotesReserved)    ///< all replicas have granted our reserve request
  MEV(ReservationFailure)         ///< a reservation request has failed
  
-MEV(IntervalChanged)  ///< ... from what it was when this chunk started
-
  MEV(StartScrub)         ///< initiate a new scrubbing session (relevant if we are a Primary)
  MEV(AfterRepairScrub)  ///< initiate a new scrubbing session. Only triggered at Recovery
                        ///< completion.
@@ -130,15 +128,12 @@ class ScrubMachine : public sc::state_machine<ScrubMachine, NotActive> {
  struct NotActive : sc::state<NotActive, ScrubMachine> {
    explicit NotActive(my_context ctx);
  
-  using reactions = mpl::list<sc::custom_reaction<IntervalChanged>,
-                             sc::transition<StartScrub, ReservingReplicas>,
+  using reactions = mpl::list<sc::transition<StartScrub, ReservingReplicas>,
                               // a scrubbing that was initiated at recovery completion,
                               // and requires no resource reservations:
                               sc::transition<AfterRepairScrub, ActiveScrubbing>,
                               sc::transition<StartReplica, ReplicaWaitUpdates>,
                               sc::transition<StartReplicaNoWait, ActiveReplica>>;
-
-  sc::result react(const IntervalChanged&);
  };
  
  struct ReservingReplicas : sc::state<ReservingReplicas, ScrubMachine> {
@@ -299,24 +294,20 @@ struct WaitDigestUpdate : sc::state<WaitDigestUpdate, ActiveScrubbing> {
   */
  struct ReplicaWaitUpdates : sc::state<ReplicaWaitUpdates, ScrubMachine> {
    explicit ReplicaWaitUpdates(my_context ctx);
-  using reactions = mpl::list<sc::custom_reaction<ReplicaPushesUpd>,
-                             sc::custom_reaction<FullReset>,
-                             sc::custom_reaction<IntervalChanged>>;
+  using reactions =
+    mpl::list<sc::custom_reaction<ReplicaPushesUpd>, sc::custom_reaction<FullReset>>;
  
    sc::result react(const ReplicaPushesUpd&);
-  sc::result react(const IntervalChanged&);
    sc::result react(const FullReset&);
  };
  
  
  struct ActiveReplica : sc::state<ActiveReplica, ScrubMachine> {
    explicit ActiveReplica(my_context ctx);
-  using reactions = mpl::list<sc::custom_reaction<IntervalChanged>,
-                             sc::custom_reaction<SchedReplica>,
-                             sc::custom_reaction<FullReset>>;
+  using reactions =
+    mpl::list<sc::custom_reaction<SchedReplica>, sc::custom_reaction<FullReset>>;
  
    sc::result react(const SchedReplica&);
-  sc::result react(const IntervalChanged&);
    sc::result react(const FullReset&);
  };
  
diff --git a/src/osd/scrubber_common.h b/src/osd/scrubber_common.h

index 510c4285469489c258514df6aff44ac5fdc63291..15a6cdf4dede485e14f395feff4d6bcfd8280820 100644 (file)
--- a/src/osd/scrubber_common.h
+++ b/src/osd/scrubber_common.h
@@ -116,9 +116,9 @@ struct ScrubPgIF {
  
    // --------------- triggering state-machine events:
  
-  virtual void send_start_scrub(epoch_t epoch_queued) = 0;
+  virtual void initiate_regular_scrub(epoch_t epoch_queued) = 0;
  
-  virtual void send_start_after_repair(epoch_t epoch_queued) = 0;
+  virtual void initiate_scrub_after_repair(epoch_t epoch_queued) = 0;
  
    virtual void send_scrub_resched(epoch_t epoch_queued) = 0;
  
@@ -140,8 +140,6 @@ struct ScrubPgIF {
  
    // --------------------------------------------------
  
-  virtual void reset_epoch(epoch_t epoch_queued) = 0;
-
    [[nodiscard]] virtual bool are_callbacks_pending()
      const = 0; // currently only used for an assert
  
@@ -230,9 +228,17 @@ struct ScrubPgIF {
     */
    virtual void unreserve_replicas() = 0;
  
+  /**
+   *  "forget" all replica reservations. No messages are sent to the
+   *  previously-reserved.
+   *
+   *  Used upon interval change. The replicas' state is guaranteed to
+   *  be reset separately by the interval-change event.
+   */
+  virtual void discard_replica_reservations() = 0;
+
    /**
     * clear both local and OSD-managed resource reservation flags
-   * (note: no replica un/reservation messages are involved!)
     */
    virtual void clear_scrub_reservations() = 0;
author	Ronen Friedman <rfriedma@redhat.com>
	Mon, 28 Dec 2020 16:49:34 +0000 (18:49 +0200)
committer	Ronen Friedman <rfriedma@redhat.com>
	Wed, 13 Jan 2021 13:27:06 +0000 (15:27 +0200)
src/osd/PG.cc		patch \| blob \| history
src/osd/PrimaryLogPG.cc		patch \| blob \| history
src/osd/pg_scrubber.cc		patch \| blob \| history
src/osd/pg_scrubber.h		patch \| blob \| history
src/osd/scrub_machine.cc		patch \| blob \| history
src/osd/scrub_machine.h		patch \| blob \| history
src/osd/scrubber_common.h		patch \| blob \| history