osd/scrub: only telling the scrubber of 'updates' events if these events are waited for

author Ronen Friedman <rfriedma@redhat.com>

Sun, 8 Aug 2021 14:14:55 +0000 (14:14 +0000)

committer Ronen Friedman <rfriedma@redhat.com>

Sat, 12 Mar 2022 15:43:35 +0000 (15:43 +0000)
author Ronen Friedman <rfriedma@redhat.com>
Sun, 8 Aug 2021 14:14:55 +0000 (14:14 +0000)
committer Ronen Friedman <rfriedma@redhat.com>
Sat, 12 Mar 2022 15:43:35 +0000 (15:43 +0000)
diff --git a/src/messages/MOSDRepScrub.h b/src/messages/MOSDRepScrub.h

index 5b9eaae18545a6bb0f019865e6e1fb7e362da135..ededcdca8c9490c3882db7eefcb863b24d1155f3 100644 (file)
--- a/src/messages/MOSDRepScrub.h
+++ b/src/messages/MOSDRepScrub.h
@@ -29,7 +29,7 @@ public:
  
    spg_t pgid;             // PG to scrub
    eversion_t scrub_from; // only scrub log entries after scrub_from
-  eversion_t scrub_to;   // last_update_applied when message sent
+  eversion_t scrub_to;   // last_update_applied when message sent (not used)
    epoch_t map_epoch = 0, min_epoch = 0;
    bool chunky;           // true for chunky scrubs
    hobject_t start;       // lower bound of scrub, inclusive
diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc

index ff97b21e0e4cb73702f8a9399d122bf032084dd6..6494dddf09914ceae57609b1109bf99f28ec2c3d 100644 (file)
--- a/src/osd/PrimaryLogPG.cc
+++ b/src/osd/PrimaryLogPG.cc
@@ -10962,8 +10962,10 @@ void PrimaryLogPG::op_applied(const eversion_t &applied_version)
    ceph_assert(applied_version <= info.last_update);
    recovery_state.local_write_applied(applied_version);
  
-  if (is_primary() && m_scrubber->should_requeue_blocked_ops(recovery_state.get_last_update_applied())) {
-    osd->queue_scrub_applied_update(this, is_scrub_blocking_ops());
+  if (is_primary() && m_scrubber) {
+    // if there's a scrub operation waiting for the selected chunk to be fully updated -
+    // allow it to continue
+    m_scrubber->on_applied_when_primary(recovery_state.get_last_update_applied());
    }
  }
  
diff --git a/src/osd/pg_scrubber.cc b/src/osd/pg_scrubber.cc

index 089a026b08e502417900348f6786cfe23cc686f9..ad8c2c6d774a2b1b658876566be8357563cba227 100644 (file)
--- a/src/osd/pg_scrubber.cc
+++ b/src/osd/pg_scrubber.cc
@@ -484,6 +484,17 @@ void PgScrubber::set_subset_last_update(eversion_t e)
    dout(15) << __func__ << " last-update: " << e << dendl;
  }
  
+void PgScrubber::on_applied_when_primary(const eversion_t& applied_version)
+{
+  // we are only interested in updates if we are the Primary, and in state
+  // WaitLastUpdate
+  if (m_fsm->is_accepting_updates() && (applied_version >= m_subset_last_update)) {
+    m_osds->queue_scrub_applied_update(m_pg, m_pg->is_scrub_blocking_ops());
+    dout(15) << __func__ << " update: " << applied_version
+            << " vs. required: " << m_subset_last_update << dendl;
+  }
+}
+
  /*
   * setting:
   * - m_subset_last_update
@@ -991,7 +1002,7 @@ int PgScrubber::build_scrub_map_chunk(
    // scan objects
    while (!pos.done()) {
      int r = m_pg->get_pgbackend()->be_scan_list(map, pos);
-    dout(10) << __func__ << " be r " << r << dendl;
+    dout(30) << __func__ << " BE returned " << r << dendl;
      if (r == -EINPROGRESS) {
        dout(20) << __func__ << " in progress" << dendl;
        return r;
@@ -1086,7 +1097,6 @@ void PgScrubber::replica_scrub_op(OpRequestRef op)
    // are we still processing a previous scrub-map request without noticing that the
    // interval changed? won't see it here, but rather at the reservation stage.
  
-
    if (msg->map_epoch < m_pg->info.history.same_interval_since) {
      dout(10) << "replica_scrub_op discarding old replica_scrub from " << msg->map_epoch
              << " < " << m_pg->info.history.same_interval_since << dendl;
@@ -1336,12 +1346,13 @@ void PgScrubber::handle_scrub_reserve_request(OpRequestRef op)
     *  otherwise the interval would have changed.
     *  Ostensibly we can discard & redo the reservation. But then we
     *  will be temporarily releasing the OSD resource - and might not be able to grab it
-   *  again. Thus we simple treat this as a successful new request.
+   *  again. Thus, we simply treat this as a successful new request.
     */
  
    if (m_remote_osd_resource.has_value() && m_remote_osd_resource->is_stale()) {
      // we are holding a stale reservation from a past epoch
      m_remote_osd_resource.reset();
+    dout(10) << __func__ << " stale reservation request" << dendl;
    }
  
    if (request_ep < m_pg->get_same_interval_since()) {
diff --git a/src/osd/pg_scrubber.h b/src/osd/pg_scrubber.h

index 176a00a23e9f926102d988363d29e4ccb3ee01e4..56b27d7848d49cc4b6338bac4c8717eb93f9e7cd 100644 (file)
--- a/src/osd/pg_scrubber.h
+++ b/src/osd/pg_scrubber.h
@@ -212,6 +212,13 @@ class PgScrubber : public ScrubPgIF, public ScrubMachineListener {
    void send_sched_replica(epoch_t epoch_queued) final;
  
    void send_replica_pushes_upd(epoch_t epoch_queued) final;
+  /**
+   *  The PG has updated its 'applied version'. It might be that we are waiting for this
+   *  information: after selecting a range of objects to scrub, we've marked the latest
+   *  version of these objects in m_subset_last_update. We will not start the map building
+   *  before we know that the PG has reached this version.
+   */
+  void on_applied_when_primary(const eversion_t& applied_version) final;
  
    /**
     *  we allow some number of preemptions of the scrub, which mean we do
diff --git a/src/osd/scrub_machine.cc b/src/osd/scrub_machine.cc

index 2a2ee8732bda928de2464eec32d8e16154020928..e8ae1bc561760216278541cda91100741875c5d4 100644 (file)
--- a/src/osd/scrub_machine.cc
+++ b/src/osd/scrub_machine.cc
@@ -60,6 +60,14 @@ bool ScrubMachine::is_reserving() const
    return state_cast<const ReservingReplicas*>();
  }
  
+bool ScrubMachine::is_accepting_updates() const
+{
+  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
+  ceph_assert(scrbr->is_primary());
+
+  return state_cast<const WaitLastUpdate*>();
+}
+
  // for the rest of the code in this file - we know what PG we are dealing with:
  #undef dout_prefix
  #define dout_prefix _prefix(_dout, this->context<ScrubMachine>().m_pg)
@@ -236,6 +244,15 @@ WaitLastUpdate::WaitLastUpdate(my_context ctx) : my_base(ctx)
    post_event(UpdatesApplied{});
  }
  
+/**
+ *  Note:
+ *  Updates are locally readable immediately. Thus, on the replicas we do need
+ *  to wait for the update notifications before scrubbing. For the Primary it's
+ *  a bit different: on EC (and only there) rmw operations have an additional
+ *  read roundtrip. That means that on the Primary we need to wait for
+ *  last_update_applied (the replica side, even on EC, is still safe
+ *  since the actual transaction will already be readable by commit time.
+ */
  void WaitLastUpdate::on_new_updates(const UpdatesApplied&)
  {
    DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
diff --git a/src/osd/scrub_machine.h b/src/osd/scrub_machine.h

index 95035389e9ab69dba70b739b042ebca89e12597c..7386163d8601a613f33729705b934955d896e646 100644 (file)
--- a/src/osd/scrub_machine.h
+++ b/src/osd/scrub_machine.h
@@ -64,7 +64,7 @@ MEV(SelectedChunkFree)
  MEV(ChunkIsBusy)
  MEV(ActivePushesUpd)    ///< Update to active_pushes. 'active_pushes' represents recovery
                          ///< that is in-flight to the local ObjectStore
-MEV(UpdatesApplied)     // external
+MEV(UpdatesApplied)     ///< (Primary only) all updates are committed
  MEV(InternalAllUpdates)         ///< the internal counterpart of UpdatesApplied
  MEV(GotReplicas)        ///< got a map from a replica
  
@@ -76,8 +76,6 @@ MEV(IntLocalMapDone)
  
  MEV(DigestUpdate)  ///< external. called upon success of a MODIFY op. See
                    ///< scrub_snapshot_metadata()
-MEV(AllChunksDone)
-
  MEV(StartReplica)       ///< initiating replica scrub. replica_scrub_op() -> OSD Q ->
                          ///< replica_scrub()
  MEV(StartReplicaNoWait)         ///< 'start replica' when there are no pending updates
@@ -112,6 +110,7 @@ class ScrubMachine : public sc::state_machine<ScrubMachine, NotActive> {
    void my_states() const;
    void assert_not_active() const;
    [[nodiscard]] bool is_reserving() const;
+  [[nodiscard]] bool is_accepting_updates() const;
  };
  
  /**
@@ -170,13 +169,9 @@ struct ActiveScrubbing : sc::state<ActiveScrubbing, ScrubMachine, PendingTimer>
    ~ActiveScrubbing();
  
    using reactions = mpl::list<
-    // done scrubbing
-    sc::transition<AllChunksDone, NotActive>,
-
      sc::custom_reaction<InternalError>,
      sc::custom_reaction<FullReset>>;
  
-  sc::result react(const AllChunksDone&);
    sc::result react(const FullReset&);
    sc::result react(const InternalError&);
  };
diff --git a/src/osd/scrubber_common.h b/src/osd/scrubber_common.h

index 15a6cdf4dede485e14f395feff4d6bcfd8280820..ab6f84e88f54c6987493359fa45c7e9050968241 100644 (file)
--- a/src/osd/scrubber_common.h
+++ b/src/osd/scrubber_common.h
@@ -138,6 +138,22 @@ struct ScrubPgIF {
  
    virtual void send_sched_replica(epoch_t epoch_queued) = 0;
  
+//   virtual void send_full_reset(epoch_t epoch_queued) = 0;
+// 
+//   virtual void send_chunk_free(epoch_t epoch_queued) = 0;
+// 
+//   virtual void send_chunk_busy(epoch_t epoch_queued) = 0;
+// 
+//   virtual void send_local_map_done(epoch_t epoch_queued) = 0;
+// 
+//   virtual void send_get_next_chunk(epoch_t epoch_queued) = 0;
+// 
+//   virtual void send_scrub_is_finished(epoch_t epoch_queued) = 0;
+// 
+//   virtual void send_maps_compared(epoch_t epoch_queued) = 0;
+
+  virtual void on_applied_when_primary(const eversion_t &applied_version) = 0;
+
    // --------------------------------------------------
  
    [[nodiscard]] virtual bool are_callbacks_pending()
author	Ronen Friedman <rfriedma@redhat.com>
	Sun, 8 Aug 2021 14:14:55 +0000 (14:14 +0000)
committer	Ronen Friedman <rfriedma@redhat.com>
	Sat, 12 Mar 2022 15:43:35 +0000 (15:43 +0000)
src/messages/MOSDRepScrub.h		patch \| blob \| history
src/osd/PrimaryLogPG.cc		patch \| blob \| history
src/osd/pg_scrubber.cc		patch \| blob \| history
src/osd/pg_scrubber.h		patch \| blob \| history
src/osd/scrub_machine.cc		patch \| blob \| history
src/osd/scrub_machine.h		patch \| blob \| history
src/osd/scrubber_common.h		patch \| blob \| history