osd/scrub: only telling the scrubber of 'updates' events if these events are waited for

author Ronen Friedman <rfriedma@redhat.com>

Sun, 8 Aug 2021 14:14:55 +0000 (14:14 +0000)

committer Ronen Friedman <rfriedma@redhat.com>

Wed, 11 Aug 2021 13:48:21 +0000 (16:48 +0300)
author Ronen Friedman <rfriedma@redhat.com>
Sun, 8 Aug 2021 14:14:55 +0000 (14:14 +0000)
committer Ronen Friedman <rfriedma@redhat.com>
Wed, 11 Aug 2021 13:48:21 +0000 (16:48 +0300)
diff --git a/src/messages/MOSDRepScrub.h b/src/messages/MOSDRepScrub.h

index 5b9eaae18545a6bb0f019865e6e1fb7e362da135..ededcdca8c9490c3882db7eefcb863b24d1155f3 100644 (file)
--- a/src/messages/MOSDRepScrub.h
+++ b/src/messages/MOSDRepScrub.h
@@ -29,7 +29,7 @@ public:
  
    spg_t pgid;             // PG to scrub
    eversion_t scrub_from; // only scrub log entries after scrub_from
-  eversion_t scrub_to;   // last_update_applied when message sent
+  eversion_t scrub_to;   // last_update_applied when message sent (not used)
    epoch_t map_epoch = 0, min_epoch = 0;
    bool chunky;           // true for chunky scrubs
    hobject_t start;       // lower bound of scrub, inclusive
diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc

index 64e596b107acb47870bac029a4c439c4cfc17059..024ea91f4cf7e531ec4eb0ea10f15d9afc9e1c14 100644 (file)
--- a/src/osd/PrimaryLogPG.cc
+++ b/src/osd/PrimaryLogPG.cc
@@ -11278,8 +11278,10 @@ void PrimaryLogPG::op_applied(const eversion_t &applied_version)
    ceph_assert(applied_version <= info.last_update);
    recovery_state.local_write_applied(applied_version);
  
-  if (is_primary() && m_scrubber->should_requeue_blocked_ops(recovery_state.get_last_update_applied())) {
-    osd->queue_scrub_applied_update(this, is_scrub_blocking_ops());
+  if (is_primary() && m_scrubber) {
+    // if there's a scrub operation waiting for the selected chunk to be fully updated -
+    // allow it to continue
+    m_scrubber->on_applied_when_primary(recovery_state.get_last_update_applied());
    }
  }
  
diff --git a/src/osd/pg_scrubber.cc b/src/osd/pg_scrubber.cc

index 0a98f5addc0edb8d4b8cbd5f7a7972613921bb46..0fc96cfca245b27bd925a25b1befa575e6259956 100644 (file)
--- a/src/osd/pg_scrubber.cc
+++ b/src/osd/pg_scrubber.cc
@@ -558,6 +558,17 @@ void PgScrubber::set_subset_last_update(eversion_t e)
    dout(15) << __func__ << " last-update: " << e << dendl;
  }
  
+void PgScrubber::on_applied_when_primary(const eversion_t& applied_version)
+{
+  // we are only interested in updates if we are the Primary, and in state
+  // WaitLastUpdate
+  if (m_fsm->is_accepting_updates() && (applied_version >= m_subset_last_update)) {
+    m_osds->queue_scrub_applied_update(m_pg, m_pg->is_scrub_blocking_ops());
+    dout(15) << __func__ << " update: " << applied_version
+            << " vs. required: " << m_subset_last_update << dendl;
+  }
+}
+
  /*
   * The selected range is set directly into 'm_start' and 'm_end'
   * setting:
@@ -1111,6 +1122,7 @@ int PgScrubber::build_scrub_map_chunk(
    while (!pos.done()) {
  
      int r = m_pg->get_pgbackend()->be_scan_list(map, pos);
+    dout(30) << __func__ << " BE returned " << r << dendl;
      if (r == -EINPROGRESS) {
        dout(20) << __func__ << " in progress" << dendl;
        return r;
@@ -1201,7 +1213,6 @@ void PgScrubber::replica_scrub_op(OpRequestRef op)
    // are we still processing a previous scrub-map request without noticing that the
    // interval changed? won't see it here, but rather at the reservation stage.
  
-
    if (msg->map_epoch < m_pg->info.history.same_interval_since) {
      dout(10) << "replica_scrub_op discarding old replica_scrub from " << msg->map_epoch
              << " < " << m_pg->info.history.same_interval_since << dendl;
@@ -1465,12 +1476,13 @@ void PgScrubber::handle_scrub_reserve_request(OpRequestRef op)
     *  otherwise the interval would have changed.
     *  Ostensibly we can discard & redo the reservation. But then we
     *  will be temporarily releasing the OSD resource - and might not be able to grab it
-   *  again. Thus we simple treat this as a successful new request.
+   *  again. Thus, we simply treat this as a successful new request.
     */
  
    if (m_remote_osd_resource.has_value() && m_remote_osd_resource->is_stale()) {
      // we are holding a stale reservation from a past epoch
      m_remote_osd_resource.reset();
+    dout(10) << __func__ << " stale reservation request" << dendl;
    }
  
    if (request_ep < m_pg->get_same_interval_since()) {
diff --git a/src/osd/pg_scrubber.h b/src/osd/pg_scrubber.h

index 0145ae6515f285f7d98fe438af2d94f1e535b218..fdee52e6f353240312462f1363fc4d7ae4d6aa14 100644 (file)
--- a/src/osd/pg_scrubber.h
+++ b/src/osd/pg_scrubber.h
@@ -214,6 +214,13 @@ class PgScrubber : public ScrubPgIF, public ScrubMachineListener {
    void send_sched_replica(epoch_t epoch_queued) final;
  
    void send_replica_pushes_upd(epoch_t epoch_queued) final;
+  /**
+   *  The PG has updated its 'applied version'. It might be that we are waiting for this
+   *  information: after selecting a range of objects to scrub, we've marked the latest
+   *  version of these objects in m_subset_last_update. We will not start the map building
+   *  before we know that the PG has reached this version.
+   */
+  void on_applied_when_primary(const eversion_t& applied_version) final;
  
    void send_full_reset(epoch_t epoch_queued) final;
  
@@ -345,6 +352,8 @@ class PgScrubber : public ScrubPgIF, public ScrubMachineListener {
    // -------------------------------------------------------------------------------------------
    // the I/F used by the state-machine (i.e. the implementation of ScrubMachineListener)
  
+  [[nodiscard]] bool is_primary() const final { return m_pg->recovery_state.is_primary(); }
+
    void select_range_n_notify() final;
  
    Scrub::BlockedRangeWarning acquire_blocked_alarm() final;
@@ -417,8 +426,6 @@ class PgScrubber : public ScrubPgIF, public ScrubMachineListener {
    void state_set(uint64_t m) { m_pg->state_set(m); }
    void state_clear(uint64_t m) { m_pg->state_clear(m); }
  
-  [[nodiscard]] bool is_primary() const { return m_pg->recovery_state.is_primary(); }
-
    [[nodiscard]] bool is_scrub_registered() const;
  
    virtual void _scrub_clear_state() {}
diff --git a/src/osd/scrub_machine.cc b/src/osd/scrub_machine.cc

index d34d00446d7fda693d4fe3482f2129034a7bc1da..edee613ffa0f88f4fd99882ac6f1ebfeec1adce8 100644 (file)
--- a/src/osd/scrub_machine.cc
+++ b/src/osd/scrub_machine.cc
@@ -60,6 +60,14 @@ bool ScrubMachine::is_reserving() const
    return state_cast<const ReservingReplicas*>();
  }
  
+bool ScrubMachine::is_accepting_updates() const
+{
+  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
+  ceph_assert(scrbr->is_primary());
+
+  return state_cast<const WaitLastUpdate*>();
+}
+
  // for the rest of the code in this file - we know what PG we are dealing with:
  #undef dout_prefix
  #define dout_prefix _prefix(_dout, this->context<ScrubMachine>().m_pg)
@@ -242,6 +250,15 @@ WaitLastUpdate::WaitLastUpdate(my_context ctx) : my_base(ctx)
    post_event(UpdatesApplied{});
  }
  
+/**
+ *  Note:
+ *  Updates are locally readable immediately. Thus, on the replicas we do need
+ *  to wait for the update notifications before scrubbing. For the Primary it's
+ *  a bit different: on EC (and only there) rmw operations have an additional
+ *  read roundtrip. That means that on the Primary we need to wait for
+ *  last_update_applied (the replica side, even on EC, is still safe
+ *  since the actual transaction will already be readable by commit time.
+ */
  void WaitLastUpdate::on_new_updates(const UpdatesApplied&)
  {
    DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
diff --git a/src/osd/scrub_machine.h b/src/osd/scrub_machine.h

index 4c8b0d4121bc232b186031c179483efba7843755..dd44f106152844d873f200b07c75f4091893c6ec 100644 (file)
--- a/src/osd/scrub_machine.h
+++ b/src/osd/scrub_machine.h
@@ -74,7 +74,7 @@ MEV(ChunkIsBusy)
  MEV(ActivePushesUpd)  ///< Update to active_pushes. 'active_pushes' represents recovery
                       ///< that is in-flight to the local ObjectStore
  
-MEV(UpdatesApplied)  // external
+MEV(UpdatesApplied)  ///< (Primary only) all updates are committed
  
  MEV(InternalAllUpdates)         ///< the internal counterpart of UpdatesApplied
  
@@ -90,8 +90,6 @@ MEV(IntLocalMapDone)
  MEV(DigestUpdate)  ///< external. called upon success of a MODIFY op. See
                    ///< scrub_snapshot_metadata()
  
-MEV(AllChunksDone)
-
  MEV(MapsCompared)  ///< (Crimson) maps_compare_n_cleanup() transactions are done
  
  MEV(StartReplica)  ///< initiating replica scrub.
@@ -133,6 +131,7 @@ class ScrubMachine : public sc::state_machine<ScrubMachine, NotActive> {
    void my_states() const;
    void assert_not_active() const;
    [[nodiscard]] bool is_reserving() const;
+  [[nodiscard]] bool is_accepting_updates() const;
  };
  
  /**
@@ -192,13 +191,9 @@ struct ActiveScrubbing : sc::state<ActiveScrubbing, ScrubMachine, PendingTimer>
    ~ActiveScrubbing();
  
    using reactions = mpl::list<
-    // done scrubbing
-    sc::transition<AllChunksDone, NotActive>,
-
      sc::custom_reaction<InternalError>,
      sc::custom_reaction<FullReset>>;
  
-  sc::result react(const AllChunksDone&);
    sc::result react(const FullReset&);
    sc::result react(const InternalError&);
  };
diff --git a/src/osd/scrub_machine_lstnr.h b/src/osd/scrub_machine_lstnr.h

index 167e5f618f51888284487e4ec09545ddd733c926..91dee910af2c7db0b5e0fb8fa6083641db6e737d 100644 (file)
--- a/src/osd/scrub_machine_lstnr.h
+++ b/src/osd/scrub_machine_lstnr.h
@@ -63,6 +63,8 @@ struct ScrubMachineListener {
  
    virtual ~ScrubMachineListener() = default;
  
+  [[nodiscard]] virtual bool is_primary() const = 0;
+
    virtual void select_range_n_notify() = 0;
  
    virtual Scrub::BlockedRangeWarning acquire_blocked_alarm() = 0;
diff --git a/src/osd/scrubber_common.h b/src/osd/scrubber_common.h

index 30883bf8bf38530f7b73a8aea2f6badd2cec9880..22913875813ebc0a723ac99d6d832bb9d2dd18ea 100644 (file)
--- a/src/osd/scrubber_common.h
+++ b/src/osd/scrubber_common.h
@@ -152,6 +152,8 @@ struct ScrubPgIF {
  
    virtual void send_maps_compared(epoch_t epoch_queued) = 0;
  
+  virtual void on_applied_when_primary(const eversion_t &applied_version) = 0;
+
    // --------------------------------------------------
  
    [[nodiscard]] virtual bool are_callbacks_pending()
author	Ronen Friedman <rfriedma@redhat.com>
	Sun, 8 Aug 2021 14:14:55 +0000 (14:14 +0000)
committer	Ronen Friedman <rfriedma@redhat.com>
	Wed, 11 Aug 2021 13:48:21 +0000 (16:48 +0300)
src/messages/MOSDRepScrub.h		patch \| blob \| history
src/osd/PrimaryLogPG.cc		patch \| blob \| history
src/osd/pg_scrubber.cc		patch \| blob \| history
src/osd/pg_scrubber.h		patch \| blob \| history
src/osd/scrub_machine.cc		patch \| blob \| history
src/osd/scrub_machine.h		patch \| blob \| history
src/osd/scrub_machine_lstnr.h		patch \| blob \| history
src/osd/scrubber_common.h		patch \| blob \| history