git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
crimson/osd: send empty transactions to backfill targets that haven't backfilled the objects yet

author     Xuehan Xu <xuxuehan@qianxin.com>
           Sun, 4 Aug 2024 10:59:05 +0000 (18:59 +0800)
committer  Xuehan Xu <xuxuehan@qianxin.com>
           Sun, 4 Aug 2024 10:59:05 +0000 (18:59 +0800)

Fixes: https://tracker.ceph.com/issues/67327
Signed-off-by: Xuehan Xu <xuxuehan@qianxin.com>
src/crimson/osd/pg.cc
src/crimson/osd/pg.h
src/crimson/osd/replicated_backend.cc
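
The change gates the replicated transaction payload per peer: a backfill target whose last_backfill (and the PG's in-flight last_backfill_started) has not yet reached the object still receives the repop message, but with an empty transaction, so it can record the log entry and ack without touching an object it does not hold yet. The following is a minimal standalone sketch of that decision, assuming simplified stand-ins (Hoid, PeerInfo, should_send_full_txn) rather than the real crimson types; it is not code from this commit.

// Hypothetical, simplified model of the predicate added as PG::should_send_op().
// Hoid stands in for hobject_t; ordering here is just (pool, name).
#include <cstdint>
#include <iostream>
#include <optional>
#include <string>
#include <tuple>

struct Hoid {
  int64_t pool;
  std::string name;
  bool operator<=(const Hoid& rhs) const {
    return std::tie(pool, name) <= std::tie(rhs.pool, rhs.name);
  }
};

struct PeerInfo {
  Hoid last_backfill;  // everything <= this has already been backfilled to the peer
};

// Mirrors the condition in the diff: ship the real transaction only if the
// object belongs to another pool (e.g. a temp object), or the in-flight
// backfill position or the peer's last_backfill already covers it.
bool should_send_full_txn(const Hoid& hoid,
                          int64_t pg_pool,
                          const std::optional<Hoid>& last_backfill_started,
                          const PeerInfo& peer) {
  return hoid.pool != pg_pool ||
         (last_backfill_started && hoid <= *last_backfill_started) ||
         hoid <= peer.last_backfill;
}

int main() {
  const int64_t pg_pool = 1;
  Hoid obj{1, "rbd_data.0f00"};
  PeerInfo backfill_target{{1, "rbd_data.0a00"}};  // has not reached obj yet
  PeerInfo caught_up_peer{{1, "rbd_data.ffff"}};   // already past obj

  std::cout << std::boolalpha
            << should_send_full_txn(obj, pg_pool, std::nullopt, backfill_target)  // false -> empty txn
            << ' '
            << should_send_full_txn(obj, pg_pool, std::nullopt, caught_up_peer)   // true  -> real txn
            << '\n';
}

Peers that fall into the false branch are still sent the MOSDRepOp: in the replicated_backend.cc hunk below an empty ceph::os::Transaction is encoded in place of the real one, and the peer is still added to acked_peers with the log entries attached, so its PG log and ack bookkeeping stay in step with the primary.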

src/crimson/osd/pg.cc
index c195241cc5d8ae6e821a132a64c1933af87c939e..a681705deee07f91a45b7ef7ee1f1d5dd8e5d79d 100644 (file)
@@ -1698,6 +1698,28 @@ bool PG::is_degraded_or_backfilling_object(const hobject_t& soid) const {
   return false;
 }
 
+bool PG::should_send_op(
+  pg_shard_t peer,
+  const hobject_t &hoid) const
+{
+  if (peer == get_primary())
+    return true;
+  bool should_send =
+    (hoid.pool != (int64_t)get_info().pgid.pool() ||
+    (has_backfill_state() && hoid <= get_last_backfill_started()) ||
+    hoid <= peering_state.get_peer_info(peer).last_backfill);
+  if (!should_send) {
+    ceph_assert(is_backfill_target(peer));
+    logger().debug("should_send_op: issue_repop shipping empty opt to osd."
+                   "{}, object {} beyond std::max(last_backfill_started, "
+                   "peer_info[peer].last_backfill {})",
+                   peer, hoid, peering_state.get_peer_info(peer).last_backfill);
+  }
+  return should_send;
+  // TODO: should consider async recovery cases in the future which are not supported
+  //       by crimson yet
+}
+
 PG::interruptible_future<std::optional<PG::complete_op_t>>
 PG::already_complete(const osd_reqid_t& reqid)
 {
src/crimson/osd/pg.h
index 252709dea4dd903a39bb202c19485e2d6f93725c..a9f45d329cb4a3d2eddf8bc29ecbf9de3cf6d3be 100644 (file)
@@ -521,6 +521,7 @@ public:
   bool get_need_up_thru() const {
     return peering_state.get_need_up_thru();
   }
+  bool should_send_op(pg_shard_t peer, const hobject_t &hoid) const;
   epoch_t get_same_interval_since() const {
     return get_info().history.same_interval_since;
   }
@@ -740,6 +741,15 @@ public:
   PeeringState& get_peering_state() final {
     return peering_state;
   }
+  bool has_backfill_state() const {
+    return (bool)(recovery_handler->backfill_state);
+  }
+  const BackfillState& get_backfill_state() const {
+    return *recovery_handler->backfill_state;
+  }
+  hobject_t get_last_backfill_started() const {
+    return get_backfill_state().get_last_backfill_started();
+  }
   bool has_reset_since(epoch_t epoch) const final {
     return peering_state.pg_has_reset_since(epoch);
   }
src/crimson/osd/replicated_backend.cc
index d01fd6468034a3f59d3a0869593beec8345e461f..d227b9c89e9750062ab5bab2b54766caf1d20d7e 100644 (file)
@@ -65,7 +65,14 @@ ReplicatedBackend::_submit_transaction(std::set<pg_shard_t>&& pg_shards,
        min_epoch,
        tid,
        osd_op_p.at_version);
-      m->set_data(encoded_txn);
+      if (pg.should_send_op(pg_shard, hoid)) {
+       m->set_data(encoded_txn);
+      } else {
+       ceph::os::Transaction t;
+       bufferlist bl;
+       encode(t, bl);
+       m->set_data(bl);
+      }
       pending_txn->second.acked_peers.push_back({pg_shard, eversion_t{}});
       encode(log_entries, m->logbl);
       m->pg_trim_to = osd_op_p.pg_trim_to;