osd: bypass messenger for local EC reads
author     Radoslaw Zarzynski <rzarzyns@redhat.com>
           Mon, 21 Apr 2025 08:49:55 +0000 (08:49 +0000)
committer  Alex Ainscow <aainscow@uk.ibm.com>
           Wed, 17 Sep 2025 08:43:26 +0000 (09:43 +0100)
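
With crimson (WITH_CRIMSON builds), ReadPipeline::do_read_op() no longer
routes a sub-read through the messenger when its target is the local
shard. Such a read is held back in local_read_op while the remote
MOSDECSubOpRead messages are sent, and is then executed in place via the
new handle_sub_read_n_reply(), which runs handle_sub_read() and feeds
the resulting ECSubReadReply directly into handle_sub_read_reply().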
Signed-off-by: Radoslaw Zarzynski <rzarzyns@redhat.com>
(cherry picked from commit b07d1f67625c8b621b2ebf5a7f744c588cae99d3)

src/osd/ECBackend.cc
src/osd/ECBackend.h
src/osd/ECCommon.cc
src/osd/ECCommon.h

diff --git a/src/osd/ECBackend.cc b/src/osd/ECBackend.cc
index 3cdf378d1f83a2c7d0320db69f46cdcb8f06b2a4..4a6530c14b1cdacdfae1e46d8ebf27c245b60830 100644
@@ -75,7 +75,11 @@ ECBackend::ECBackend(
   ECSwitch *s,
   ECExtentCache::LRU &ec_extent_cache_lru)
   : parent(pg), cct(cct), switcher(s),
+#ifdef WITH_CRIMSON
+    read_pipeline(cct, ec_impl, this->sinfo, get_parent()->get_eclistener(), *this),
+#else
     read_pipeline(cct, ec_impl, this->sinfo, get_parent()->get_eclistener()),
+#endif
     rmw_pipeline(cct, ec_impl, this->sinfo, get_parent()->get_eclistener(),
                  *this, ec_extent_cache_lru),
     recovery_backend(cct, switcher->coll, ec_impl, this->sinfo, read_pipeline,
@@ -570,6 +574,16 @@ void ECBackend::handle_sub_read(
   reply->tid = op.tid;
 }
 
+void ECBackend::handle_sub_read_n_reply(
+  pg_shard_t from,
+  ECSubRead &op,
+  const ZTracer::Trace &trace)
+{
+  ECSubReadReply reply;
+  handle_sub_read(from, op, &reply, trace);
+  handle_sub_read_reply(from, reply, trace);
+}
+
 void ECBackend::handle_sub_write_reply(
   pg_shard_t from,
   const ECSubWriteReply &ec_write_reply_op,
diff --git a/src/osd/ECBackend.h b/src/osd/ECBackend.h
index 1796d03f14a9dbc881237fdc0c7033c0439636b9..4d116a582a2c94e552fd33d6c3d48720e878cfee 100644
@@ -82,6 +82,15 @@ class ECBackend : public ECCommon {
       ECSubReadReply *reply,
       const ZTracer::Trace &trace
     );
+  void handle_sub_read_n_reply(
+    pg_shard_t from,
+    ECSubRead &op,
+    const ZTracer::Trace &trace
+#ifdef WITH_CRIMSON
+    ) override;
+#else
+    );
+#endif
   void handle_sub_write_reply(
       pg_shard_t from,
       const ECSubWriteReply &op,
diff --git a/src/osd/ECCommon.cc b/src/osd/ECCommon.cc
index 02f5d4984d8368bfa597770086a60cd75e6d65d5..20ad999c8cb7649e58a193f2905591510673ac7d 100644
@@ -496,12 +496,19 @@ void ECCommon::ReadPipeline::do_read_op(ReadOp &rop) {
     ceph_assert(reads_sent);
   }
 
+  std::optional<ECSubRead> local_read_op;
   std::vector<std::pair<int, Message*>> m;
   m.reserve(messages.size());
   for (auto &&[pg_shard, read]: messages) {
     rop.in_progress.insert(pg_shard);
     shard_to_read_map[pg_shard].insert(rop.tid);
     read.tid = tid;
+#ifdef WITH_CRIMSON // crimson only
+    if (pg_shard == get_parent()->whoami_shard()) {
+      local_read_op = std::move(read);
+      continue;
+    }
+#endif
     auto *msg = new MOSDECSubOpRead;
     msg->set_priority(priority);
     msg->pgid = spg_t(get_info().pgid.pgid, pg_shard.shard);
@@ -516,11 +523,22 @@ void ECCommon::ReadPipeline::do_read_op(ReadOp &rop) {
       msg->trace.keyval("shard", pg_shard.shard.id);
     }
     m.push_back(std::make_pair(pg_shard.osd, msg));
+    dout(10) << __func__ << ": will send msg " << *msg
+             << " to osd." << pg_shard << dendl;
   }
   if (!m.empty()) {
     get_parent()->send_message_osd_cluster(m, get_osdmap_epoch());
   }
 
+#ifdef WITH_CRIMSON
+  if (local_read_op) {
+    dout(10) << __func__ << ": doing local read for " << rop << dendl;
+    handle_sub_read_n_reply(
+      get_parent()->whoami_shard(),
+      *local_read_op,
+      rop.trace);
+  }
+#endif
   dout(10) << __func__ << ": started " << rop << dendl;
 }
 
diff --git a/src/osd/ECCommon.h b/src/osd/ECCommon.h
index 2c2684155d3fd11b4224e8873d80120dc1f480df..6e9d3a6a7b8876fdd6e2b4459a7199d3f4e62f31 100644
@@ -51,6 +51,7 @@ typedef crimson::osd::ObjectContextRef ObjectContextRef;
 //forward declaration
 struct ECBackend;
 struct ECSubWrite;
+struct ECSubRead;
 struct PGLog;
 struct RecoveryMessages;
 
@@ -76,6 +77,14 @@ struct ECCommon {
       const ZTracer::Trace &trace,
       ECListener &eclistener) = 0;
 
+#ifdef WITH_CRIMSON
+  virtual void handle_sub_read_n_reply(
+    pg_shard_t from,
+    ECSubRead &op,
+    const ZTracer::Trace &trace
+    ) = 0;
+#endif
+
   virtual void objects_read_and_reconstruct(
       const std::map<hobject_t, std::list<ec_align_t>> &reads,
       bool fast_read,
@@ -348,6 +357,9 @@ struct ECCommon {
     const ECUtil::stripe_info_t &sinfo;
     // TODO: lay an interface down here
     ECListener *parent;
+#ifdef WITH_CRIMSON
+    ECCommon &ec_backend;
+#endif
 
     ECListener *get_parent() const { return parent; }
 
@@ -364,11 +376,21 @@ struct ECCommon {
     ReadPipeline(CephContext *cct,
                  ceph::ErasureCodeInterfaceRef ec_impl,
                  const ECUtil::stripe_info_t &sinfo,
+#ifdef WITH_CRIMSON
+                 ECListener *parent,
+                 ECCommon &ec_backend)
+#else
                  ECListener *parent)
+#endif
       : cct(cct),
         ec_impl(std::move(ec_impl)),
         sinfo(sinfo),
+#ifdef WITH_CRIMSON
+        parent(parent),
+        ec_backend(ec_backend) {}
+#else
         parent(parent) {}
+#endif
 
     /**
      * While get_want_to_read_shards creates a want_to_read based on the EC
@@ -416,6 +438,15 @@ struct ECCommon {
         const std::optional<std::set<pg_shard_t>> &error_shards = std::nullopt
         //< [in] Shards where reads have failed (optional)
       ); ///< @return error code, 0 on success
+
+#ifdef WITH_CRIMSON
+    void handle_sub_read_n_reply(
+      pg_shard_t from,
+      ECSubRead &op,
+      const ZTracer::Trace &trace) {
+      ec_backend.handle_sub_read_n_reply(from, op, trace);
+    }
+#endif
   };
 
   /**
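
Note: the control flow added above follows a common "local fast path"
pattern: when a sub-operation targets the issuing shard itself, the
request/reply pair runs synchronously instead of taking a message
round-trip through the messenger. Below is a minimal standalone sketch
of that pattern; Shard, SubRead, SubReadReply and Pipeline are
illustrative stand-ins, not the actual Ceph types.

#include <iostream>
#include <map>
#include <optional>

// Illustrative stand-ins for pg_shard_t / ECSubRead / ECSubReadReply.
struct Shard { int id; bool operator==(const Shard &) const = default; };
struct SubRead { int tid; };
struct SubReadReply { int tid; };

struct Pipeline {
  Shard whoami{0};

  SubReadReply handle_sub_read(Shard /*from*/, const SubRead &op) {
    // Perform the read locally and build the reply in place.
    return SubReadReply{op.tid};
  }

  void handle_sub_read_reply(Shard from, const SubReadReply &reply) {
    std::cout << "reply tid=" << reply.tid
              << " from shard " << from.id << "\n";
  }

  // Mirrors ECBackend::handle_sub_read_n_reply(): do the read and feed
  // the reply straight back into the reply path, bypassing the messenger.
  void handle_sub_read_n_reply(Shard from, const SubRead &op) {
    SubReadReply reply = handle_sub_read(from, op);
    handle_sub_read_reply(from, reply);
  }

  void do_read(std::map<int, SubRead> reads) {
    std::optional<SubRead> local;
    for (auto &[shard_id, read] : reads) {
      if (Shard{shard_id} == whoami) {
        local = std::move(read);  // defer: remote sends go out first
        continue;
      }
      // ... build and queue a message to the remote shard here ...
    }
    if (local) {
      handle_sub_read_n_reply(whoami, *local);
    }
  }
};

int main() {
  Pipeline p;
  p.do_read({{0, SubRead{1}}, {3, SubRead{1}}});
}

As in the patch, the local read is deferred until all remote sends have
been issued, so the (potentially blocking) local work never delays the
fan-out to the other shards.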