git-server-git.apps.pok.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
crimson/os/seastore/device: add readv
author: Xuehan Xu <xuxuehan@qianxin.com>
Fri, 15 Aug 2025 11:12:18 +0000 (19:12 +0800)
committer: Xuehan Xu <xuxuehan@qianxin.com>
Thu, 4 Dec 2025 08:24:43 +0000 (16:24 +0800)
Signed-off-by: Xuehan Xu <xuxuehan@qianxin.com>
src/crimson/os/seastore/device.h
src/crimson/os/seastore/random_block_manager/nvme_block_device.cc
src/crimson/os/seastore/random_block_manager/nvme_block_device.h
src/crimson/os/seastore/random_block_manager/rbm_device.cc
src/crimson/os/seastore/random_block_manager/rbm_device.h
src/crimson/os/seastore/segment_manager/block.cc
src/crimson/os/seastore/segment_manager/block.h
src/crimson/os/seastore/segment_manager/ephemeral.cc
src/crimson/os/seastore/segment_manager/ephemeral.h
src/crimson/os/seastore/segment_manager/zbd.cc
src/crimson/os/seastore/segment_manager/zbd.h

index 5ef1b64217110d4a7aed15aced56a674df2cbe8c..d6f2a2f862e741202fb2c14ccd569dbfa58c961e 100644 (file)
@@ -155,6 +155,8 @@ public:
     size_t len,
     ceph::bufferptr &out) = 0;
 
+  virtual read_ertr::future<> readv(paddr_t addr, std::vector<bufferptr> vecs) = 0; // vectored read: the implementations in this change fill the buffers in order from the range starting at addr
+
   read_ertr::future<ceph::bufferptr> read(
     paddr_t addr,
     size_t len
index 8938b8f344e844fe8e7226c9a2f6a34f2fe47b65..76cbd39e07a37d1f9483bd8c618b2e54540e1a22 100644 (file)
@@ -165,6 +165,39 @@ read_ertr::future<> NVMeBlockDevice::read(
   }
 }
 
+read_ertr::future<> NVMeBlockDevice::_readv(
+  uint64_t offset,
+  std::vector<bufferptr> ptrs) {
+  LOG_PREFIX(NVMeBlockDevice::_readv);
+  DEBUG("block: read offset {}, {} buffers", offset, ptrs.size());
+  if (ptrs.size() == 0) {
+    return read_ertr::now();
+  }
+
+  if (is_end_to_end_data_protection()) {
+    return nvme_readv(offset, std::move(ptrs));
+  }
+  std::vector<iovec> iov;
+  size_t length = 0;
+  for (auto &ptr : ptrs) {
+    length += ptr.length();
+    assert((ptr.length() % super.block_size) == 0);
+    iov.emplace_back(ptr.c_str(), ptr.length());
+  }
+  return device.dma_read(offset, std::move(iov)
+  ).handle_exception(
+    [FNAME](auto e) -> read_ertr::future<size_t> {
+      ERROR("read: dma_read got error{}", e);
+      return crimson::ct_error::input_output_error::make();
+    }).then([length, FNAME](auto result) -> read_ertr::future<> {
+      if (result != length) {
+        ERROR("read: dma_read got error with not proper length");
+        return crimson::ct_error::input_output_error::make();
+      }
+      return read_ertr::now();
+    });
+}
+
 write_ertr::future<> NVMeBlockDevice::writev(
   uint64_t offset,
   ceph::bufferlist bl,
@@ -419,4 +452,28 @@ read_ertr::future<> NVMeBlockDevice::nvme_read(
   }
 }
 
+read_ertr::future<> NVMeBlockDevice::nvme_readv(
+  uint64_t offset, std::vector<bufferptr> ptrs) {
+  struct io_t {
+    uint64_t offset = 0;
+    bufferptr ptr;
+  };
+  std::vector<io_t> iov;
+  size_t off = 0;
+  for (auto &ptr : ptrs) {
+    auto len = ptr.length();
+    iov.emplace_back(offset + off, std::move(ptr));
+    off += len;
+  }
+  return seastar::do_with(
+    std::move(iov),
+    [this](auto &iov) {
+    return read_ertr::parallel_for_each(
+      iov,
+      [this](auto &io) {
+      return nvme_read(io.offset, io.ptr.length(), io.ptr.c_str());
+    });
+  });
+}
+
 }
index f8535e8417aff62d1548fe68c735a7e15d8667cb..959f3bdda4daf949c031a3147af5c07f6cc2c387 100644 (file)
@@ -228,9 +228,14 @@ public:
   read_ertr::future<> read(
     uint64_t offset,
     bufferptr &bptr) final;
+  read_ertr::future<> _readv(
+    uint64_t offset,
+    std::vector<bufferptr> ptrs) final; // vectored read: delegates to nvme_readv() when end-to-end data protection is enabled, otherwise issues one scatter dma_read()
 
   read_ertr::future<> nvme_read(
     uint64_t offset, size_t len, void *buffer_ptr);
+  read_ertr::future<> nvme_readv(
+    uint64_t offset, std::vector<bufferptr> ptrs); // one nvme_read() per buffer, at consecutive device offsets starting at offset
 
   close_ertr::future<> close() override;
 
index 91d42170666c2fa71d17a55de158c9f4ec19b0d4..f2d4e666cc1b34c9814f54d8a4aeaa49f8129d19 100644 (file)
@@ -239,6 +239,24 @@ write_ertr::future<> EphemeralRBMDevice::write(
   return write_ertr::now();
 }
 
+/**
+ * Vectored read from the backing buffer buf: each entry of ptrs is filled,
+ * in order, from consecutive bytes of buf starting at offset. The copy is
+ * done inline and an already-resolved future is returned.
+ */
+read_ertr::future<> EphemeralRBMDevice::_readv(
+  uint64_t offset,
+  std::vector<bufferptr> ptrs) {
+  LOG_PREFIX(EphemeralRBMDevice::_readv);
+  ceph_assert(buf);
+  DEBUG(
+    "EphemeralRBMDevice: read offset {} {} buffers",
+    offset,
+    ptrs.size());
+
+  // walk the backing buffer with a cursor, one destination at a time
+  auto *src = buf + offset;
+  for (auto &dst : ptrs) {
+    const auto len = dst.length();
+    dst.copy_in(0, len, src);
+    src += len;
+  }
+
+  return read_ertr::now();
+}
+
 read_ertr::future<> EphemeralRBMDevice::read(
   uint64_t offset,
   bufferptr &bptr) {
index ad79b61a5cfa97b21d65bbf3e566e62d72e777ab..2409d22c63486442d4891857dc1359750181c419 100644 (file)
@@ -77,9 +77,19 @@ public:
     uint64_t rbm_addr = convert_paddr_to_abs_addr(addr);
     return read(rbm_addr, out);
   }
+  /// Vectored read at a paddr: translates addr to an absolute device
+  /// address and forwards the buffers to the device-specific _readv().
+  read_ertr::future<> readv(
+    paddr_t addr,
+    std::vector<bufferptr> ptrs) final {
+    return _readv(convert_paddr_to_abs_addr(addr), std::move(ptrs));
+  }
 protected:
   rbm_superblock_t super;
   rbm_shard_info_t shard_info;
+  virtual read_ertr::future<> _readv(
+    uint64_t offset, // absolute device address; readv() passes the result of convert_paddr_to_abs_addr()
+    std::vector<bufferptr> ptrs) = 0; // destination buffers for the vectored read
+
 public:
   RBMDevice() {}
   virtual ~RBMDevice() = default;
@@ -230,6 +240,9 @@ public:
   read_ertr::future<> read(
     uint64_t offset,
     bufferptr &bptr) override;
+  read_ertr::future<> _readv(
+    uint64_t offset,
+    std::vector<bufferptr> ptrs) override; // NOTE(review): presumably the EphemeralRBMDevice override defined in rbm_device.cc; enclosing class is not visible in this hunk, confirm
 
   close_ertr::future<> close() override;
 
index b68b8cc8609f7db878b40e0363f67ce788458daf..72eb927d3baf06e4e7ac38c7301289d29a5cb265 100644 (file)
@@ -169,6 +169,43 @@ static read_ertr::future<> do_read(
   });
 }
 
+static read_ertr::future<> do_readv(
+  device_id_t device_id,
+  seastar::file &device,
+  uint64_t offset,
+  std::vector<bufferptr> ptrs)
+{
+  LOG_PREFIX(block_do_readv);
+  std::vector<iovec> iov;
+  size_t len = 0;
+  for (auto &ptr : ptrs) {
+    iov.emplace_back(ptr.c_str(), ptr.length());
+    len += ptr.length();
+  }
+  TRACE("{} poffset=0x{:x}~0x{:x} {} buffers",
+    device_id_printer_t{device_id}, offset, len, ptrs.size());
+  return device.dma_read(offset, std::move(iov)
+  ).handle_exception(
+    //FIXME: this is a little bit tricky, since seastar::future<T>::handle_exception
+    // returns seastar::future<T>, to return an crimson::ct_error, we have to create
+    // a seastar::future<T> holding that crimson::ct_error. This is not necessary
+    // once seastar::future<T>::handle_exception() returns seastar::futurize_t<T>
+    [FNAME, device_id, offset, len](auto e) -> read_ertr::future<size_t>
+  {
+    ERROR("{} poffset=0x{:x}~0x{:x} got error -- {}",
+          device_id_printer_t{device_id}, offset, len, e);
+    return crimson::ct_error::input_output_error::make();
+  }).then([FNAME, device_id, offset, len](auto result) -> read_ertr::future<> {
+    if (result != len) {
+      ERROR("{} poffset=0x{:x}~0x{:x} read len=0x{:x} inconsistent",
+            device_id_printer_t{device_id}, offset, len, result);
+      return crimson::ct_error::input_output_error::make();
+    }
+    TRACE("{} poffset=0x{:x}~0x{:x} done", device_id_printer_t{device_id}, offset, len);
+    return read_ertr::now();
+  });
+}
+
 write_ertr::future<>
 SegmentStateTracker::write_out(
   device_id_t device_id,
@@ -637,6 +674,58 @@ SegmentManager::release_ertr::future<> BlockSegmentManager::release(
       shard_info.tracker_offset);
 }
 
+SegmentManager::read_ertr::future<> BlockSegmentManager::readv(
+  paddr_t addr,
+  std::vector<bufferptr> ptrs)
+{
+  LOG_PREFIX(BlockSegmentManager::readv);
+  size_t len = 0;
+  for (auto &ptr : ptrs) {
+    len += ptr.length();
+  }
+  auto& seg_addr = addr.as_seg_paddr();
+  auto id = seg_addr.get_segment_id();
+  auto s_id = id.device_segment_id();
+  auto s_off = seg_addr.get_segment_off();
+  auto p_off = get_offset(addr);
+  DEBUG("{} offset=0x{:x}~0x{:x} poffset=0x{:x} ...", id, s_off, len, p_off);
+
+  assert(addr.get_device_id() == get_device_id());
+
+  if (s_off % superblock.block_size != 0 ||
+      len % superblock.block_size != 0) {
+    ERROR("{} offset=0x{:x}~0x{:x} poffset=0x{:x} invalid read", id, s_off, len, p_off);
+    return crimson::ct_error::invarg::make();
+  }
+
+  if (s_id >= get_num_segments()) {
+    ERROR("{} offset=0x{:x}~0x{:x} poffset=0x{:x} segment-id out of range {}",
+          id, s_off, len, p_off, get_num_segments());
+    return crimson::ct_error::invarg::make();
+  }
+
+  if (s_off + len > superblock.segment_size) {
+    ERROR("{} offset=0x{:x}~0x{:x} poffset=0x{:x} read out of range 0x{:x}",
+          id, s_off, len, p_off, superblock.segment_size);
+    return crimson::ct_error::invarg::make();
+  }
+
+  if (tracker->get(s_id) == segment_state_t::EMPTY) {
+    // XXX: not an error during scanning,
+    // might need refactor to increase the log level
+    DEBUG("{} offset=0x{:x}~0x{:x} poffset=0x{:x} invalid state {}",
+          id, s_off, len, p_off, tracker->get(s_id));
+    return crimson::ct_error::enoent::make();
+  }
+
+  stats.data_read.increment(len);
+  return do_readv(
+    get_device_id(),
+    device,
+    p_off,
+    std::move(ptrs));
+}
+
 SegmentManager::read_ertr::future<> BlockSegmentManager::read(
   paddr_t addr,
   size_t len,
index a0445371016ccf9f213b15904d1f1b1e59d6ea22..3a6e5fd42c13cf7ecb0c1f5007aa3fcf507f6675 100644 (file)
@@ -149,6 +149,9 @@ public:
     size_t len,
     ceph::bufferptr &out) final;
 
+  read_ertr::future<> readv(
+    paddr_t addr, std::vector<bufferptr> vecs) final; // vectored read; the definition rejects unaligned or out-of-range requests with invarg and EMPTY segments with enoent
+
   device_type_t get_device_type() const final {
     return superblock.config.spec.dtype;
   }
index 6b34e5df093925b8406e9f24644917a71646578f..393c20ed0429885ddc341a0f1f4911585f09613a 100644 (file)
@@ -290,4 +290,47 @@ SegmentManager::read_ertr::future<> EphemeralSegmentManager::read(
   });
 }
 
+SegmentManager::read_ertr::future<> EphemeralSegmentManager::readv(
+  paddr_t addr,
+  std::vector<bufferptr> ptrs)
+{
+  size_t len = 0;
+  for (auto &ptr : ptrs) {
+    len += ptr.length();
+  }
+  auto& seg_addr = addr.as_seg_paddr();
+  if (seg_addr.get_segment_id().device_segment_id() >= get_num_segments()) {
+    logger().error(
+      "EphemeralSegmentManager::readv: invalid segment {}",
+      addr);
+    return crimson::ct_error::invarg::make();
+  }
+
+  if (seg_addr.get_segment_off() + len > config.segment_size) {
+    logger().error(
+      "EphemeralSegmentManager::read: invalid offset {}~0x{:x}!",
+      addr,
+      len);
+    return crimson::ct_error::invarg::make();
+  }
+
+  auto offset = get_offset(addr);
+  for (auto &ptr : ptrs) {
+    ptr.copy_in(0, ptr.length(), buffer + offset);
+    offset += ptr.length();
+  }
+
+  logger().debug(
+    "segment_read to segment {} at offset 0x{:x}, "
+    "physical offset 0x{:x}, length 0x{:x}",
+    seg_addr.get_segment_id().device_segment_id(),
+    seg_addr.get_segment_off(),
+    get_offset(addr),
+    len);
+
+  return read_ertr::now().safe_then([] {
+    return seastar::yield();
+  });
+}
+
 }
index 95bff4b1affe7df78b157c3dcce7d9d6bacaf6eb..dc9580a11d5a5629a293b95f5b4026dbfd10fc14 100644 (file)
@@ -121,6 +121,10 @@ public:
     size_t len,
     ceph::bufferptr &out) final;
 
+  read_ertr::future<> readv(
+    paddr_t addr,
+    std::vector<bufferptr> ptr) final; // vectored read: the definition copies from the backing buffer into each bufferptr in order
+
   size_t get_available_size() const final {
     return config.size;
   }
index cb343e0fa0ca1f23ab837caff86e94492b955c88..4af70460357dd5523a65ed6d7b112bf5c469d87b 100644 (file)
@@ -371,6 +371,43 @@ static read_ertr::future<> do_read(
   });
 }
 
+static read_ertr::future<> do_readv(
+  device_id_t device_id,
+  seastar::file &device,
+  uint64_t offset,
+  std::vector<bufferptr> ptrs)
+{
+  LOG_PREFIX(block_do_readv);
+  std::vector<iovec> iov;
+  size_t len = 0;
+  for (auto &ptr : ptrs) {
+    iov.emplace_back(ptr.c_str(), ptr.length());
+    len += ptr.length();
+  }
+  TRACE("{} poffset=0x{:x}~0x{:x} {} buffers",
+    device_id_printer_t{device_id}, offset, len, vecs.size());
+  return device.dma_read(offset, std::move(iov)
+  ).handle_exception(
+    //FIXME: this is a little bit tricky, since seastar::future<T>::handle_exception
+    // returns seastar::future<T>, to return an crimson::ct_error, we have to create
+    // a seastar::future<T> holding that crimson::ct_error. This is not necessary
+    // once seastar::future<T>::handle_exception() returns seastar::futurize_t<T>
+    [FNAME, device_id, offset, len](auto e) -> read_ertr::future<size_t>
+  {
+    ERROR("{} poffset=0x{:x}~0x{:x} got error -- {}",
+          device_id_printer_t{device_id}, offset, len, e);
+    return crimson::ct_error::input_output_error::make();
+  }).then([FNAME, device_id, offset, len](auto result) -> read_ertr::future<> {
+    if (result != len) {
+      ERROR("{} poffset=0x{:x}~0x{:x} read len=0x{:x} inconsistent",
+            device_id_printer_t{device_id}, offset, len, result);
+      return crimson::ct_error::input_output_error::make();
+    }
+    TRACE("{} poffset=0x{:x}~0x{:x} done", device_id_printer_t{device_id}, offset, len);
+    return read_ertr::now();
+  });
+}
+
 static
 ZBDSegmentManager::access_ertr::future<zbd_sm_metadata_t>
 read_metadata(seastar::file &device, seastar::stat_data sd)
@@ -646,6 +683,35 @@ ZBDSegmentManager::release_ertr::future<> ZBDSegmentManager::release(
   });
 }
 
+SegmentManager::read_ertr::future<> ZBDSegmentManager::readv(
+  paddr_t addr,
+  std::vector<bufferptr> ptrs)
+{
+  LOG_PREFIX(ZBDSegmentManager::readv);
+  size_t len = 0;
+  for (auto &ptr : ptrs) {
+    len += ptr.length();
+  }
+  auto& seg_addr = addr.as_seg_paddr();
+  if (seg_addr.get_segment_id().device_segment_id() >= get_num_segments()) {
+    ERROR("invalid segment {}",
+      seg_addr.get_segment_id().device_segment_id());
+    return crimson::ct_error::invarg::make();
+  }
+  
+  if (seg_addr.get_segment_off() + len > metadata.segment_capacity) {
+    ERROR("invalid read offset {}, len 0x{:x}",
+      addr,
+      len);
+    return crimson::ct_error::invarg::make();
+  }
+  return do_readv(
+    get_device_id(),
+    device,
+    get_offset(addr),
+    std::move(ptrs));
+}
+
 SegmentManager::read_ertr::future<> ZBDSegmentManager::read(
   paddr_t addr,
   size_t len,
index d82974783d4240480c7e225f880b7aa2f76a3e6b..c23860e4e1adaf4868c64d2e5905a4f32e0bb465 100644 (file)
@@ -152,6 +152,10 @@ namespace crimson::os::seastore::segment_manager::zbd {
       size_t len, 
       ceph::bufferptr &out) final;
 
+    read_ertr::future<> readv(
+      paddr_t addr,
+      std::vector<bufferptr> ptrs) final; // vectored read; the definition returns invarg when the segment id or the read range is out of bounds
+
     device_type_t get_device_type() const final {
       return device_type_t::ZBD;
     }