git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
crimson/os/seastore/device: add readv
author: Xuehan Xu <xuxuehan@qianxin.com>
Fri, 15 Aug 2025 11:12:18 +0000 (19:12 +0800)
committer: Xuehan Xu <xuxuehan@qianxin.com>
Mon, 30 Mar 2026 10:09:24 +0000 (18:09 +0800)
Signed-off-by: Xuehan Xu <xuxuehan@qianxin.com>
src/crimson/os/seastore/device.h
src/crimson/os/seastore/random_block_manager/nvme_block_device.cc
src/crimson/os/seastore/random_block_manager/nvme_block_device.h
src/crimson/os/seastore/random_block_manager/rbm_device.cc
src/crimson/os/seastore/random_block_manager/rbm_device.h
src/crimson/os/seastore/segment_manager/block.cc
src/crimson/os/seastore/segment_manager/block.h
src/crimson/os/seastore/segment_manager/ephemeral.cc
src/crimson/os/seastore/segment_manager/ephemeral.h
src/crimson/os/seastore/segment_manager/zbd.cc
src/crimson/os/seastore/segment_manager/zbd.h

index dc21522899a29158c30a10b1d7867b043d320f77..da37ebf198ff1ecf63da25afcc9fc193f22bbe62 100644 (file)
@@ -157,6 +157,8 @@ public:
     size_t len,
     ceph::bufferptr &out) = 0;
 
+  /// Vectored read: fill the buffers in `vecs`, in order, with data read
+  /// starting at `addr`. Pure virtual, so every concrete device implements it.
+  virtual read_ertr::future<> readv(paddr_t addr, std::vector<bufferptr> vecs) = 0;
+
   read_ertr::future<ceph::bufferptr> read(
     paddr_t addr,
     size_t len
index 88d259b1521b84e8f1def68887f104e987a40b35..dfe04f5bd2966cd5682b6ac45bc323d7f2ac7747 100644 (file)
@@ -188,6 +188,39 @@ read_ertr::future<> NVMeBlockDevice::read(
   }
 }
 
+/// Vectored read of consecutive device bytes starting at `offset` into the
+/// buffers of `ptrs`, in order. Resolves to input_output_error when the
+/// DMA read throws or reports a byte count other than the total requested.
+read_ertr::future<> NVMeBlockDevice::_readv(
+  uint64_t offset,
+  std::vector<bufferptr> ptrs) {
+  LOG_PREFIX(NVMeBlockDevice::_readv);
+  DEBUG("block: read offset {}, {} buffers", offset, ptrs.size());
+  if (ptrs.empty()) {
+    // nothing to fill
+    return read_ertr::now();
+  }
+
+  // With end-to-end data protection the request is served by nvme_readv()
+  // rather than by a single iovec-based dma_read.
+  if (is_end_to_end_data_protection()) {
+    return nvme_readv(offset, std::move(ptrs));
+  }
+
+  // One iovec per destination buffer; `expected` is the total byte count.
+  size_t expected = 0;
+  std::vector<iovec> segments;
+  for (auto &buf : ptrs) {
+    assert((buf.length() % super.block_size) == 0);
+    segments.push_back(iovec{buf.c_str(), buf.length()});
+    expected += buf.length();
+  }
+
+  // Translate a thrown exception into the errorated input_output_error.
+  auto on_exception = [FNAME](auto e) -> read_ertr::future<size_t> {
+    ERROR("read: dma_read got error{}", e);
+    return crimson::ct_error::input_output_error::make();
+  };
+  // A short (or otherwise unexpected) byte count is also an I/O error.
+  auto check_length = [expected, FNAME](auto result) -> read_ertr::future<> {
+    if (result == expected) {
+      return read_ertr::now();
+    }
+    ERROR("read: dma_read got error with not proper length");
+    return crimson::ct_error::input_output_error::make();
+  };
+  return device.dma_read(offset, std::move(segments)
+  ).handle_exception(std::move(on_exception)
+  ).then(std::move(check_length));
+}
+
 write_ertr::future<> NVMeBlockDevice::writev(
   uint64_t offset,
   ceph::bufferlist bl,
@@ -442,4 +475,28 @@ read_ertr::future<> NVMeBlockDevice::nvme_read(
   }
 }
 
+/// Issue one nvme_read() per buffer in `ptrs` via parallel_for_each. Each
+/// buffer is read from `offset` plus the combined length of the buffers
+/// that precede it.
+read_ertr::future<> NVMeBlockDevice::nvme_readv(
+  uint64_t offset, std::vector<bufferptr> ptrs) {
+  // A single read request: absolute device offset plus its destination.
+  struct io_t {
+    uint64_t offset = 0;
+    bufferptr ptr;
+  };
+  std::vector<io_t> requests;
+  size_t consumed = 0;
+  for (auto &buf : ptrs) {
+    // take the length before the buffer is moved out of `ptrs`
+    const auto buf_len = buf.length();
+    requests.push_back(io_t{offset + consumed, std::move(buf)});
+    consumed += buf_len;
+  }
+  // do_with owns the request list (and thus the buffers) while the reads
+  // issued below are outstanding.
+  return seastar::do_with(
+    std::move(requests),
+    [this](auto &pending) {
+    return read_ertr::parallel_for_each(
+      pending,
+      [this](auto &req) {
+      return nvme_read(req.offset, req.ptr.length(), req.ptr.c_str());
+    });
+  });
+}
+
 }
index 2ac0a5a7264f4a095e299ad12ded920238bd9860..3031b8d01917a1c30e70004a1f6e0d63a685e663 100644 (file)
@@ -230,9 +230,14 @@ public:
   read_ertr::future<> read(
     uint64_t offset,
     bufferptr &bptr) final;
+  /// Vectored read of consecutive device bytes starting at `offset` into
+  /// the buffers of `ptrs`; declared final, so it cannot be overridden.
+  read_ertr::future<> _readv(
+    uint64_t offset,
+    std::vector<bufferptr> ptrs) final;
 
   read_ertr::future<> nvme_read(
     uint64_t offset, size_t len, void *buffer_ptr);
+  /// Vectored counterpart of nvme_read(): reads into each buffer of `ptrs`,
+  /// starting at device offset `offset`.
+  read_ertr::future<> nvme_readv(
+    uint64_t offset, std::vector<bufferptr> ptrs);
 
   close_ertr::future<> close() override;
 
index e81a959bbbd9e2fd288ceb9d9550ef1192b48562..0b759e05c4d455ab8400e021f7e45c98786fd83e 100644 (file)
@@ -273,6 +273,24 @@ write_ertr::future<> EphemeralRBMDevice::write(
   return write_ertr::now();
 }
 
+/// Copy bytes from the backing memory `buf`, beginning at `offset`, into
+/// each buffer of `ptrs` in turn. Returns an already-resolved future.
+read_ertr::future<> EphemeralRBMDevice::_readv(
+  uint64_t offset,
+  std::vector<bufferptr> ptrs) {
+  LOG_PREFIX(EphemeralRBMDevice::_readv);
+  ceph_assert(buf);
+  DEBUG("EphemeralRBMDevice: read offset {} {} buffers", offset, ptrs.size());
+
+  // `cursor` advances through the backing memory as each buffer is filled.
+  auto cursor = offset;
+  for (auto &dst : ptrs) {
+    const auto n = dst.length();
+    dst.copy_in(0, n, buf + cursor);
+    cursor += n;
+  }
+  return read_ertr::now();
+}
+
 read_ertr::future<> EphemeralRBMDevice::read(
   uint64_t offset,
   bufferptr &bptr) {
index debdb03067b745ab197368d3d2013b5dd95c9ac8..2bf2336b9de5cd4fc47b3e54e710b45769b55cff 100644 (file)
@@ -77,12 +77,22 @@ public:
     uint64_t rbm_addr = convert_paddr_to_abs_addr(addr);
     return read(rbm_addr, out);
   }
+  /// Vectored read at a paddr: translate `addr` into an absolute device
+  /// address and hand the buffers to the device-specific _readv().
+  read_ertr::future<> readv(
+    paddr_t addr,
+    std::vector<bufferptr> ptrs) final {
+    return _readv(convert_paddr_to_abs_addr(addr), std::move(ptrs));
+  }
 protected:
   rbm_superblock_t super;
   rbm_shard_info_t shard_info;
   uint32_t device_shard_nums = 0;
   store_index_t store_index = 0;
   bool shard_status = true;
+  /// Device-specific vectored read at absolute device offset `offset`;
+  /// readv() calls this after converting its paddr argument.
+  virtual read_ertr::future<> _readv(
+    uint64_t offset,
+    std::vector<bufferptr> ptrs) = 0;
+
 public:
   RBMDevice(store_index_t store_index = 0)
   : store_index(store_index) {}
@@ -236,6 +246,9 @@ public:
   read_ertr::future<> read(
     uint64_t offset,
     bufferptr &bptr) override;
+  /// Vectored read at absolute device offset `offset` into the buffers of
+  /// `ptrs`.
+  read_ertr::future<> _readv(
+    uint64_t offset,
+    std::vector<bufferptr> ptrs) override;
 
   close_ertr::future<> close() override;
 
index d871026d442cf66672c059661ee22faf9aa80cbb..d4f2129338b4105b9b58bb636d7640c1759e5177 100644 (file)
@@ -171,6 +171,43 @@ static read_ertr::future<> do_read(
   });
 }
 
+/// Read from `device` at byte offset `offset` into the buffers of `ptrs`,
+/// in order, with a single iovec-based dma_read. An exception from
+/// dma_read, or a returned byte count that differs from the buffers' total
+/// length, is reported as input_output_error.
+static read_ertr::future<> do_readv(
+  device_id_t device_id,
+  seastar::file &device,
+  uint64_t offset,
+  std::vector<bufferptr> ptrs)
+{
+  LOG_PREFIX(block_do_readv);
+  // Describe every destination buffer as an iovec and total their lengths.
+  size_t total = 0;
+  std::vector<iovec> segments;
+  for (auto &buf : ptrs) {
+    total += buf.length();
+    segments.push_back(iovec{buf.c_str(), buf.length()});
+  }
+  TRACE("{} poffset=0x{:x}~0x{:x} {} buffers",
+    device_id_printer_t{device_id}, offset, total, ptrs.size());
+
+  //FIXME: this is a little bit tricky, since seastar::future<T>::handle_exception
+  // returns seastar::future<T>, to return an crimson::ct_error, we have to create
+  // a seastar::future<T> holding that crimson::ct_error. This is not necessary
+  // once seastar::future<T>::handle_exception() returns seastar::futurize_t<T>
+  auto to_io_error =
+    [FNAME, device_id, offset, total](auto e) -> read_ertr::future<size_t>
+  {
+    ERROR("{} poffset=0x{:x}~0x{:x} got error -- {}",
+          device_id_printer_t{device_id}, offset, total, e);
+    return crimson::ct_error::input_output_error::make();
+  };
+  // The read only succeeds when exactly `total` bytes were transferred.
+  auto verify_length =
+    [FNAME, device_id, offset, total](auto result) -> read_ertr::future<>
+  {
+    if (result != total) {
+      ERROR("{} poffset=0x{:x}~0x{:x} read len=0x{:x} inconsistent",
+            device_id_printer_t{device_id}, offset, total, result);
+      return crimson::ct_error::input_output_error::make();
+    }
+    TRACE("{} poffset=0x{:x}~0x{:x} done", device_id_printer_t{device_id}, offset, total);
+    return read_ertr::now();
+  };
+  return device.dma_read(offset, std::move(segments)
+  ).handle_exception(std::move(to_io_error)
+  ).then(std::move(verify_length));
+}
+
 write_ertr::future<>
 SegmentStateTracker::write_out(
   device_id_t device_id,
@@ -689,6 +726,58 @@ SegmentManager::release_ertr::future<> BlockSegmentManager::release(
       shard_info.tracker_offset);
 }
 
+/// Vectored read inside one segment: validates the request, bumps the
+/// data_read statistic and forwards to do_readv().
+/// Fails with invarg when the segment offset or total length is not a
+/// multiple of the block size, when the segment id is out of range, or when
+/// the range runs past the segment size; fails with enoent when the tracker
+/// reports the segment as EMPTY.
+SegmentManager::read_ertr::future<> BlockSegmentManager::readv(
+  paddr_t addr,
+  std::vector<bufferptr> ptrs)
+{
+  LOG_PREFIX(BlockSegmentManager::readv);
+  // total number of bytes requested across all buffers
+  size_t len = 0;
+  for (auto &buf : ptrs) {
+    len += buf.length();
+  }
+
+  auto& seg_addr = addr.as_seg_paddr();
+  const auto id = seg_addr.get_segment_id();
+  const auto s_id = id.device_segment_id();
+  const auto s_off = seg_addr.get_segment_off();
+  const auto p_off = get_offset(addr);
+  DEBUG("{} offset=0x{:x}~0x{:x} poffset=0x{:x} ...", id, s_off, len, p_off);
+
+  assert(addr.get_device_id() == get_device_id());
+
+  const bool block_aligned =
+    (s_off % superblock.block_size == 0) &&
+    (len % superblock.block_size == 0);
+  if (!block_aligned) {
+    ERROR("{} offset=0x{:x}~0x{:x} poffset=0x{:x} invalid read", id, s_off, len, p_off);
+    return crimson::ct_error::invarg::make();
+  }
+
+  if (s_id >= get_num_segments()) {
+    ERROR("{} offset=0x{:x}~0x{:x} poffset=0x{:x} segment-id out of range {}",
+          id, s_off, len, p_off, get_num_segments());
+    return crimson::ct_error::invarg::make();
+  }
+
+  if (s_off + len > superblock.segment_size) {
+    ERROR("{} offset=0x{:x}~0x{:x} poffset=0x{:x} read out of range 0x{:x}",
+          id, s_off, len, p_off, superblock.segment_size);
+    return crimson::ct_error::invarg::make();
+  }
+
+  const auto state = tracker->get(s_id);
+  if (state == segment_state_t::EMPTY) {
+    // XXX: not an error during scanning,
+    // might need refactor to increase the log level
+    DEBUG("{} offset=0x{:x}~0x{:x} poffset=0x{:x} invalid state {}",
+          id, s_off, len, p_off, state);
+    return crimson::ct_error::enoent::make();
+  }
+
+  // Account for the bytes only once the request has passed validation.
+  stats.data_read.increment(len);
+  return do_readv(get_device_id(), device, p_off, std::move(ptrs));
+}
+
 SegmentManager::read_ertr::future<> BlockSegmentManager::read(
   paddr_t addr,
   size_t len,
index cb2d00d0fa8dbea443465320f2fe1e2233749808..515772e7736c8e2cb0fe7637273964db00f96a7c 100644 (file)
@@ -145,6 +145,8 @@ public:
     paddr_t addr,
     size_t len,
     ceph::bufferptr &out) final;
+  /// Vectored read: fills the buffers of `vecs`, in order, starting at
+  /// `addr`.
+  read_ertr::future<> readv(
+    paddr_t addr, std::vector<bufferptr> vecs) final;
 
   read_ertr::future<uint32_t> get_shard_nums() final;
 
index 6b34e5df093925b8406e9f24644917a71646578f..393c20ed0429885ddc341a0f1f4911585f09613a 100644 (file)
@@ -290,4 +290,47 @@ SegmentManager::read_ertr::future<> EphemeralSegmentManager::read(
   });
 }
 
+/// Vectored read from the in-memory `buffer`: copies consecutive bytes,
+/// starting at the physical offset of `addr`, into each buffer of `ptrs`
+/// in turn.
+/// Fails with invarg when the segment id is out of range or when the
+/// requested range runs past config.segment_size. On success the returned
+/// future completes after a seastar::yield().
+SegmentManager::read_ertr::future<> EphemeralSegmentManager::readv(
+  paddr_t addr,
+  std::vector<bufferptr> ptrs)
+{
+  // total number of bytes requested across all buffers
+  size_t len = 0;
+  for (auto &ptr : ptrs) {
+    len += ptr.length();
+  }
+  auto& seg_addr = addr.as_seg_paddr();
+  if (seg_addr.get_segment_id().device_segment_id() >= get_num_segments()) {
+    logger().error(
+      "EphemeralSegmentManager::readv: invalid segment {}",
+      addr);
+    return crimson::ct_error::invarg::make();
+  }
+
+  if (seg_addr.get_segment_off() + len > config.segment_size) {
+    // Name readv here so the error is not mistaken for one from read().
+    logger().error(
+      "EphemeralSegmentManager::readv: invalid offset {}~0x{:x}!",
+      addr,
+      len);
+    return crimson::ct_error::invarg::make();
+  }
+
+  // Resolve the physical offset once: it is the copy origin and is also
+  // what gets logged below, while `offset` advances per buffer.
+  const auto base_offset = get_offset(addr);
+  auto offset = base_offset;
+  for (auto &ptr : ptrs) {
+    ptr.copy_in(0, ptr.length(), buffer + offset);
+    offset += ptr.length();
+  }
+
+  logger().debug(
+    "segment_read to segment {} at offset 0x{:x}, "
+    "physical offset 0x{:x}, length 0x{:x}",
+    seg_addr.get_segment_id().device_segment_id(),
+    seg_addr.get_segment_off(),
+    base_offset,
+    len);
+
+  return read_ertr::now().safe_then([] {
+    return seastar::yield();
+  });
+}
+
 }
index 95bff4b1affe7df78b157c3dcce7d9d6bacaf6eb..dc9580a11d5a5629a293b95f5b4026dbfd10fc14 100644 (file)
@@ -121,6 +121,10 @@ public:
     size_t len,
     ceph::bufferptr &out) final;
 
+  /// Vectored read: fills the buffers passed in `ptr`, in order, starting
+  /// at `addr`.
+  read_ertr::future<> readv(
+    paddr_t addr,
+    std::vector<bufferptr> ptr) final;
+
   size_t get_available_size() const final {
     return config.size;
   }
index 3eef55cc2b244173b0694db772c4bf3ebe19278d..f0f3391f7d96e2e2aefb37f3adf4f84bbaf3b718 100644 (file)
@@ -392,6 +392,43 @@ static read_ertr::future<> do_read(
   });
 }
 
+/// Read from `device` at byte offset `offset` into the buffers of `ptrs`,
+/// in order, with a single iovec-based dma_read. An exception from
+/// dma_read, or a returned byte count that differs from the buffers' total
+/// length, is reported as input_output_error.
+static read_ertr::future<> do_readv(
+  device_id_t device_id,
+  seastar::file &device,
+  uint64_t offset,
+  std::vector<bufferptr> ptrs)
+{
+  // NOTE(review): the prefix says "block_" although this helper lives in
+  // zbd.cc -- confirm whether that is intended.
+  LOG_PREFIX(block_do_readv);
+  // One iovec per destination buffer; `len` is the total byte count.
+  std::vector<iovec> iov;
+  size_t len = 0;
+  for (auto &ptr : ptrs) {
+    iov.emplace_back(ptr.c_str(), ptr.length());
+    len += ptr.length();
+  }
+  // The buffer vector is the `ptrs` parameter; no `vecs` exists here.
+  TRACE("{} poffset=0x{:x}~0x{:x} {} buffers",
+    device_id_printer_t{device_id}, offset, len, ptrs.size());
+  return device.dma_read(offset, std::move(iov)
+  ).handle_exception(
+    //FIXME: this is a little bit tricky, since seastar::future<T>::handle_exception
+    // returns seastar::future<T>, to return an crimson::ct_error, we have to create
+    // a seastar::future<T> holding that crimson::ct_error. This is not necessary
+    // once seastar::future<T>::handle_exception() returns seastar::futurize_t<T>
+    [FNAME, device_id, offset, len](auto e) -> read_ertr::future<size_t>
+  {
+    ERROR("{} poffset=0x{:x}~0x{:x} got error -- {}",
+          device_id_printer_t{device_id}, offset, len, e);
+    return crimson::ct_error::input_output_error::make();
+  }).then([FNAME, device_id, offset, len](auto result) -> read_ertr::future<> {
+    // The read only succeeds when exactly `len` bytes were transferred.
+    if (result != len) {
+      ERROR("{} poffset=0x{:x}~0x{:x} read len=0x{:x} inconsistent",
+            device_id_printer_t{device_id}, offset, len, result);
+      return crimson::ct_error::input_output_error::make();
+    }
+    TRACE("{} poffset=0x{:x}~0x{:x} done", device_id_printer_t{device_id}, offset, len);
+    return read_ertr::now();
+  });
+}
+
 static
 ZBDSegmentManager::access_ertr::future<zbd_sm_metadata_t>
 read_metadata(seastar::file &device, seastar::stat_data sd)
@@ -695,6 +732,35 @@ ZBDSegmentManager::release_ertr::future<> ZBDSegmentManager::release(
   });
 }
 
+/// Vectored read inside one segment: validates the request and forwards to
+/// do_readv() at the physical offset of `addr`.
+/// Fails with invarg when the segment id is out of range or when the
+/// requested range runs past metadata.segment_capacity.
+SegmentManager::read_ertr::future<> ZBDSegmentManager::readv(
+  paddr_t addr,
+  std::vector<bufferptr> ptrs)
+{
+  LOG_PREFIX(ZBDSegmentManager::readv);
+  auto& seg_addr = addr.as_seg_paddr();
+  const auto dev_seg_id = seg_addr.get_segment_id().device_segment_id();
+  if (dev_seg_id >= get_num_segments()) {
+    ERROR("invalid segment {}",
+      dev_seg_id);
+    return crimson::ct_error::invarg::make();
+  }
+
+  // total number of bytes requested across all buffers
+  size_t total = 0;
+  for (auto &buf : ptrs) {
+    total += buf.length();
+  }
+  const bool past_capacity =
+    seg_addr.get_segment_off() + total > metadata.segment_capacity;
+  if (past_capacity) {
+    ERROR("invalid read offset {}, len 0x{:x}",
+      addr,
+      total);
+    return crimson::ct_error::invarg::make();
+  }
+  return do_readv(get_device_id(), device, get_offset(addr), std::move(ptrs));
+}
+
 SegmentManager::read_ertr::future<> ZBDSegmentManager::read(
   paddr_t addr,
   size_t len,
index cff300124d993d24b842b6748a68b2ccf0761841..4163991fbec042b8f108c3818d9e5fa117922ca2 100644 (file)
@@ -146,6 +146,9 @@ namespace crimson::os::seastore::segment_manager::zbd {
       paddr_t addr, 
       size_t len, 
       ceph::bufferptr &out) final;
+    /// Vectored read: fills the buffers of `ptrs`, in order, starting at
+    /// `addr`.
+    read_ertr::future<> readv(
+      paddr_t addr,
+      std::vector<bufferptr> ptrs) final;
 
     read_ertr::future<uint32_t> get_shard_nums() final;