From: Xuehan Xu
Date: Fri, 15 Aug 2025 11:12:18 +0000 (+0800)
Subject: crimson/os/seastore/device: add readv
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=cd42ed4e511eee8c3eb1ceaeef802b8bffbe26e9;p=ceph-ci.git

crimson/os/seastore/device: add readv

Signed-off-by: Xuehan Xu
---
Review notes (ignored by git-am; not part of the commit message):

 * This copy was re-flowed from a whitespace-collapsed dump. Line breaks and
   indentation are reconstructed so that every hunk matches its @@ header;
   regenerate from the commit if byte-exact context is required.
 * Template arguments that the dump had stripped were restored from usage:
   std::vector<bufferptr>, std::vector<iovec>, std::vector<io_t>,
   read_ertr::future<size_t>. Those appearing only in context lines
   (future<ceph::bufferptr>, future<zbd_sm_metadata_t>, the <T> in the FIXME
   comment) are inferred -- confirm against the tree.
 * zbd.cc do_readv(): TRACE referenced an undeclared `vecs`; it now uses the
   `ptrs` parameter. LOG_PREFIX was copied from block.cc (block_do_readv) and
   is now zbd_do_readv so log lines are attributed to the right backend.
 * ephemeral.cc readv(): the out-of-range message was labelled "::read:";
   it now says "::readv:" like the other message in the same function.
 * nvme_block_device.cc _readv(): "got error{}" lacked a separator and both
   messages were labelled "read:"; they are now labelled "readv:".
 * NOTE(review): do_readv() (block.cc, zbd.cc) and NVMeBlockDevice::_readv()
   pass raw pointers into `ptrs` to dma_read() while `ptrs` itself is
   destroyed when the function returns. Presumably callers keep their own
   references to the buffers until the future resolves -- confirm, or keep
   `ptrs` alive across the I/O the way nvme_readv() does with do_with().

diff --git a/src/crimson/os/seastore/device.h b/src/crimson/os/seastore/device.h
index 5ef1b642171..d6f2a2f862e 100644
--- a/src/crimson/os/seastore/device.h
+++ b/src/crimson/os/seastore/device.h
@@ -155,6 +155,8 @@ public:
     size_t len,
     ceph::bufferptr &out) = 0;
 
+  virtual read_ertr::future<> readv(paddr_t addr, std::vector<bufferptr> vecs) = 0;
+
   read_ertr::future<ceph::bufferptr> read(
     paddr_t addr,
     size_t len
diff --git a/src/crimson/os/seastore/random_block_manager/nvme_block_device.cc b/src/crimson/os/seastore/random_block_manager/nvme_block_device.cc
index 8938b8f344e..76cbd39e07a 100644
--- a/src/crimson/os/seastore/random_block_manager/nvme_block_device.cc
+++ b/src/crimson/os/seastore/random_block_manager/nvme_block_device.cc
@@ -165,6 +165,39 @@ read_ertr::future<> NVMeBlockDevice::read(
   }
 }
 
+read_ertr::future<> NVMeBlockDevice::_readv(
+  uint64_t offset,
+  std::vector<bufferptr> ptrs) {
+  LOG_PREFIX(NVMeBlockDevice::_readv);
+  DEBUG("block: read offset {}, {} buffers", offset, ptrs.size());
+  if (ptrs.size() == 0) {
+    return read_ertr::now();
+  }
+
+  if (is_end_to_end_data_protection()) {
+    return nvme_readv(offset, std::move(ptrs));
+  }
+  std::vector<iovec> iov;
+  size_t length = 0;
+  for (auto &ptr : ptrs) {
+    length += ptr.length();
+    assert((ptr.length() % super.block_size) == 0);
+    iov.emplace_back(ptr.c_str(), ptr.length());
+  }
+  return device.dma_read(offset, std::move(iov)
+  ).handle_exception(
+    [FNAME](auto e) -> read_ertr::future<size_t> {
+    ERROR("readv: dma_read got error {}", e);
+    return crimson::ct_error::input_output_error::make();
+  }).then([length, FNAME](auto result) -> read_ertr::future<> {
+    if (result != length) {
+      ERROR("readv: dma_read got error with not proper length");
+      return crimson::ct_error::input_output_error::make();
+    }
+    return read_ertr::now();
+  });
+}
+
 write_ertr::future<> NVMeBlockDevice::writev(
   uint64_t offset,
   ceph::bufferlist bl,
@@ -419,4 +452,28 @@ read_ertr::future<> NVMeBlockDevice::nvme_read(
   }
 }
 
+read_ertr::future<> NVMeBlockDevice::nvme_readv(
+  uint64_t offset, std::vector<bufferptr> ptrs) {
+  struct io_t {
+    uint64_t offset = 0;
+    bufferptr ptr;
+  };
+  std::vector<io_t> iov;
+  size_t off = 0;
+  for (auto &ptr : ptrs) {
+    auto len = ptr.length();
+    iov.emplace_back(offset + off, std::move(ptr));
+    off += len;
+  }
+  return seastar::do_with(
+    std::move(iov),
+    [this](auto &iov) {
+    return read_ertr::parallel_for_each(
+      iov,
+      [this](auto &io) {
+      return nvme_read(io.offset, io.ptr.length(), io.ptr.c_str());
+    });
+  });
+}
+
 }
diff --git a/src/crimson/os/seastore/random_block_manager/nvme_block_device.h b/src/crimson/os/seastore/random_block_manager/nvme_block_device.h
index f8535e8417a..959f3bdda4d 100644
--- a/src/crimson/os/seastore/random_block_manager/nvme_block_device.h
+++ b/src/crimson/os/seastore/random_block_manager/nvme_block_device.h
@@ -228,9 +228,14 @@ public:
   read_ertr::future<> read(
     uint64_t offset,
     bufferptr &bptr) final;
+  read_ertr::future<> _readv(
+    uint64_t offset,
+    std::vector<bufferptr> ptrs) final;
 
   read_ertr::future<> nvme_read(
     uint64_t offset, size_t len, void *buffer_ptr);
+  read_ertr::future<> nvme_readv(
+    uint64_t offset, std::vector<bufferptr> ptrs);
 
   close_ertr::future<> close() override;
 
diff --git a/src/crimson/os/seastore/random_block_manager/rbm_device.cc b/src/crimson/os/seastore/random_block_manager/rbm_device.cc
index 91d42170666..f2d4e666cc1 100644
--- a/src/crimson/os/seastore/random_block_manager/rbm_device.cc
+++ b/src/crimson/os/seastore/random_block_manager/rbm_device.cc
@@ -239,6 +239,24 @@ write_ertr::future<> EphemeralRBMDevice::write(
   return write_ertr::now();
 }
 
+read_ertr::future<> EphemeralRBMDevice::_readv(
+  uint64_t offset,
+  std::vector<bufferptr> ptrs) {
+  LOG_PREFIX(EphemeralRBMDevice::_readv);
+  ceph_assert(buf);
+  DEBUG(
+    "EphemeralRBMDevice: read offset {} {} buffers",
+    offset,
+    ptrs.size());
+
+  for (auto &ptr : ptrs) {
+    ptr.copy_in(0, ptr.length(), buf + offset);
+    offset += ptr.length();
+  }
+
+  return read_ertr::now();
+}
+
 read_ertr::future<> EphemeralRBMDevice::read(
   uint64_t offset,
   bufferptr &bptr) {
diff --git a/src/crimson/os/seastore/random_block_manager/rbm_device.h b/src/crimson/os/seastore/random_block_manager/rbm_device.h
index ad79b61a5cf..2409d22c634 100644
--- a/src/crimson/os/seastore/random_block_manager/rbm_device.h
+++ b/src/crimson/os/seastore/random_block_manager/rbm_device.h
@@ -77,9 +77,19 @@ public:
     uint64_t rbm_addr = convert_paddr_to_abs_addr(addr);
     return read(rbm_addr, out);
   }
+  read_ertr::future<> readv(
+    paddr_t addr,
+    std::vector<bufferptr> ptrs) final {
+    uint64_t rbm_addr = convert_paddr_to_abs_addr(addr);
+    return _readv(rbm_addr, std::move(ptrs));
+  }
 protected:
   rbm_superblock_t super;
   rbm_shard_info_t shard_info;
+  virtual read_ertr::future<> _readv(
+    uint64_t offset,
+    std::vector<bufferptr> ptrs) = 0;
+
 public:
   RBMDevice() {}
   virtual ~RBMDevice() = default;
@@ -230,6 +240,9 @@ public:
   read_ertr::future<> read(
     uint64_t offset,
     bufferptr &bptr) override;
+  read_ertr::future<> _readv(
+    uint64_t offset,
+    std::vector<bufferptr> ptrs) override;
 
   close_ertr::future<> close() override;
 
diff --git a/src/crimson/os/seastore/segment_manager/block.cc b/src/crimson/os/seastore/segment_manager/block.cc
index b68b8cc8609..72eb927d3ba 100644
--- a/src/crimson/os/seastore/segment_manager/block.cc
+++ b/src/crimson/os/seastore/segment_manager/block.cc
@@ -169,6 +169,43 @@ static read_ertr::future<> do_read(
   });
 }
 
+static read_ertr::future<> do_readv(
+  device_id_t device_id,
+  seastar::file &device,
+  uint64_t offset,
+  std::vector<bufferptr> ptrs)
+{
+  LOG_PREFIX(block_do_readv);
+  std::vector<iovec> iov;
+  size_t len = 0;
+  for (auto &ptr : ptrs) {
+    iov.emplace_back(ptr.c_str(), ptr.length());
+    len += ptr.length();
+  }
+  TRACE("{} poffset=0x{:x}~0x{:x} {} buffers",
+        device_id_printer_t{device_id}, offset, len, ptrs.size());
+  return device.dma_read(offset, std::move(iov)
+  ).handle_exception(
+    //FIXME: this is a little bit tricky, since seastar::future<T>::handle_exception
+    //  returns seastar::future<T>, to return an crimson::ct_error, we have to create
+    //  a seastar::future<T> holding that crimson::ct_error. This is not necessary
+    //  once seastar::future<T>::handle_exception() returns seastar::futurize_t<T>
+    [FNAME, device_id, offset, len](auto e) -> read_ertr::future<size_t>
+    {
+      ERROR("{} poffset=0x{:x}~0x{:x} got error -- {}",
+            device_id_printer_t{device_id}, offset, len, e);
+      return crimson::ct_error::input_output_error::make();
+    }).then([FNAME, device_id, offset, len](auto result) -> read_ertr::future<> {
+    if (result != len) {
+      ERROR("{} poffset=0x{:x}~0x{:x} read len=0x{:x} inconsistent",
+            device_id_printer_t{device_id}, offset, len, result);
+      return crimson::ct_error::input_output_error::make();
+    }
+    TRACE("{} poffset=0x{:x}~0x{:x} done", device_id_printer_t{device_id}, offset, len);
+    return read_ertr::now();
+  });
+}
+
 write_ertr::future<>
 SegmentStateTracker::write_out(
   device_id_t device_id,
@@ -637,6 +674,58 @@ SegmentManager::release_ertr::future<> BlockSegmentManager::release(
     shard_info.tracker_offset);
 }
 
+SegmentManager::read_ertr::future<> BlockSegmentManager::readv(
+  paddr_t addr,
+  std::vector<bufferptr> ptrs)
+{
+  LOG_PREFIX(BlockSegmentManager::readv);
+  size_t len = 0;
+  for (auto &ptr : ptrs) {
+    len += ptr.length();
+  }
+  auto& seg_addr = addr.as_seg_paddr();
+  auto id = seg_addr.get_segment_id();
+  auto s_id = id.device_segment_id();
+  auto s_off = seg_addr.get_segment_off();
+  auto p_off = get_offset(addr);
+  DEBUG("{} offset=0x{:x}~0x{:x} poffset=0x{:x} ...", id, s_off, len, p_off);
+
+  assert(addr.get_device_id() == get_device_id());
+
+  if (s_off % superblock.block_size != 0 ||
+      len % superblock.block_size != 0) {
+    ERROR("{} offset=0x{:x}~0x{:x} poffset=0x{:x} invalid read", id, s_off, len, p_off);
+    return crimson::ct_error::invarg::make();
+  }
+
+  if (s_id >= get_num_segments()) {
+    ERROR("{} offset=0x{:x}~0x{:x} poffset=0x{:x} segment-id out of range {}",
+          id, s_off, len, p_off, get_num_segments());
+    return crimson::ct_error::invarg::make();
+  }
+
+  if (s_off + len > superblock.segment_size) {
+    ERROR("{} offset=0x{:x}~0x{:x} poffset=0x{:x} read out of range 0x{:x}",
+          id, s_off, len, p_off, superblock.segment_size);
+    return crimson::ct_error::invarg::make();
+  }
+
+  if (tracker->get(s_id) == segment_state_t::EMPTY) {
+    // XXX: not an error during scanning,
+    // might need refactor to increase the log level
+    DEBUG("{} offset=0x{:x}~0x{:x} poffset=0x{:x} invalid state {}",
+          id, s_off, len, p_off, tracker->get(s_id));
+    return crimson::ct_error::enoent::make();
+  }
+
+  stats.data_read.increment(len);
+  return do_readv(
+    get_device_id(),
+    device,
+    p_off,
+    std::move(ptrs));
+}
+
 SegmentManager::read_ertr::future<> BlockSegmentManager::read(
   paddr_t addr,
   size_t len,
diff --git a/src/crimson/os/seastore/segment_manager/block.h b/src/crimson/os/seastore/segment_manager/block.h
index a0445371016..3a6e5fd42c1 100644
--- a/src/crimson/os/seastore/segment_manager/block.h
+++ b/src/crimson/os/seastore/segment_manager/block.h
@@ -149,6 +149,9 @@ public:
     size_t len,
     ceph::bufferptr &out) final;
 
+  read_ertr::future<> readv(
+    paddr_t addr, std::vector<bufferptr> vecs) final;
+
   device_type_t get_device_type() const final {
     return superblock.config.spec.dtype;
   }
diff --git a/src/crimson/os/seastore/segment_manager/ephemeral.cc b/src/crimson/os/seastore/segment_manager/ephemeral.cc
index 6b34e5df093..393c20ed042 100644
--- a/src/crimson/os/seastore/segment_manager/ephemeral.cc
+++ b/src/crimson/os/seastore/segment_manager/ephemeral.cc
@@ -290,4 +290,47 @@ SegmentManager::read_ertr::future<> EphemeralSegmentManager::read(
   });
 }
 
+SegmentManager::read_ertr::future<> EphemeralSegmentManager::readv(
+  paddr_t addr,
+  std::vector<bufferptr> ptrs)
+{
+  size_t len = 0;
+  for (auto &ptr : ptrs) {
+    len += ptr.length();
+  }
+  auto& seg_addr = addr.as_seg_paddr();
+  if (seg_addr.get_segment_id().device_segment_id() >= get_num_segments()) {
+    logger().error(
+      "EphemeralSegmentManager::readv: invalid segment {}",
+      addr);
+    return crimson::ct_error::invarg::make();
+  }
+
+  if (seg_addr.get_segment_off() + len > config.segment_size) {
+    logger().error(
+      "EphemeralSegmentManager::readv: invalid offset {}~0x{:x}!",
+      addr,
+      len);
+    return crimson::ct_error::invarg::make();
+  }
+
+  auto offset = get_offset(addr);
+  for (auto &ptr : ptrs) {
+    ptr.copy_in(0, ptr.length(), buffer + offset);
+    offset += ptr.length();
+  }
+
+  logger().debug(
+    "segment_read to segment {} at offset 0x{:x}, "
+    "physical offset 0x{:x}, length 0x{:x}",
+    seg_addr.get_segment_id().device_segment_id(),
+    seg_addr.get_segment_off(),
+    get_offset(addr),
+    len);
+
+  return read_ertr::now().safe_then([] {
+    return seastar::yield();
+  });
+}
+
 }
diff --git a/src/crimson/os/seastore/segment_manager/ephemeral.h b/src/crimson/os/seastore/segment_manager/ephemeral.h
index 95bff4b1aff..dc9580a11d5 100644
--- a/src/crimson/os/seastore/segment_manager/ephemeral.h
+++ b/src/crimson/os/seastore/segment_manager/ephemeral.h
@@ -121,6 +121,10 @@ public:
     size_t len,
     ceph::bufferptr &out) final;
 
+  read_ertr::future<> readv(
+    paddr_t addr,
+    std::vector<bufferptr> ptr) final;
+
   size_t get_available_size() const final {
     return config.size;
   }
diff --git a/src/crimson/os/seastore/segment_manager/zbd.cc b/src/crimson/os/seastore/segment_manager/zbd.cc
index cb343e0fa0c..4af70460357 100644
--- a/src/crimson/os/seastore/segment_manager/zbd.cc
+++ b/src/crimson/os/seastore/segment_manager/zbd.cc
@@ -371,6 +371,43 @@ static read_ertr::future<> do_read(
     });
 }
 
+static read_ertr::future<> do_readv(
+  device_id_t device_id,
+  seastar::file &device,
+  uint64_t offset,
+  std::vector<bufferptr> ptrs)
+{
+  LOG_PREFIX(zbd_do_readv);
+  std::vector<iovec> iov;
+  size_t len = 0;
+  for (auto &ptr : ptrs) {
+    iov.emplace_back(ptr.c_str(), ptr.length());
+    len += ptr.length();
+  }
+  TRACE("{} poffset=0x{:x}~0x{:x} {} buffers",
+        device_id_printer_t{device_id}, offset, len, ptrs.size());
+  return device.dma_read(offset, std::move(iov)
+  ).handle_exception(
+    //FIXME: this is a little bit tricky, since seastar::future<T>::handle_exception
+    //  returns seastar::future<T>, to return an crimson::ct_error, we have to create
+    //  a seastar::future<T> holding that crimson::ct_error. This is not necessary
+    //  once seastar::future<T>::handle_exception() returns seastar::futurize_t<T>
+    [FNAME, device_id, offset, len](auto e) -> read_ertr::future<size_t>
+    {
+      ERROR("{} poffset=0x{:x}~0x{:x} got error -- {}",
+            device_id_printer_t{device_id}, offset, len, e);
+      return crimson::ct_error::input_output_error::make();
+    }).then([FNAME, device_id, offset, len](auto result) -> read_ertr::future<> {
+    if (result != len) {
+      ERROR("{} poffset=0x{:x}~0x{:x} read len=0x{:x} inconsistent",
+            device_id_printer_t{device_id}, offset, len, result);
+      return crimson::ct_error::input_output_error::make();
+    }
+    TRACE("{} poffset=0x{:x}~0x{:x} done", device_id_printer_t{device_id}, offset, len);
+    return read_ertr::now();
+  });
+}
+
 static
 ZBDSegmentManager::access_ertr::future<zbd_sm_metadata_t>
 read_metadata(seastar::file &device, seastar::stat_data sd)
@@ -646,6 +683,35 @@ ZBDSegmentManager::release_ertr::future<> ZBDSegmentManager::release(
   });
 }
 
+SegmentManager::read_ertr::future<> ZBDSegmentManager::readv(
+  paddr_t addr,
+  std::vector<bufferptr> ptrs)
+{
+  LOG_PREFIX(ZBDSegmentManager::readv);
+  size_t len = 0;
+  for (auto &ptr : ptrs) {
+    len += ptr.length();
+  }
+  auto& seg_addr = addr.as_seg_paddr();
+  if (seg_addr.get_segment_id().device_segment_id() >= get_num_segments()) {
+    ERROR("invalid segment {}",
+          seg_addr.get_segment_id().device_segment_id());
+    return crimson::ct_error::invarg::make();
+  }
+
+  if (seg_addr.get_segment_off() + len > metadata.segment_capacity) {
+    ERROR("invalid read offset {}, len 0x{:x}",
+          addr,
+          len);
+    return crimson::ct_error::invarg::make();
+  }
+  return do_readv(
+    get_device_id(),
+    device,
+    get_offset(addr),
+    std::move(ptrs));
+}
+
 SegmentManager::read_ertr::future<> ZBDSegmentManager::read(
   paddr_t addr,
   size_t len,
diff --git a/src/crimson/os/seastore/segment_manager/zbd.h b/src/crimson/os/seastore/segment_manager/zbd.h
index d82974783d4..c23860e4e1a 100644
--- a/src/crimson/os/seastore/segment_manager/zbd.h
+++ b/src/crimson/os/seastore/segment_manager/zbd.h
@@ -152,6 +152,10 @@ namespace crimson::os::seastore::segment_manager::zbd {
       size_t len,
       ceph::bufferptr &out) final;
 
+    read_ertr::future<> readv(
+      paddr_t addr,
+      std::vector<bufferptr> ptrs) final;
+
     device_type_t get_device_type() const final {
       return device_type_t::ZBD;
     }