From: Xuehan Xu
Date: Fri, 15 Aug 2025 11:12:18 +0000 (+0800)
Subject: crimson/os/seastore/device: add readv
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=3f06a8f0c4d760d5c75c19a394be6dcf57e977fe;p=ceph.git

crimson/os/seastore/device: add readv

Signed-off-by: Xuehan Xu
---

diff --git a/src/crimson/os/seastore/device.h b/src/crimson/os/seastore/device.h
index dc21522899a2..da37ebf198ff 100644
--- a/src/crimson/os/seastore/device.h
+++ b/src/crimson/os/seastore/device.h
@@ -157,6 +157,10 @@ public:
     size_t len,
     ceph::bufferptr &out) = 0;
 
+  /// Vectored read: fill the buffers in vecs, in order, from consecutive
+  /// offsets starting at addr.
+  virtual read_ertr::future<> readv(paddr_t addr, std::vector<ceph::bufferptr> vecs) = 0;
+
   read_ertr::future read(
     paddr_t addr,
     size_t len
diff --git a/src/crimson/os/seastore/random_block_manager/nvme_block_device.cc b/src/crimson/os/seastore/random_block_manager/nvme_block_device.cc
index 88d259b1521b..dfe04f5bd296 100644
--- a/src/crimson/os/seastore/random_block_manager/nvme_block_device.cc
+++ b/src/crimson/os/seastore/random_block_manager/nvme_block_device.cc
@@ -188,6 +188,42 @@ read_ertr::future<> NVMeBlockDevice::read(
   }
 }
 
+/// Vectored read of ptrs from consecutive device offsets starting at offset.
+/// Uses nvme_readv() when is_end_to_end_data_protection() is true, otherwise
+/// one dma_read(); a dma_read exception or short read yields input_output_error.
+read_ertr::future<> NVMeBlockDevice::_readv(
+  uint64_t offset,
+  std::vector<bufferptr> ptrs) {
+  LOG_PREFIX(NVMeBlockDevice::_readv);
+  DEBUG("block: read offset {}, {} buffers", offset, ptrs.size());
+  if (ptrs.size() == 0) {
+    return read_ertr::now();
+  }
+
+  if (is_end_to_end_data_protection()) {
+    return nvme_readv(offset, std::move(ptrs));
+  }
+  std::vector<iovec> iov;
+  size_t length = 0;
+  for (auto &ptr : ptrs) {
+    length += ptr.length();
+    assert((ptr.length() % super.block_size) == 0);
+    iov.emplace_back(ptr.c_str(), ptr.length());
+  }
+  return device.dma_read(offset, std::move(iov)
+  ).handle_exception(
+    [FNAME](auto e) -> read_ertr::future<size_t> {
+    ERROR("read: dma_read got error{}", e);
+    return crimson::ct_error::input_output_error::make();
+  }).then([length, FNAME](auto result) -> read_ertr::future<> {
+    if (result != length) {
+      ERROR("read: dma_read got error with not proper length");
+      return crimson::ct_error::input_output_error::make();
+    }
+    return read_ertr::now();
+  });
+}
+
 write_ertr::future<> NVMeBlockDevice::writev(
   uint64_t offset,
   ceph::bufferlist bl,
@@ -442,4 +478,30 @@ read_ertr::future<> NVMeBlockDevice::nvme_read(
   }
 }
 
+/// Issue one nvme_read() per buffer via parallel_for_each; each buffer is
+/// read from offset plus the total length of the buffers preceding it.
+read_ertr::future<> NVMeBlockDevice::nvme_readv(
+  uint64_t offset, std::vector<bufferptr> ptrs) {
+  struct io_t {
+    uint64_t offset = 0;
+    bufferptr ptr;
+  };
+  std::vector<io_t> iov;
+  size_t off = 0;
+  for (auto &ptr : ptrs) {
+    auto len = ptr.length();
+    iov.emplace_back(offset + off, std::move(ptr));
+    off += len;
+  }
+  return seastar::do_with(
+    std::move(iov),
+    [this](auto &iov) {
+    return read_ertr::parallel_for_each(
+      iov,
+      [this](auto &io) {
+      return nvme_read(io.offset, io.ptr.length(), io.ptr.c_str());
+    });
+  });
+}
+
 }
diff --git a/src/crimson/os/seastore/random_block_manager/nvme_block_device.h b/src/crimson/os/seastore/random_block_manager/nvme_block_device.h
index 2ac0a5a7264f..3031b8d01917 100644
--- a/src/crimson/os/seastore/random_block_manager/nvme_block_device.h
+++ b/src/crimson/os/seastore/random_block_manager/nvme_block_device.h
@@ -230,9 +230,14 @@ public:
   read_ertr::future<> read(
     uint64_t offset,
     bufferptr &bptr) final;
+  read_ertr::future<> _readv(
+    uint64_t offset,
+    std::vector<bufferptr> ptrs) final;
 
   read_ertr::future<> nvme_read(
     uint64_t offset, size_t len, void *buffer_ptr);
+  read_ertr::future<> nvme_readv(
+    uint64_t offset, std::vector<bufferptr> ptrs);
 
   close_ertr::future<> close() override;
 
diff --git a/src/crimson/os/seastore/random_block_manager/rbm_device.cc b/src/crimson/os/seastore/random_block_manager/rbm_device.cc
index e81a959bbbd9..0b759e05c4d4 100644
--- a/src/crimson/os/seastore/random_block_manager/rbm_device.cc
+++ b/src/crimson/os/seastore/random_block_manager/rbm_device.cc
@@ -273,6 +273,26 @@ write_ertr::future<> EphemeralRBMDevice::write(
   return write_ertr::now();
 }
 
+/// Fill each buffer in ptrs from the in-memory image buf, advancing offset
+/// by each buffer's length.
+read_ertr::future<> EphemeralRBMDevice::_readv(
+  uint64_t offset,
+  std::vector<bufferptr> ptrs) {
+  LOG_PREFIX(EphemeralRBMDevice::_readv);
+  ceph_assert(buf);
+  DEBUG(
+    "EphemeralRBMDevice: read offset {} {} buffers",
+    offset,
+    ptrs.size());
+
+  for (auto &ptr : ptrs) {
+    ptr.copy_in(0, ptr.length(), buf + offset);
+    offset += ptr.length();
+  }
+
+  return read_ertr::now();
+}
+
 read_ertr::future<> EphemeralRBMDevice::read(
   uint64_t offset,
   bufferptr &bptr) {
diff --git a/src/crimson/os/seastore/random_block_manager/rbm_device.h b/src/crimson/os/seastore/random_block_manager/rbm_device.h
index debdb03067b7..2bf2336b9de5 100644
--- a/src/crimson/os/seastore/random_block_manager/rbm_device.h
+++ b/src/crimson/os/seastore/random_block_manager/rbm_device.h
@@ -77,12 +77,25 @@ public:
     uint64_t rbm_addr = convert_paddr_to_abs_addr(addr);
     return read(rbm_addr, out);
   }
+  /// Vectored read: translate addr with convert_paddr_to_abs_addr() and
+  /// forward to the device-specific _readv().
+  read_ertr::future<> readv(
+    paddr_t addr,
+    std::vector<bufferptr> ptrs) final {
+    uint64_t rbm_addr = convert_paddr_to_abs_addr(addr);
+    return _readv(rbm_addr, std::move(ptrs));
+  }
 protected:
   rbm_superblock_t super;
   rbm_shard_info_t shard_info;
   uint32_t device_shard_nums = 0;
   store_index_t store_index = 0;
   bool shard_status = true;
+  /// Device-specific vectored read starting at the converted offset.
+  virtual read_ertr::future<> _readv(
+    uint64_t offset,
+    std::vector<bufferptr> ptrs) = 0;
+
 public:
   RBMDevice(store_index_t store_index = 0)
   : store_index(store_index) {}
@@ -236,6 +249,9 @@ public:
   read_ertr::future<> read(
     uint64_t offset,
     bufferptr &bptr) override;
+  read_ertr::future<> _readv(
+    uint64_t offset,
+    std::vector<bufferptr> ptrs) override;
 
   close_ertr::future<> close() override;
 
diff --git a/src/crimson/os/seastore/segment_manager/block.cc b/src/crimson/os/seastore/segment_manager/block.cc
index d871026d442c..d4f2129338b4 100644
--- a/src/crimson/os/seastore/segment_manager/block.cc
+++ b/src/crimson/os/seastore/segment_manager/block.cc
@@ -171,6 +171,45 @@ static read_ertr::future<> do_read(
   });
 }
 
+/// Read into ptrs with one vectored dma_read() starting at offset; an
+/// exception or a short read is reported as input_output_error.
+static read_ertr::future<> do_readv(
+  device_id_t device_id,
+  seastar::file &device,
+  uint64_t offset,
+  std::vector<ceph::bufferptr> ptrs)
+{
+  LOG_PREFIX(block_do_readv);
+  std::vector<iovec> iov;
+  size_t len = 0;
+  for (auto &ptr : ptrs) {
+    iov.emplace_back(ptr.c_str(), ptr.length());
+    len += ptr.length();
+  }
+  TRACE("{} poffset=0x{:x}~0x{:x} {} buffers",
+        device_id_printer_t{device_id}, offset, len, ptrs.size());
+  return device.dma_read(offset, std::move(iov)
+  ).handle_exception(
+    //FIXME: this is a little bit tricky, since seastar::future::handle_exception
+    // returns seastar::future, to return an crimson::ct_error, we have to create
+    // a seastar::future holding that crimson::ct_error. This is not necessary
+    // once seastar::future::handle_exception() returns seastar::futurize_t
+    [FNAME, device_id, offset, len](auto e) -> read_ertr::future<size_t>
+  {
+    ERROR("{} poffset=0x{:x}~0x{:x} got error -- {}",
+          device_id_printer_t{device_id}, offset, len, e);
+    return crimson::ct_error::input_output_error::make();
+  }).then([FNAME, device_id, offset, len](auto result) -> read_ertr::future<> {
+    if (result != len) {
+      ERROR("{} poffset=0x{:x}~0x{:x} read len=0x{:x} inconsistent",
+            device_id_printer_t{device_id}, offset, len, result);
+      return crimson::ct_error::input_output_error::make();
+    }
+    TRACE("{} poffset=0x{:x}~0x{:x} done", device_id_printer_t{device_id}, offset, len);
+    return read_ertr::now();
+  });
+}
+
 write_ertr::future<>
 SegmentStateTracker::write_out(
   device_id_t device_id,
@@ -689,6 +728,61 @@ SegmentManager::release_ertr::future<> BlockSegmentManager::release(
     shard_info.tracker_offset);
 }
 
+/// Vectored read within one segment. Rejects with invarg a misaligned
+/// offset or total length, an out-of-range segment id, or a range past the
+/// segment size; returns enoent when the segment state is EMPTY.
+SegmentManager::read_ertr::future<> BlockSegmentManager::readv(
+  paddr_t addr,
+  std::vector<ceph::bufferptr> ptrs)
+{
+  LOG_PREFIX(BlockSegmentManager::readv);
+  size_t len = 0;
+  for (auto &ptr : ptrs) {
+    len += ptr.length();
+  }
+  auto& seg_addr = addr.as_seg_paddr();
+  auto id = seg_addr.get_segment_id();
+  auto s_id = id.device_segment_id();
+  auto s_off = seg_addr.get_segment_off();
+  auto p_off = get_offset(addr);
+  DEBUG("{} offset=0x{:x}~0x{:x} poffset=0x{:x} ...", id, s_off, len, p_off);
+
+  assert(addr.get_device_id() == get_device_id());
+
+  if (s_off % superblock.block_size != 0 ||
+      len % superblock.block_size != 0) {
+    ERROR("{} offset=0x{:x}~0x{:x} poffset=0x{:x} invalid read", id, s_off, len, p_off);
+    return crimson::ct_error::invarg::make();
+  }
+
+  if (s_id >= get_num_segments()) {
+    ERROR("{} offset=0x{:x}~0x{:x} poffset=0x{:x} segment-id out of range {}",
+          id, s_off, len, p_off, get_num_segments());
+    return crimson::ct_error::invarg::make();
+  }
+
+  if (s_off + len > superblock.segment_size) {
+    ERROR("{} offset=0x{:x}~0x{:x} poffset=0x{:x} read out of range 0x{:x}",
+          id, s_off, len, p_off, superblock.segment_size);
+    return crimson::ct_error::invarg::make();
+  }
+
+  if (tracker->get(s_id) == segment_state_t::EMPTY) {
+    // XXX: not an error during scanning,
+    // might need refactor to increase the log level
+    DEBUG("{} offset=0x{:x}~0x{:x} poffset=0x{:x} invalid state {}",
+          id, s_off, len, p_off, tracker->get(s_id));
+    return crimson::ct_error::enoent::make();
+  }
+
+  stats.data_read.increment(len);
+  return do_readv(
+    get_device_id(),
+    device,
+    p_off,
+    std::move(ptrs));
+}
+
 SegmentManager::read_ertr::future<> BlockSegmentManager::read(
   paddr_t addr,
   size_t len,
diff --git a/src/crimson/os/seastore/segment_manager/block.h b/src/crimson/os/seastore/segment_manager/block.h
index cb2d00d0fa8d..515772e7736c 100644
--- a/src/crimson/os/seastore/segment_manager/block.h
+++ b/src/crimson/os/seastore/segment_manager/block.h
@@ -145,6 +145,8 @@ public:
     paddr_t addr,
     size_t len,
     ceph::bufferptr &out) final;
+  read_ertr::future<> readv(
+    paddr_t addr, std::vector<ceph::bufferptr> vecs) final;
 
   read_ertr::future get_shard_nums() final;
 
diff --git a/src/crimson/os/seastore/segment_manager/ephemeral.cc b/src/crimson/os/seastore/segment_manager/ephemeral.cc
index 6b34e5df0939..393c20ed0429 100644
--- a/src/crimson/os/seastore/segment_manager/ephemeral.cc
+++ b/src/crimson/os/seastore/segment_manager/ephemeral.cc
@@ -290,4 +290,49 @@ SegmentManager::read_ertr::future<> EphemeralSegmentManager::read(
   });
 }
 
+/// Vectored read from the in-memory buffer: after range-checking the request,
+/// fill each buffer in ptrs from consecutive offsets starting at addr.
+SegmentManager::read_ertr::future<> EphemeralSegmentManager::readv(
+  paddr_t addr,
+  std::vector<ceph::bufferptr> ptrs)
+{
+  size_t len = 0;
+  for (auto &ptr : ptrs) {
+    len += ptr.length();
+  }
+  auto& seg_addr = addr.as_seg_paddr();
+  if (seg_addr.get_segment_id().device_segment_id() >= get_num_segments()) {
+    logger().error(
+      "EphemeralSegmentManager::readv: invalid segment {}",
+      addr);
+    return crimson::ct_error::invarg::make();
+  }
+
+  if (seg_addr.get_segment_off() + len > config.segment_size) {
+    logger().error(
+      "EphemeralSegmentManager::readv: invalid offset {}~0x{:x}!",
+      addr,
+      len);
+    return crimson::ct_error::invarg::make();
+  }
+
+  auto offset = get_offset(addr);
+  for (auto &ptr : ptrs) {
+    ptr.copy_in(0, ptr.length(), buffer + offset);
+    offset += ptr.length();
+  }
+
+  logger().debug(
+    "segment_read to segment {} at offset 0x{:x}, "
+    "physical offset 0x{:x}, length 0x{:x}",
+    seg_addr.get_segment_id().device_segment_id(),
+    seg_addr.get_segment_off(),
+    get_offset(addr),
+    len);
+
+  return read_ertr::now().safe_then([] {
+    return seastar::yield();
+  });
+}
+
 }
diff --git a/src/crimson/os/seastore/segment_manager/ephemeral.h b/src/crimson/os/seastore/segment_manager/ephemeral.h
index 95bff4b1affe..dc9580a11d5a 100644
--- a/src/crimson/os/seastore/segment_manager/ephemeral.h
+++ b/src/crimson/os/seastore/segment_manager/ephemeral.h
@@ -121,6 +121,10 @@ public:
     size_t len,
     ceph::bufferptr &out) final;
 
+  read_ertr::future<> readv(
+    paddr_t addr,
+    std::vector<ceph::bufferptr> ptr) final;
+
   size_t get_available_size() const final {
     return config.size;
   }
diff --git a/src/crimson/os/seastore/segment_manager/zbd.cc b/src/crimson/os/seastore/segment_manager/zbd.cc
index 3eef55cc2b24..f0f3391f7d96 100644
--- a/src/crimson/os/seastore/segment_manager/zbd.cc
+++ b/src/crimson/os/seastore/segment_manager/zbd.cc
@@ -392,6 +392,45 @@ static read_ertr::future<> do_read(
   });
 }
 
+/// Read into ptrs with one vectored dma_read() starting at offset; an
+/// exception or a short read is reported as input_output_error.
+static read_ertr::future<> do_readv(
+  device_id_t device_id,
+  seastar::file &device,
+  uint64_t offset,
+  std::vector<ceph::bufferptr> ptrs)
+{
+  LOG_PREFIX(block_do_readv);
+  std::vector<iovec> iov;
+  size_t len = 0;
+  for (auto &ptr : ptrs) {
+    iov.emplace_back(ptr.c_str(), ptr.length());
+    len += ptr.length();
+  }
+  TRACE("{} poffset=0x{:x}~0x{:x} {} buffers",
+        device_id_printer_t{device_id}, offset, len, ptrs.size());
+  return device.dma_read(offset, std::move(iov)
+  ).handle_exception(
+    //FIXME: this is a little bit tricky, since seastar::future::handle_exception
+    // returns seastar::future, to return an crimson::ct_error, we have to create
+    // a seastar::future holding that crimson::ct_error. This is not necessary
+    // once seastar::future::handle_exception() returns seastar::futurize_t
+    [FNAME, device_id, offset, len](auto e) -> read_ertr::future<size_t>
+  {
+    ERROR("{} poffset=0x{:x}~0x{:x} got error -- {}",
+          device_id_printer_t{device_id}, offset, len, e);
+    return crimson::ct_error::input_output_error::make();
+  }).then([FNAME, device_id, offset, len](auto result) -> read_ertr::future<> {
+    if (result != len) {
+      ERROR("{} poffset=0x{:x}~0x{:x} read len=0x{:x} inconsistent",
+            device_id_printer_t{device_id}, offset, len, result);
+      return crimson::ct_error::input_output_error::make();
+    }
+    TRACE("{} poffset=0x{:x}~0x{:x} done", device_id_printer_t{device_id}, offset, len);
+    return read_ertr::now();
+  });
+}
+
 static
 ZBDSegmentManager::access_ertr::future
 read_metadata(seastar::file &device, seastar::stat_data sd)
@@ -695,6 +734,37 @@ ZBDSegmentManager::release_ertr::future<> ZBDSegmentManager::release(
   });
 }
 
+/// Vectored read within one segment; rejects with invarg an out-of-range
+/// segment id or a range extending past metadata.segment_capacity.
+SegmentManager::read_ertr::future<> ZBDSegmentManager::readv(
+  paddr_t addr,
+  std::vector<ceph::bufferptr> ptrs)
+{
+  LOG_PREFIX(ZBDSegmentManager::readv);
+  size_t len = 0;
+  for (auto &ptr : ptrs) {
+    len += ptr.length();
+  }
+  auto& seg_addr = addr.as_seg_paddr();
+  if (seg_addr.get_segment_id().device_segment_id() >= get_num_segments()) {
+    ERROR("invalid segment {}",
+          seg_addr.get_segment_id().device_segment_id());
+    return crimson::ct_error::invarg::make();
+  }
+
+  if (seg_addr.get_segment_off() + len > metadata.segment_capacity) {
+    ERROR("invalid read offset {}, len 0x{:x}",
+          addr,
+          len);
+    return crimson::ct_error::invarg::make();
+  }
+  return do_readv(
+    get_device_id(),
+    device,
+    get_offset(addr),
+    std::move(ptrs));
+}
+
 SegmentManager::read_ertr::future<> ZBDSegmentManager::read(
   paddr_t addr,
   size_t len,
diff --git a/src/crimson/os/seastore/segment_manager/zbd.h b/src/crimson/os/seastore/segment_manager/zbd.h
index cff300124d99..4163991fbec0 100644
--- a/src/crimson/os/seastore/segment_manager/zbd.h
+++ b/src/crimson/os/seastore/segment_manager/zbd.h
@@ -146,6 +146,9 @@ namespace crimson::os::seastore::segment_manager::zbd {
     paddr_t addr,
     size_t len,
     ceph::bufferptr &out) final;
+  read_ertr::future<> readv(
+    paddr_t addr,
+    std::vector<ceph::bufferptr> ptrs) final;
 
   read_ertr::future get_shard_nums() final;
 