From c4370146748f20746c4be2f0b3668e9526e1ecd8 Mon Sep 17 00:00:00 2001 From: Alex Ainscow Date: Fri, 3 Oct 2025 14:24:49 +0100 Subject: [PATCH] osd: Add extent_to_shard_extent interface to PGBackend. This allows a backend to expose how an object offset/length translates to an offset/length on a particular shard. For Replica, this is trivial. For EC, this means looking up the start and end offsets, then translating this to shard address space. Signed-off-by: Alex Ainscow --- src/osd/ECBackend.cc | 24 ++++++++++++++++++++++++ src/osd/ECBackend.h | 2 ++ src/osd/ECSwitch.h | 8 ++++++++ src/osd/PGBackend.h | 5 +++++ 4 files changed, 39 insertions(+) diff --git a/src/osd/ECBackend.cc b/src/osd/ECBackend.cc index 6c7695c8845e1..bf134d6759152 100644 --- a/src/osd/ECBackend.cc +++ b/src/osd/ECBackend.cc @@ -1010,6 +1010,30 @@ int ECBackend::objects_read_sync( return -EOPNOTSUPP; } +std::pair<uint64_t, uint64_t> ECBackend::extent_to_shard_extent(uint64_t off, uint64_t len) { + // sync reads are supported for sub-chunk reads where no reconstruct is + // required. 
+ uint64_t chunk_size = sinfo.get_chunk_size(); + uint64_t start_chunk = off / chunk_size; + // This calculation is wrong for length = 0, but it doesn't matter if these reads get sent to the primary + uint64_t end_chunk = (off + len - 1) / chunk_size; + uint64_t shard_offset, shard_len; + shard_id_t shard = get_parent()->whoami_shard().shard; + raw_shard_id_t raw_shard = sinfo.get_raw_shard(shard); + + if (end_chunk == start_chunk) { + shard_offset = sinfo.ro_offset_to_shard_offset(off, raw_shard); + shard_len = len; + } else { + ECUtil::shard_extent_set_t full_read(sinfo.get_k_plus_m()); + sinfo.ro_range_to_shard_extent_set(off, len, full_read); + shard_offset = full_read[shard].range_start(); + shard_len = full_read[shard].range_end() - shard_offset; + } + + return std::pair(shard_offset, shard_len); +} + void ECBackend::objects_read_async( const hobject_t &hoid, uint64_t object_size, diff --git a/src/osd/ECBackend.h b/src/osd/ECBackend.h index ea8c72ddde2d9..d3c490b1581f0 100644 --- a/src/osd/ECBackend.h +++ b/src/osd/ECBackend.h @@ -138,6 +138,8 @@ class ECBackend : public ECCommon { ceph::buffer::list *bl ); + std::pair<uint64_t, uint64_t> extent_to_shard_extent(uint64_t off, uint64_t len); + /** * Async read mechanism * diff --git a/src/osd/ECSwitch.h b/src/osd/ECSwitch.h index 8775c86625811..a8b133456f635 100644 --- a/src/osd/ECSwitch.h +++ b/src/osd/ECSwitch.h @@ -267,6 +267,14 @@ public: return legacy.objects_read_sync(hoid, off, len, op_flags, bl); } + std::pair<uint64_t, uint64_t> extent_to_shard_extent( + uint64_t off, uint64_t len) override { + if (is_optimized()) { + return optimized.extent_to_shard_extent(off, len); + } + ceph_abort_msg("Extent conversion not supported in legacy EC"); + } + void objects_read_async( const hobject_t &hoid, uint64_t object_size, diff --git a/src/osd/PGBackend.h b/src/osd/PGBackend.h index 5914f558b9aac..a3110b6fe5593 100644 --- a/src/osd/PGBackend.h +++ b/src/osd/PGBackend.h @@ -610,6 +610,11 @@ typedef std::shared_ptr OSDMapRef; return -EOPNOTSUPP; } + 
+ virtual std::pair<uint64_t, uint64_t> extent_to_shard_extent( + uint64_t off, uint64_t len) { + return std::pair(off, len); + } + virtual void objects_read_async( const hobject_t &hoid, uint64_t object_size, -- 2.39.5