From bd9d44dd89a7fa587182cf49c1f444c79126a99e Mon Sep 17 00:00:00 2001 From: Alex Ainscow Date: Fri, 6 Jun 2025 12:09:04 +0100 Subject: [PATCH] osd: Optimised EC should avoid decodes off the end of objects. This was a particular edge case whereby the need to do an encode and a decode as part of recovery was causing EC to attempt to do a decode off the end of the shard, despite this being unnecessary. Signed-off-by: Alex Ainscow --- src/osd/ECUtil.cc | 25 ++++++++++++++++++--- src/osd/ECUtil.h | 2 ++ src/test/osd/TestECBackend.cc | 42 +++++++++++++++++++++++++++++++++++ 3 files changed, 66 insertions(+), 3 deletions(-) diff --git a/src/osd/ECUtil.cc b/src/osd/ECUtil.cc index e931e68d086d9..51a00b7c27ccc 100644 --- a/src/osd/ECUtil.cc +++ b/src/osd/ECUtil.cc @@ -600,8 +600,25 @@ void shard_extent_map_t::pad_on_shards(const shard_extent_set_t &pad_to, } } +void shard_extent_map_t::pad_on_shard(const extent_set &pad_to, + const shard_id_t shard) { + + if (pad_to.size() == 0) { + return; + } + for (auto &[off, length] : pad_to) { + bufferlist bl; + bl.push_back(buffer::create_aligned(length, EC_ALIGN_SIZE)); + insert_in_shard(shard, off, bl); + } +} + void shard_extent_map_t::pad_on_shards(const extent_set &pad_to, const shard_id_set &shards) { + + if (pad_to.size() == 0) { + return; + } for (auto &shard : shards) { for (auto &[off, length] : pad_to) { bufferlist bl; @@ -667,6 +684,8 @@ int shard_extent_map_t::decode(const ErasureCodeInterfaceRef &ec_impl, shard_id_set decode_set = shard_id_set::intersection(need_set, sinfo->get_data_shards()); shard_id_set encode_set = shard_id_set::intersection(need_set, sinfo->get_parity_shards()); + shard_extent_set_t read_mask(sinfo->get_k_plus_m()); + sinfo->ro_size_to_read_mask(object_size, read_mask); int r = 0; if (!decode_set.empty()) { pad_on_shards(want, decode_set); @@ -674,11 +693,11 @@ int shard_extent_map_t::decode(const ErasureCodeInterfaceRef &ec_impl, * shards are decoded. The get_min_available functions should have already * worked out what needs to be read for this. */ - extent_set decode_for_parity; for (auto shard : encode_set) { - decode_for_parity.insert(want.at(shard)); + extent_set decode_for_parity; + decode_for_parity.intersection_of(want.at(shard), read_mask.at(shard)); + pad_on_shard(decode_for_parity, shard); } - pad_on_shards(decode_for_parity, decode_set); r = _decode(ec_impl, want_set, decode_set); } if (!r && !encode_set.empty()) { diff --git a/src/osd/ECUtil.h b/src/osd/ECUtil.h index e859b016baa32..f6f21b05353c6 100644 --- a/src/osd/ECUtil.h +++ b/src/osd/ECUtil.h @@ -950,6 +950,8 @@ public: const shard_id_set &shards); void pad_on_shards(const extent_set &pad_to, const shard_id_set &shards); + void pad_on_shard(const extent_set &pad_to, + const shard_id_t shard); void trim(const shard_extent_set_t &trim_to); int decode(const ErasureCodeInterfaceRef &ec_impl, const shard_extent_set_t &want, diff --git a/src/test/osd/TestECBackend.cc b/src/test/osd/TestECBackend.cc index a77b7ed239b3f..efcdf6f39f90a 100644 --- a/src/test/osd/TestECBackend.cc +++ b/src/test/osd/TestECBackend.cc @@ -1306,3 +1306,45 @@ TEST(ECCommon, decode) ceph_assert(0 == semap.decode(ec_impl, want, 2*1024*1024)); } + + +TEST(ECCommon, decode2) +{ + const unsigned int k = 4; + const unsigned int m = 2; + const uint64_t align_size = EC_ALIGN_SIZE; + const uint64_t swidth = k*align_size; + + + ECUtil::stripe_info_t s(k, m, swidth, vector(0)); + ECListenerStub listenerStub; + ASSERT_EQ(s.get_stripe_width(), swidth); + ASSERT_EQ(s.get_chunk_size(), swidth/k); + + const std::vector chunk_mapping = {}; // no remapping + ErasureCodeDummyImpl *ecode = new ErasureCodeDummyImpl(); + ecode->data_chunk_count = k; + ecode->chunk_count = k + m; + ErasureCodeInterfaceRef ec_impl(ecode); + ECCommon::ReadPipeline pipeline(g_ceph_context, ec_impl, s, &listenerStub); + + ECUtil::shard_extent_map_t semap(&s); + bufferlist bl528k; + bl528k.append_zero(528*1024); + bufferlist bl524k; + bl524k.append_zero(524*1024); + semap.insert_in_shard(shard_id_t(0), 0, bl524k); + semap.insert_in_shard(shard_id_t(1), 0, bl528k); + semap.insert_in_shard(shard_id_t(3), 0, bl524k); + semap.insert_in_shard(shard_id_t(4), 0, bl528k); + ECUtil::shard_extent_set_t want(k + m); + + //shard_want_to_read={1:[0~540672],2:[0~536576],3:[0~536576],4:[0~540672],5:[0~540672]} + want[shard_id_t(1)].insert(0, 528*1024); + want[shard_id_t(2)].insert(0, 524*1024); + want[shard_id_t(3)].insert(0, 524*1024); + want[shard_id_t(4)].insert(0, 528*1024); + want[shard_id_t(5)].insert(0, 528*1024); + + ceph_assert(0 == semap.decode(ec_impl, want, 2104*1024)); +} \ No newline at end of file -- 2.39.5