From c4f19f1d29d24147575beac1afc2fb28d939f121 Mon Sep 17 00:00:00 2001 From: Radoslaw Zarzynski Date: Tue, 12 Mar 2024 18:06:06 +0000 Subject: [PATCH] osd: glue read scheduling with decoding in terms of want_to_read Signed-off-by: Radoslaw Zarzynski --- src/erasure-code/ErasureCode.cc | 37 ++++++------------- src/erasure-code/ErasureCode.h | 5 ++- src/erasure-code/ErasureCodeInterface.h | 11 +++++- src/osd/ECBackend.cc | 3 +- src/osd/ECCommon.cc | 8 +++- src/osd/ECCommon.h | 3 +- src/osd/ECUtil.cc | 7 +++- src/osd/ECUtil.h | 1 + .../erasure-code/TestErasureCodeExample.cc | 5 ++- .../erasure-code/ceph-erasure-code-tool.cc | 7 +++- 10 files changed, 51 insertions(+), 36 deletions(-) diff --git a/src/erasure-code/ErasureCode.cc b/src/erasure-code/ErasureCode.cc index 1a44a55340359..39a96e0f1e123 100644 --- a/src/erasure-code/ErasureCode.cc +++ b/src/erasure-code/ErasureCode.cc @@ -348,33 +348,10 @@ int ErasureCode::to_string(const std::string &name, return 0; } -int ErasureCode::decode_concat(const map &chunks, +int ErasureCode::decode_concat(const set& want_to_read, + const map &chunks, bufferlist *decoded) { - set want_to_read; - set decode_chunks; - bool need_decode = false; - - for (unsigned int i = 0; i < get_data_chunk_count(); i++) { - want_to_read.insert(chunk_index(i)); - } - if (g_conf()->osd_ec_partial_reads && - chunks.size() < get_data_chunk_count()) { - // for partial_read - for (const auto& [key, bl] : chunks) { - if (want_to_read.contains(key)) { - decode_chunks.insert(key); - } else { - need_decode = true; - break; - } - } - if (!need_decode) { - // we need to decode if the input `chunks` contains anything else - // than data chunks (which boils down into coding chunks) - want_to_read.swap(decode_chunks); - } - } map decoded_map; int r = _decode(want_to_read, chunks, &decoded_map); if (r == 0) { @@ -387,6 +364,16 @@ int ErasureCode::decode_concat(const map &chunks, return r; } +int ErasureCode::decode_concat(const map &chunks, + bufferlist *decoded) +{ + set want_to_read; + for (unsigned int i = 0; i < get_data_chunk_count(); i++) { + want_to_read.insert(chunk_index(i)); + } + return decode_concat(want_to_read, chunks, decoded); +} + bool ErasureCode::is_systematic() const { return true; } diff --git a/src/erasure-code/ErasureCode.h b/src/erasure-code/ErasureCode.h index 5813109b70038..5a4b3e5185b32 100644 --- a/src/erasure-code/ErasureCode.h +++ b/src/erasure-code/ErasureCode.h @@ -112,8 +112,11 @@ namespace ceph { const std::string &default_value, std::ostream *ss); + int decode_concat(const std::set& want_to_read, + const std::map &chunks, + bufferlist *decoded) override; int decode_concat(const std::map &chunks, - bufferlist *decoded) override; + bufferlist *decoded) override; bool is_systematic() const override; diff --git a/src/erasure-code/ErasureCodeInterface.h b/src/erasure-code/ErasureCodeInterface.h index 2e5c3d91e3ace..fb4463c9590ef 100644 --- a/src/erasure-code/ErasureCodeInterface.h +++ b/src/erasure-code/ErasureCodeInterface.h @@ -453,10 +453,17 @@ namespace ceph { * * Returns 0 on success. * - * @param [in] chunks map chunk indexes to chunk data - * @param [out] decoded concatenante of the data chunks + * @param [in] want_to_read mapped std::set of chunks caller wants + * concatenated to `decoded`. This works as + * selectors for `chunks` + * @param [in] chunks set of chunks with data available for decoding + * @param [out] decoded must be non-null, chunks specified in `want_to_read` + * will be concatenated into `decoded` in index order * @return **0** on success or a negative errno on error. */ + virtual int decode_concat(const std::set& want_to_read, + const std::map &chunks, + bufferlist *decoded) = 0; virtual int decode_concat(const std::map &chunks, bufferlist *decoded) = 0; diff --git a/src/osd/ECBackend.cc b/src/osd/ECBackend.cc index 7095995ef9737..188004e64596e 100644 --- a/src/osd/ECBackend.cc +++ b/src/osd/ECBackend.cc @@ -471,7 +471,8 @@ struct RecoveryReadCompleter : ECCommon::ReadCompleter { void finish_single_request( const hobject_t &hoid, ECCommon::read_result_t &res, - list) override + list, + set wanted_to_read) override { if (!(res.r == 0 && res.errors.empty())) { backend._failed_push(hoid, res); diff --git a/src/osd/ECCommon.cc b/src/osd/ECCommon.cc index d3e5a50fafe1d..3f2ae7601207d 100644 --- a/src/osd/ECCommon.cc +++ b/src/osd/ECCommon.cc @@ -177,10 +177,12 @@ void ECCommon::ReadPipeline::complete_read_op(ReadOp &rop) rop.complete.begin(); ceph_assert(rop.to_read.size() == rop.complete.size()); for (; req_iter != rop.to_read.end(); ++req_iter, ++resiter) { + ceph_assert(rop.want_to_read.contains(req_iter->first)); rop.on_complete->finish_single_request( req_iter->first, resiter->second, - req_iter->second.to_read); + req_iter->second.to_read, + rop.want_to_read[req_iter->first]); } ceph_assert(rop.on_complete); std::move(*rop.on_complete).finish(rop.priority); @@ -506,7 +508,8 @@ struct ClientReadCompleter : ECCommon::ReadCompleter { void finish_single_request( const hobject_t &hoid, ECCommon::read_result_t &res, - list to_read) override + list to_read, + set wanted_to_read) override { extent_map result; if (res.r != 0) @@ -531,6 +534,7 @@ struct ClientReadCompleter : ECCommon::ReadCompleter { int r = ECUtil::decode( read_pipeline.sinfo, read_pipeline.ec_impl, + wanted_to_read, to_decode, &bl); if (r < 0) { diff --git a/src/osd/ECCommon.h b/src/osd/ECCommon.h index 3262ee5a239b6..6d18901fbde60 100644 --- a/src/osd/ECCommon.h +++ b/src/osd/ECCommon.h @@ -285,7 +285,8 @@ struct ECCommon { virtual void finish_single_request( const hobject_t &hoid, read_result_t &res, - std::list to_read) = 0; + std::list to_read, + std::set wanted_to_read) = 0; virtual void finish(int priority) && = 0; diff --git a/src/osd/ECUtil.cc b/src/osd/ECUtil.cc index 9a4d0d9adb9e8..44e3665844931 100644 --- a/src/osd/ECUtil.cc +++ b/src/osd/ECUtil.cc @@ -24,8 +24,10 @@ std::pair ECUtil::stripe_info_t::aligned_offset_len_to_chunk int ECUtil::decode( const stripe_info_t &sinfo, ErasureCodeInterfaceRef &ec_impl, + const set want_to_read, map &to_decode, - bufferlist *out) { + bufferlist *out) +{ ceph_assert(to_decode.size()); uint64_t total_data_size = to_decode.begin()->second.length(); @@ -51,8 +53,9 @@ int ECUtil::decode( chunks[j->first].substr_of(j->second, i, sinfo.get_chunk_size()); } bufferlist bl; - int r = ec_impl->decode_concat(chunks, &bl); + int r = ec_impl->decode_concat(want_to_read, chunks, &bl); ceph_assert(r == 0); + ceph_assert(bl.length() % sinfo.get_chunk_size() == 0); if (!g_conf()->osd_ec_partial_reads) { ceph_assert(bl.length() == sinfo.get_stripe_width()); } diff --git a/src/osd/ECUtil.h b/src/osd/ECUtil.h index f153f795964c1..a835ec3226476 100644 --- a/src/osd/ECUtil.h +++ b/src/osd/ECUtil.h @@ -97,6 +97,7 @@ public: int decode( const stripe_info_t &sinfo, ceph::ErasureCodeInterfaceRef &ec_impl, + const std::set want_to_read, std::map &to_decode, ceph::buffer::list *out); diff --git a/src/test/erasure-code/TestErasureCodeExample.cc b/src/test/erasure-code/TestErasureCodeExample.cc index 9fcf5c3632885..ce89cba337245 100644 --- a/src/test/erasure-code/TestErasureCodeExample.cc +++ b/src/test/erasure-code/TestErasureCodeExample.cc @@ -200,7 +200,10 @@ TEST(ErasureCodeExample, decode) // partial chunk decode map partial_decode; partial_decode[0] = encoded[0]; - EXPECT_EQ(0, example.decode_concat(partial_decode, &out)); + set partial_want_to_read{want_to_encode, want_to_encode+1}; + EXPECT_EQ(0, example.decode_concat(partial_want_to_read, + partial_decode, + &out)); // cannot recover map degraded; diff --git a/src/tools/erasure-code/ceph-erasure-code-tool.cc b/src/tools/erasure-code/ceph-erasure-code-tool.cc index 39f16a8cbbcc4..51343f7d615e4 100644 --- a/src/tools/erasure-code/ceph-erasure-code-tool.cc +++ b/src/tools/erasure-code/ceph-erasure-code-tool.cc @@ -260,6 +260,8 @@ int do_decode(const std::vector &args) { ceph::bufferlist decoded_data; std::string fname = args[3]; + std::set want_to_read; + const auto chunk_mapping = ec_impl->get_chunk_mapping(); for (auto &[shard, bl] : encoded_data) { std::string name = fname + "." + stringify(shard); std::string error; @@ -268,9 +270,12 @@ int do_decode(const std::vector &args) { std::cerr << "failed to read " << name << ": " << error << std::endl; return 1; } + auto chunk = static_cast(chunk_mapping.size()) > shard ? + chunk_mapping[shard] : shard; + want_to_read.insert(chunk); } - r = ECUtil::decode(*sinfo, ec_impl, encoded_data, &decoded_data); + r = ECUtil::decode(*sinfo, ec_impl, want_to_read, encoded_data, &decoded_data); if (r < 0) { std::cerr << "failed to decode: " << cpp_strerror(r) << std::endl; return 1; -- 2.39.5