From f7a164a892363cc45a78a6b909e160380549c0ed Mon Sep 17 00:00:00 2001 From: Zengran Zhang Date: Mon, 29 Jun 2020 11:37:16 +0800 Subject: [PATCH] osd/ECBackend: optimize remaining read as readop contain multiple objects in github.com/ceph/ceph/pull/21911 we s/rop.to_read.swap/rop.to_read.erase/ in send_all_remaining_reads(...), and then introduce a new little issue: if one of to_read objects meet shard error, we will only change the uncompleted object's to read, but do_read_op will resend the completed objects shard read, because we did not modify their to_read's need. Fixes: https://tracker.ceph.com/issues/46876 Signed-off-by: Zengran Zhang --- src/osd/ECBackend.cc | 13 ++++++++++--- src/osd/ECBackend.h | 4 ++-- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/osd/ECBackend.cc b/src/osd/ECBackend.cc index 6c33e6c4052..5d5edd42476 100644 --- a/src/osd/ECBackend.cc +++ b/src/osd/ECBackend.cc @@ -1248,6 +1248,7 @@ void ECBackend::handle_sub_read_reply( ceph_assert(rop.in_progress.count(from)); rop.in_progress.erase(from); unsigned is_complete = 0; + bool need_resend = false; // For redundant reads check for completion as each shard comes in, // or in a non-recovery read check for completion once all the shards read. if (rop.do_redundant_reads || rop.in_progress.empty()) { @@ -1274,7 +1275,8 @@ void ECBackend::handle_sub_read_reply( if (!rop.do_redundant_reads) { int r = send_all_remaining_reads(iter->first, rop); if (r == 0) { - // We added to in_progress and not incrementing is_complete + // We changed the rop's to_read and not incrementing is_complete + need_resend = true; continue; } // Couldn't read any additional shards so handle as completed with errors @@ -1302,11 +1304,17 @@ void ECBackend::handle_sub_read_reply( rop.complete[iter->first].errors.clear(); } } + // avoid re-read for completed object as we may send remaining reads for uncopmpleted objects + rop.to_read.at(iter->first).need.clear(); + rop.to_read.at(iter->first).want_attrs = false; ++is_complete; } } } - if (rop.in_progress.empty() || is_complete == rop.complete.size()) { + if (need_resend) { + do_read_op(rop); + } else if (rop.in_progress.empty() || + is_complete == rop.complete.size()) { dout(20) << __func__ << " Complete: " << rop << dendl; rop.trace.event("ec read complete"); complete_read_op(rop, m); @@ -2452,7 +2460,6 @@ int ECBackend::send_all_remaining_reads( shards, want_attrs, c))); - do_read_op(rop); return 0; } diff --git a/src/osd/ECBackend.h b/src/osd/ECBackend.h index d659d64dff6..d833b4f00f5 100644 --- a/src/osd/ECBackend.h +++ b/src/osd/ECBackend.h @@ -352,8 +352,8 @@ public: }; struct read_request_t { const std::list > to_read; - const std::map>> need; - const bool want_attrs; + std::map>> need; + bool want_attrs; GenContext &> *cb; read_request_t( const std::list > &to_read, -- 2.47.3