From b27b0b68c39d8102ed3fa42dd3f63a5d671d03dc Mon Sep 17 00:00:00 2001 From: Alex Ainscow Date: Tue, 12 Aug 2025 17:12:45 +0100 Subject: [PATCH] osd: Clear zeros_for_decode for shards where read failed on recovery Not clearing it can lead to a failed decode, which panics, rather than to a recovery or I/O failure. Signed-off-by: Alex Ainscow (cherry picked from commit 6365803275b1b6a142200cc2db9735d48c86ae03) --- src/osd/ECBackend.cc | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/osd/ECBackend.cc b/src/osd/ECBackend.cc index 4a6530c14b1..e1477b737d6 100644 --- a/src/osd/ECBackend.cc +++ b/src/osd/ECBackend.cc @@ -707,13 +707,10 @@ void ECBackend::handle_sub_read_reply( rop.debug_log.emplace_back(ECUtil::ERROR, op.from, complete.buffers_read); complete.buffers_read.erase_shard(from.shard); complete.processed_read_requests.erase(from.shard); - // If we are doing redundant reads, then we must take care that any failed - // reads are not replaced with a zero buffer. When fast_reads are disabled, - // the send_all_remaining_reads() call will replace the zeros_for_decode - // based on the recovery read. - if (rop.do_redundant_reads) { - rop.to_read.at(hoid).zeros_for_decode.erase(from.shard); - } + // If there was an error for non-zero data on this shard, then we must also + // ignore all zeros, or minimum_to_decode may conclude that it has enough + // shards available. + rop.to_read.at(hoid).zeros_for_decode.erase(from.shard); dout(20) << __func__ << " shard=" << from << " error=" << err << dendl; } -- 2.39.5