From b27b0b68c39d8102ed3fa42dd3f63a5d671d03dc Mon Sep 17 00:00:00 2001
From: Alex Ainscow <aainscow@uk.ibm.com>
Date: Tue, 12 Aug 2025 17:12:45 +0100
Subject: [PATCH] osd: Clear zeros_for_decode for shards where read failed on
 recovery

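If a shard's read fails but its entry is left in zeros_for_decode,
minimum_to_decode may conclude that it has enough shards available. This
can lead to a failed decode, which panics, rather than a recovery or IO
failure.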

Signed-off-by: Alex Ainscow <aainscow@uk.ibm.com>
(cherry picked from commit 6365803275b1b6a142200cc2db9735d48c86ae03)
---
 src/osd/ECBackend.cc | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/src/osd/ECBackend.cc b/src/osd/ECBackend.cc
index 4a6530c14b1..e1477b737d6 100644
--- a/src/osd/ECBackend.cc
+++ b/src/osd/ECBackend.cc
@@ -707,13 +707,10 @@ void ECBackend::handle_sub_read_reply(
     rop.debug_log.emplace_back(ECUtil::ERROR, op.from, complete.buffers_read);
     complete.buffers_read.erase_shard(from.shard);
     complete.processed_read_requests.erase(from.shard);
-    // If we are doing redundant reads, then we must take care that any failed
-    // reads are not replaced with a zero buffer. When fast_reads are disabled,
-    // the send_all_remaining_reads() call will replace the zeros_for_decode
-    // based on the recovery read.
-    if (rop.do_redundant_reads) {
-      rop.to_read.at(hoid).zeros_for_decode.erase(from.shard);
-    }
+    // If there was an error for non-zero data on this shard, then we must also
+    // ignore all zeros, or minimum_to_decode may conclude that it has enough
+    // shards available.
+    rop.to_read.at(hoid).zeros_for_decode.erase(from.shard);
     dout(20) << __func__ << " shard=" << from << " error=" << err << dendl;
   }
 
-- 
2.39.5