git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
osd: Clear zeros_for_decode for shards where read failed on recovery
author: Alex Ainscow <aainscow@uk.ibm.com>
Tue, 12 Aug 2025 16:12:45 +0000 (17:12 +0100)
committer: Jon <jonathan.bailey1@ibm.com>
Fri, 3 Oct 2025 13:31:25 +0000 (14:31 +0100)
Not clearing the zeros_for_decode entry for a shard whose read failed can
lead to a failed decode, which panics, rather than to a recovery or IO failure.

Signed-off-by: Alex Ainscow <aainscow@uk.ibm.com>
(cherry picked from commit 6365803275b1b6a142200cc2db9735d48c86ae03)

src/osd/ECBackend.cc

index 4a6530c14b1cdacdfae1e46d8ebf27c245b60830..e1477b737d65339a458c1ceb93442d10a01e1cb0 100644 (file)
@@ -707,13 +707,10 @@ void ECBackend::handle_sub_read_reply(
     rop.debug_log.emplace_back(ECUtil::ERROR, op.from, complete.buffers_read);
     complete.buffers_read.erase_shard(from.shard);
     complete.processed_read_requests.erase(from.shard);
-    // If we are doing redundant reads, then we must take care that any failed
-    // reads are not replaced with a zero buffer. When fast_reads are disabled,
-    // the send_all_remaining_reads() call will replace the zeros_for_decode
-    // based on the recovery read.
-    if (rop.do_redundant_reads) {
-      rop.to_read.at(hoid).zeros_for_decode.erase(from.shard);
-    }
+    // If there was an error for non-zero data on this shard, then we must also
+    // ignore all zeros, or minimum_to_decode may conclude that it has enough
+    // shards available.
+    rop.to_read.at(hoid).zeros_for_decode.erase(from.shard);
     dout(20) << __func__ << " shard=" << from << " error=" << err << dendl;
   }