]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: Clear zero_for_decode for shards where read failed on recovery
authorAlex Ainscow <aainscow@uk.ibm.com>
Tue, 12 Aug 2025 16:12:45 +0000 (17:12 +0100)
committerAlex Ainscow <aainscow@uk.ibm.com>
Thu, 4 Sep 2025 10:36:06 +0000 (11:36 +0100)
Not clearing this can lead to a failed decode, which panics, rather than
a recovery or IO failure.

Signed-off-by: Alex Ainscow <aainscow@uk.ibm.com>
src/osd/ECBackend.cc

index 354ddbb364784f2d9bba6f1bf0b0196ce01e15ce..6567f4fad1db526984306c49802b6d987b04db99 100644 (file)
@@ -707,13 +707,10 @@ void ECBackend::handle_sub_read_reply(
     rop.debug_log.emplace_back(ECUtil::ERROR, op.from, complete.buffers_read);
     complete.buffers_read.erase_shard(from.shard);
     complete.processed_read_requests.erase(from.shard);
-    // If we are doing redundant reads, then we must take care that any failed
-    // reads are not replaced with a zero buffer. When fast_reads are disabled,
-    // the send_all_remaining_reads() call will replace the zeros_for_decode
-    // based on the recovery read.
-    if (rop.do_redundant_reads) {
-      rop.to_read.at(hoid).zeros_for_decode.erase(from.shard);
-    }
+    // If there was an error for non-zero data on this shard, then we must also
+    // ignore all zeros, or minimum_to_decode may conclude that it has enough
+    // shards available.
+    rop.to_read.at(hoid).zeros_for_decode.erase(from.shard);
     dout(20) << __func__ << " shard=" << from << " error=" << err << dendl;
   }