]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
os/bluestore: accept undecodable multi-block bluefs transactions on log replay. (42830/head)
author Igor Fedotov <ifedotov@suse.com>
Wed, 18 Aug 2021 10:39:02 +0000 (13:39 +0300)
committer Igor Fedotov <ifedotov@suse.com>
Wed, 18 Aug 2021 16:18:43 +0000 (19:18 +0300)

We should proceed with OSD startup when an undecodable bluefs
transaction spanning multiple disk blocks is detected during log replay.
The rationale is that such a transaction might appear after an unexpected
power down, when not every disk block of the transaction has been written
to disk. Hence we can consider this a normal log replay stop condition.

https://tracker.ceph.com/issues/52079

Signed-off-by: Igor Fedotov <ifedotov@suse.com>
src/os/bluestore/BlueFS.cc

index 3a653f425d043392016acae6c17c1aad8f0c2e94..a0d4b1161d09c69b4f4c7fb17f00fa008c580515 100644 (file)
@@ -1152,18 +1152,27 @@ int BlueFS::_replay(bool noop, bool to_stdout)
       bl.claim_append(t);
       read_pos += r;
     }
-    seen_recs = true;
     bluefs_transaction_t t;
     try {
       auto p = bl.cbegin();
       decode(t, p);
+      seen_recs = true;
     }
     catch (ceph::buffer::error& e) {
-      derr << __func__ << " 0x" << std::hex << pos << std::dec
-           << ": stop: failed to decode: " << e.what()
-           << dendl;
-      delete log_reader;
-      return -EIO;
+      // Multi-block transactions might be incomplete due to unexpected
+      // power off. Hence let's treat that as a regular stop condition.
+      if (seen_recs && more) {
+        dout(10) << __func__ << " 0x" << std::hex << pos << std::dec
+                 << ": stop: failed to decode: " << e.what()
+                 << dendl;
+      } else {
+        derr << __func__ << " 0x" << std::hex << pos << std::dec
+             << ": stop: failed to decode: " << e.what()
+             << dendl;
+        delete log_reader;
+        return -EIO;
+      }
+      break;
     }
     ceph_assert(seq == t.seq);
     dout(10) << __func__ << " 0x" << std::hex << pos << std::dec