]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
crimson/os/seastore: Verify checksum on full_extent reads only
authorMatan Breizman <mbreizma@redhat.com>
Wed, 3 Dec 2025 13:02:27 +0000 (13:02 +0000)
committerMatan Breizman <mbreizma@redhat.com>
Wed, 3 Dec 2025 18:08:20 +0000 (18:08 +0000)
Prior to getting the absent extent, define `is_full_extent` which would
determine if this is a full extent read request.

Verify checksum for full_extent_reads only. Otherwise, when `is_full_extent`
is false. A diffrent transaction might have fully loaded our
extent and would verify the checksum. Our current transaction data
is incomplete and might have an outdated checksum.

See:
```
      // NOTE: We should skip crc verification in this case.
      // There is a data race that makes checksum verification unsafe.
      // Consider a scenario where the current read request only requires a
      // portion of data from the extent. After Cache::read_extent finishes
      // but before this lambda is invoked, another transaction/continuation
      // may start to fill the entire extent, making the extent fully loaded
      // at that moment. When the previous partial read callback is invoked,
      // the extent's bufferptr is fully loaded but the data may be incomplete.
      // We should only verify the checksum when the current read request fills
      // the entire extent here.
```

Note: An alternative solution would be to verify the checksum prior to
      calling copmlete_io using a dedicated callback.
      See:
      https://github.com/ceph/ceph/pull/66496/commits/d5e63ee24d11722c03ef94e834a2bae03acaf6bc

Fixes: https://tracker.ceph.com/issues/73790
Fixes: https://tracker.ceph.com/issues/72864
Signed-off-by: Zhang Song <zhangsong02@qianxin.com>
Signed-off-by: Matan Breizman <mbreizma@redhat.com>
src/crimson/os/seastore/transaction_manager.cc
src/crimson/os/seastore/transaction_manager.h

index ca1a45af9a77431ae68831c2d138406413c31414..5ad13eec5e1a61128e41963388863f98808a0f69 100644 (file)
@@ -625,6 +625,32 @@ TransactionManager::do_submit_transaction(
   );
 }
 
+void TransactionManager::check_full_extent_integrity(
+  Transaction &t, uint32_t ref_crc, uint32_t pin_crc)
+{
+  LOG_PREFIX(TransactionManager::check_full_extent_integrity);;
+  SUBDEBUGT(seastore_tm,
+    "checksum in the lba tree: 0x{:x}, actual checksum: 0x{:x}",
+    t,
+    pin_crc,
+    ref_crc);
+  bool inconsistent = false;
+  if (full_extent_integrity_check) {
+    inconsistent = (pin_crc != ref_crc);
+  } else { // !full_extent_integrity_check: remapped extent may be skipped
+    inconsistent = !(pin_crc == 0 ||
+                     pin_crc == ref_crc);
+  }
+  if (unlikely(inconsistent)) {
+    SUBERRORT(seastore_tm,
+      "extent checksum inconsistent, recorded: 0x{:x}, actual: 0x{:x}",
+      t,
+      pin_crc,
+      ref_crc);
+      ceph_abort_msg("extent checksum inconsistent");
+  }
+}
+
 seastar::future<> TransactionManager::flush(OrderingHandle &handle)
 {
   LOG_PREFIX(TransactionManager::flush);
index 0ae35a957a8bca60ffe3147d2e4ef531eae42b1d..0c595e63f0cfdf0208b0718be6ddda21e5faefff 100644 (file)
@@ -1441,6 +1441,9 @@ private:
     extent_len_t partial_len,
     lextent_init_func_t<T> &&maybe_init) {
     LOG_PREFIX(TransactionManager::pin_to_extent);
+    SUBDEBUGT(seastore_tm, "getting absent extent from pin {}, 0x{:x}~0x{:x} ...",
+              t, pin, direct_partial_off, partial_len);
+
     static_assert(is_logical_type(T::TYPE));
     // must be user-oriented required by maybe_init
     assert(is_user_transaction(t.get_src()));
@@ -1450,8 +1453,13 @@ private:
       direct_partial_off = 0;
       partial_len = direct_length;
     }
-    SUBTRACET(seastore_tm, "getting absent extent from pin {}, 0x{:x}~0x{:x} ...",
-              t, pin, direct_partial_off, partial_len);
+    
+    // are we reading the entire extent?
+    bool is_full_extent = (direct_partial_off == 0 &&
+                           partial_len == direct_length);
+
+    SUBDEBUGT(seastore_tm, "getting absent extent from pin {}, 0x{:x}~0x{:x} full extent: {}...",
+              t, pin, direct_partial_off, partial_len, is_full_extent);
 
     auto ref = co_await cache->get_absent_extent<T>(
       t,
@@ -1476,37 +1484,24 @@ private:
       })
     );
 
-    if (ref->is_fully_loaded()) {
-      auto crc = ref->calc_crc32c();
-      SUBTRACET(
-        seastore_tm,
-        "got extent -- {}, chksum in the lba tree: 0x{:x}, actual chksum: 0x{:x}",
-        t,
-        *ref,
-        pin.get_checksum(),
-        crc);
-      bool inconsistent = false;
-      if (full_extent_integrity_check) {
-        inconsistent = (pin.get_checksum() != crc);
-      } else { // !full_extent_integrity_check: remapped extent may be skipped
-        inconsistent = !(pin.get_checksum() == 0 ||
-                         pin.get_checksum() == crc);
-      }
-      if (unlikely(inconsistent)) {
-        SUBERRORT(seastore_tm,
-          "extent checksum inconsistent, recorded: 0x{:x}, actual: 0x{:x}, {}",
-          t,
-          pin.get_checksum(),
-          crc,
-          *ref);
-        ceph_abort();
-      }
-    } else {
-      assert(!full_extent_integrity_check);
+    SUBDEBUGT(seastore_tm, "got extent -- {} fully_loaded: {}",
+              t, *ref, ref->is_fully_loaded());
+
+    // Check integrity for full extent reads only.
+    // Avoid if this extent was fully loaded
+    // by another transaction.
+    // See: https://tracker.ceph.com/issues/73790
+    if (is_full_extent) {
+      assert(ref->is_fully_loaded());
+      check_full_extent_integrity(t, ref->calc_crc32c(), pin.get_checksum());
     }
+
     co_return std::move(ref);
   }
 
+  void check_full_extent_integrity(
+    Transaction &t, uint32_t ref_crc, uint32_t pin_crc);
+
   /**
    * pin_to_extent_by_type
    *