]> git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
crimson/os/seastore/transaction_manager: fully load extents when
authorXuehan Xu <xuxuehan@qianxin.com>
Wed, 21 Feb 2024 08:09:59 +0000 (16:09 +0800)
committerXuehan Xu <xuxuehan@qianxin.com>
Mon, 22 Apr 2024 03:51:01 +0000 (11:51 +0800)
remapping extents with full extent integrity check enabled

Signed-off-by: Xuehan Xu <xuxuehan@qianxin.com>
src/common/options/crimson.yaml.in
src/crimson/os/seastore/transaction_manager.cc
src/crimson/os/seastore/transaction_manager.h

index 4a0eb5627b818009cffef7809510181761fb8d8c..7030780d9a9fdc8022a5d0542ac8bee08700dc87 100644 (file)
@@ -83,6 +83,16 @@ options:
   level: dev
   desc: default logical address space reservation for seastore objects' metadata
   default: 16777216
+# TODO: implement sub-extent checksum and deprecate this configuration.
+- name: seastore_full_integrity_check
+  type: bool
+  level: dev
+  desc: Whether seastore need to fully check the integrity of each extent,
+        non-full integrity check means the integrity check might be skipped
+        during extent remapping for better performance, disable with caution
+  default: false
+# TODO: seastore_max_data_allocation_size should be dropped once the sub-extent
+#       read/checksum is implemented.
 - name: seastore_max_data_allocation_size
   type: size
   level: advanced
index f2f180f2718109f5dfe27fa3f0770aab97d2b3fb..581777e68f111c060deb57496ebab94a4883c01c 100644 (file)
@@ -34,7 +34,10 @@ TransactionManager::TransactionManager(
     lba_manager(std::move(_lba_manager)),
     journal(std::move(_journal)),
     epm(std::move(_epm)),
-    backref_manager(std::move(_backref_manager))
+    backref_manager(std::move(_backref_manager)),
+    full_extent_integrity_check(
+      crimson::common::get_conf<bool>(
+        "seastore_full_integrity_check"))
 {
   epm->set_extent_callback(this);
   journal->set_write_pipeline(&write_pipeline);
index bec1189e8c732ae4c72a53b81207ee22cd32dc05..6d99bd77ec2f08d952d2c7e6114432a4a974f307 100644 (file)
@@ -426,9 +426,21 @@ public:
 #endif
 
     // The according extent might be stable or pending.
-    return cache->get_extent_if_cached(
-      t, pin->get_val(), T::TYPE
-    ).si_then([this, &t, remaps,
+    auto fut = base_iertr::make_ready_future<TCachedExtentRef<T>>();
+    if (full_extent_integrity_check) {
+      fut = read_pin<T>(t, pin->duplicate());
+    } else {
+      fut = cache->get_extent_if_cached(
+       t, pin->get_val(), T::TYPE
+      ).si_then([](auto extent) {
+       if (extent) {
+         return extent->template cast<T>();
+       } else {
+         return TCachedExtentRef<T>();
+       }
+      });
+    }
+    return fut.si_then([this, &t, remaps,
               original_laddr = pin->get_key(),
              intermediate_base = pin->is_indirect()
                                  ? pin->get_intermediate_base()
@@ -807,6 +819,8 @@ private:
 
   WritePipeline write_pipeline;
 
+  bool full_extent_integrity_check = true;
+
   rewrite_extent_ret rewrite_logical_extent(
     Transaction& t,
     LogicalCachedExtentRef extent);
@@ -860,16 +874,24 @@ private:
        pref.link_child(&extent);
        extent.maybe_set_intermediate_laddr(pref);
       }
-    ).si_then([FNAME, &t, pin=std::move(pin)](auto ref) mutable -> ret {
-      SUBTRACET(seastore_tm, "got extent -- {}", t, *ref);
+    ).si_then([FNAME, &t, pin=std::move(pin), this](auto ref) mutable -> ret {
+      auto crc = ref->calc_crc32c();
+      SUBTRACET(
+       seastore_tm,
+       "got extent -- {}, chksum in the lba tree: {}, actual chksum: {}",
+       t,
+       *ref,
+       pin->get_checksum(),
+       crc);
       assert(ref->is_fully_loaded());
       bool inconsistent = false;
       if (pin->is_indirect()) {
        inconsistent = (pin->get_checksum() != 0);
-      } else {
-       inconsistent = !(pin->get_checksum() == 0 || // TODO: remapped extents may
-                                                    // not have recorded chksums
-                        pin->get_checksum() == ref->calc_crc32c());
+      } else if (full_extent_integrity_check) {
+       inconsistent = (pin->get_checksum() != crc);
+      } else { // !full_extent_integrity_check: remapped extent may be skipped
+       inconsistent = !(pin->get_checksum() == 0 ||
+                        pin->get_checksum() == crc);
       }
       if (unlikely(inconsistent)) {
        SUBERRORT(seastore_tm,
@@ -920,16 +942,24 @@ private:
        pref.link_child(&lextent);
        lextent.maybe_set_intermediate_laddr(pref);
       }
-    ).si_then([FNAME, &t, pin=std::move(pin)](auto ref) {
-      SUBTRACET(seastore_tm, "got extent -- {}", t, *ref);
+    ).si_then([FNAME, &t, pin=std::move(pin), this](auto ref) {
+      auto crc = ref->calc_crc32c();
+      SUBTRACET(
+       seastore_tm,
+       "got extent -- {}, chksum in the lba tree: {}, actual chksum: {}",
+       t,
+       *ref,
+       pin->get_checksum(),
+       crc);
       assert(ref->is_fully_loaded());
       bool inconsistent = false;
       if (pin->is_indirect()) {
        inconsistent = (pin->get_checksum() != 0);
-      } else {
-       inconsistent = !(pin->get_checksum() == 0 || // TODO: remapped extents may
-                                                    // not have recorded chksums
-                        pin->get_checksum() == ref->calc_crc32c());
+      } else if (full_extent_integrity_check) {
+       inconsistent = (pin->get_checksum() != crc);
+      } else { // !full_extent_integrity_check: remapped extent may be skipped
+       inconsistent = !(pin->get_checksum() == 0 ||
+                        pin->get_checksum() == crc);
       }
       if (unlikely(inconsistent)) {
        SUBERRORT(seastore_tm,