]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
crimson/os/seastore: record extents' chksums in the lba tree
authorXuehan Xu <xuxuehan@qianxin.com>
Sun, 4 Feb 2024 09:54:26 +0000 (17:54 +0800)
committerMatan Breizman <mbreizma@redhat.com>
Tue, 21 May 2024 12:42:51 +0000 (15:42 +0300)
Signed-off-by: Xuehan Xu <xuxuehan@qianxin.com>
(cherry picked from commit f4b570da86f77dfa08f33a0adc6135a1aa608233)

18 files changed:
src/crimson/common/fixed_kv_node_layout.h
src/crimson/os/seastore/btree/fixed_kv_btree.h
src/crimson/os/seastore/btree/fixed_kv_node.h
src/crimson/os/seastore/cache.cc
src/crimson/os/seastore/cache.h
src/crimson/os/seastore/cached_extent.h
src/crimson/os/seastore/extent_placement_manager.cc
src/crimson/os/seastore/lba_manager.cc
src/crimson/os/seastore/lba_manager.h
src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.cc
src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.h
src/crimson/os/seastore/lba_manager/btree/lba_btree_node.h
src/crimson/os/seastore/ordering_handle.h
src/crimson/os/seastore/transaction.h
src/crimson/os/seastore/transaction_manager.cc
src/crimson/os/seastore/transaction_manager.h
src/test/crimson/seastore/test_btree_lba_manager.cc
src/test/crimson/seastore/test_seastore_cache.cc

index 676563594e0cebc08315b201adb5beb0a3fe9db8..1b379f843fb4ae1baf2ea79e41ef2810eebbb74b 100644 (file)
@@ -9,6 +9,7 @@
 #include <boost/iterator/counting_iterator.hpp>
 
 #include "include/byteorder.h"
+#include "include/crc32c.h"
 
 #include "crimson/common/layout.h"
 
@@ -52,8 +53,9 @@ template <
 class FixedKVNodeLayout {
   char *buf = nullptr;
 
-  using L = absl::container_internal::Layout<ceph_le32, MetaInt, KINT, VINT>;
-  static constexpr L layout{1, 1, CAPACITY, CAPACITY};
+  using L = absl::container_internal::Layout<
+    ceph_le32, ceph_le32, MetaInt, KINT, VINT>;
+  static constexpr L layout{1, 1, 1, CAPACITY, CAPACITY};
 
 public:
   template <bool is_const>
@@ -431,7 +433,7 @@ public:
   }
 
   uint16_t get_size() const {
-    return *layout.template Pointer<0>(buf);
+    return *layout.template Pointer<1>(buf);
   }
 
   /**
@@ -440,7 +442,19 @@ public:
    * Set size representation to match size
    */
   void set_size(uint16_t size) {
-    *layout.template Pointer<0>(buf) = size;
+    *layout.template Pointer<1>(buf) = size;
+  }
+
+  uint32_t get_phy_checksum() const {
+    return *layout.template Pointer<0>(buf);
+  }
+
+  void set_phy_checksum(uint32_t checksum) {
+    *layout.template Pointer<0>(buf) = checksum;
+  }
+
+  uint32_t calc_phy_checksum() const {
+    return calc_phy_checksum_iteratively<4>(1);
   }
 
   /**
@@ -451,11 +465,11 @@ public:
    * in delta_t
    */
   Meta get_meta() const {
-    MetaInt &metaint = *layout.template Pointer<1>(buf);
+    MetaInt &metaint = *layout.template Pointer<2>(buf);
     return Meta(metaint);
   }
   void set_meta(const Meta &meta) {
-    *layout.template Pointer<1>(buf) = MetaInt(meta);
+    *layout.template Pointer<2>(buf) = MetaInt(meta);
   }
 
   constexpr static size_t get_capacity() {
@@ -652,10 +666,23 @@ private:
    * Get pointer to start of key array
    */
   KINT *get_key_ptr() {
-    return layout.template Pointer<2>(buf);
+    return layout.template Pointer<3>(buf);
   }
   const KINT *get_key_ptr() const {
-    return layout.template Pointer<2>(buf);
+    return layout.template Pointer<3>(buf);
+  }
+
+  template <size_t N>
+  uint32_t calc_phy_checksum_iteratively(uint32_t crc) const {
+    if constexpr (N == 0) {
+      return crc;
+    } else {
+      uint32_t r = ceph_crc32c(
+       crc,
+       (unsigned char const *)layout.template Pointer<N>(buf),
+       layout.template Size<N>());
+      return calc_phy_checksum_iteratively<N-1>(r);
+    }
   }
 
   /**
@@ -664,10 +691,10 @@ private:
    * Get pointer to start of val array
    */
   VINT *get_val_ptr() {
-    return layout.template Pointer<3>(buf);
+    return layout.template Pointer<4>(buf);
   }
   const VINT *get_val_ptr() const {
-    return layout.template Pointer<3>(buf);
+    return layout.template Pointer<4>(buf);
   }
 
   /**
index f15682621fb77cea4103c86f7a055aabd2c4ae8f..b0901213716bf3152b7fbd02919f5d973fa1e7b4 100644 (file)
@@ -1275,6 +1275,17 @@ private:
       init_internal
     ).si_then([FNAME, c, offset, init_internal, depth, begin, end](
               typename internal_node_t::Ref ret) {
+      if (unlikely(ret->get_in_extent_checksum()
+          != ret->get_last_committed_crc())) {
+        SUBERRORT(
+          seastore_fixedkv_tree,
+          "internal fixedkv extent checksum inconsistent, "
+          "recorded: {}, actually: {}",
+          c.trans,
+          ret->get_in_extent_checksum(),
+          ret->get_last_committed_crc());
+        ceph_abort();
+      }
       SUBTRACET(
         seastore_fixedkv_tree,
         "read internal at offset {} {}",
@@ -1349,6 +1360,16 @@ private:
       init_leaf
     ).si_then([FNAME, c, offset, init_leaf, begin, end]
       (typename leaf_node_t::Ref ret) {
+      if (unlikely(ret->get_in_extent_checksum()
+          != ret->get_last_committed_crc())) {
+        SUBERRORT(
+          seastore_fixedkv_tree,
+          "leaf fixedkv extent checksum inconsistent, recorded: {}, actually: {}",
+          c.trans,
+          ret->get_in_extent_checksum(),
+          ret->get_last_committed_crc());
+        ceph_abort();
+      }
       SUBTRACET(
         seastore_fixedkv_tree,
         "read leaf at offset {} {}",
index e9ffeed452ee13de5eca9b834e0c2580833cf8cc..6d7db2c4102ae9a6a6ed921da338013a1cf7a829 100644 (file)
@@ -662,6 +662,18 @@ struct FixedKVInternalNode
     return this->get_size();
   }
 
+  uint32_t get_crc32c() const final {
+    return this->calc_phy_checksum();
+  }
+
+  void update_in_extent_chksum_field(uint32_t crc) final {
+    this->set_phy_checksum(crc);
+  }
+
+  uint32_t get_in_extent_checksum() const {
+    return this->get_phy_checksum();
+  }
+
   typename node_layout_t::delta_buffer_t delta_buffer;
   typename node_layout_t::delta_buffer_t *maybe_get_delta_buffer() {
     return this->is_mutation_pending() 
@@ -887,7 +899,9 @@ struct FixedKVInternalNode
     typename node_layout_t::delta_buffer_t buffer;
     buffer.copy_in(bl.front().c_str(), bl.front().length());
     buffer.replay(*this);
-    this->set_last_committed_crc(this->get_crc32c());
+    auto crc = calc_crc32c();
+    this->set_last_committed_crc(crc);
+    this->update_in_extent_chksum_field(crc);
     resolve_relative_addrs(base);
   }
 
@@ -1086,6 +1100,18 @@ struct FixedKVLeafNode
     return this->get_size();
   }
 
+  uint32_t get_crc32c() const final {
+    return this->calc_phy_checksum();
+  }
+
+  void update_in_extent_chksum_field(uint32_t crc) final {
+    this->set_phy_checksum(crc);
+  }
+
+  uint32_t get_in_extent_checksum() const {
+    return this->get_phy_checksum();
+  }
+
   typename node_layout_t::delta_buffer_t delta_buffer;
   virtual typename node_layout_t::delta_buffer_t *maybe_get_delta_buffer() {
     return this->is_mutation_pending() ? &delta_buffer : nullptr;
@@ -1189,7 +1215,9 @@ struct FixedKVLeafNode
     typename node_layout_t::delta_buffer_t buffer;
     buffer.copy_in(bl.front().c_str(), bl.front().length());
     buffer.replay(*this);
-    this->set_last_committed_crc(this->get_crc32c());
+    auto crc = calc_crc32c();
+    this->set_last_committed_crc(crc);
+    this->update_in_extent_chksum_field(crc);
     this->resolve_relative_addrs(base);
   }
 
index 880967031544aee2a97edb3787b104749cf8d0df..537f7bea52fd8045cdd36618e49dde1a95c9f7b7 100644 (file)
@@ -1520,7 +1520,7 @@ void Cache::complete_commit(
             t, final_block_start, start_seq);
 
   std::vector<backref_entry_ref> backref_list;
-  t.for_each_fresh_block([&](const CachedExtentRef &i) {
+  t.for_each_finalized_fresh_block([&](const CachedExtentRef &i) {
     if (!i->is_valid()) {
       return;
     }
@@ -1530,7 +1530,7 @@ void Cache::complete_commit(
       is_inline = true;
       i->set_paddr(final_block_start.add_relative(i->get_paddr()));
     }
-    i->last_committed_crc = i->get_crc32c();
+    assert(i->get_last_committed_crc() == i->get_crc32c());
     i->pending_for_transaction = TRANS_ID_NULL;
     i->on_initial_write();
 
index c06193cbbf57584195c20a2ce9027056985f8947..b6a8cddde511d50302cb72b2fc70cd86a70011a4 100644 (file)
@@ -1652,16 +1652,15 @@ private:
         LOG_PREFIX(Cache::read_extent);
        if (likely(extent->state == CachedExtent::extent_state_t::CLEAN_PENDING)) {
          extent->state = CachedExtent::extent_state_t::CLEAN;
-         /* TODO: crc should be checked against LBA manager */
+       }
+       ceph_assert(extent->state == CachedExtent::extent_state_t::EXIST_CLEAN
+         || extent->state == CachedExtent::extent_state_t::CLEAN
+         || !extent->is_valid());
+       if (extent->is_valid()) {
+         // crc will be checked against LBA leaf entry for logical extents,
+         // or check against in-extent crc for physical extents.
          extent->last_committed_crc = extent->get_crc32c();
-
          extent->on_clean_read();
-       } else if (extent->state == CachedExtent::extent_state_t::EXIST_CLEAN ||
-          extent->state == CachedExtent::extent_state_t::CLEAN) {
-         /* TODO: crc should be checked against LBA manager */
-         extent->last_committed_crc = extent->get_crc32c();
-        } else {
-         ceph_assert(!extent->is_valid());
        }
         extent->complete_io();
         SUBDEBUG(seastore_cache, "read extent done -- {}", *extent);
index 214e1283601027d90b7f14ccd91476d168638472..6fc430d039a84c5cca000f4f0d7cc292edc7b4f8 100644 (file)
@@ -17,6 +17,9 @@
 #include "crimson/os/seastore/seastore_types.h"
 
 struct btree_lba_manager_test;
+struct lba_btree_test;
+struct btree_test_base;
+struct cache_test_t;
 
 namespace crimson::os::seastore {
 
@@ -539,7 +542,7 @@ public:
   }
 
   /// Returns crc32c of buffer
-  uint32_t get_crc32c() {
+  virtual uint32_t get_crc32c() const {
     return ceph_crc32c(
       1,
       reinterpret_cast<const unsigned char *>(get_bptr().c_str()),
@@ -600,7 +603,9 @@ public:
   }
 
   paddr_t get_prior_paddr_and_reset() {
-    assert(prior_poffset);
+    if (!prior_poffset) {
+      return poffset;
+    }
     auto ret = *prior_poffset;
     prior_poffset.reset();
     return ret;
@@ -781,6 +786,13 @@ protected:
     prior_instance.reset();
   }
 
+  /**
+   * Called when updating extents' last_committed_crc, some extents may
+   * have in-extent checksum fields, like LBA/backref nodes, which are
+   * supposed to be updated in this method.
+   */
+  virtual void update_in_extent_chksum_field(uint32_t) {}
+
   /// Sets last_committed_crc
   void set_last_committed_crc(uint32_t crc) {
     last_committed_crc = crc;
@@ -832,6 +844,9 @@ protected:
   template <typename, typename>
   friend class BtreeNodeMapping;
   friend class ::btree_lba_manager_test;
+  friend class ::lba_btree_test;
+  friend class ::btree_test_base;
+  friend class ::cache_test_t;
 };
 
 std::ostream &operator<<(std::ostream &, CachedExtent::extent_state_t);
index 4d09e8da459534654a5e75ac62b50d6809bebe8c..77bf1c5184c696de77282ec26accd105b7170de9 100644 (file)
@@ -335,6 +335,14 @@ ExtentPlacementManager::write_delayed_ool_extents(
   return trans_intr::do_for_each(alloc_map, [&t](auto& p) {
     auto writer = p.first;
     auto& extents = p.second;
+#ifndef NDEBUG
+    std::for_each(
+      extents.begin(),
+      extents.end(),
+      [](auto &extent) {
+      assert(extent->is_valid());
+    });
+#endif
     return writer->alloc_write_ool_extents(t, extents);
   });
 }
index 466fa8bd06297def84d9d6e44d3e3e53a6cc76cf..fc0b72a9814c18ad34bf59217ae66796fee16472 100644 (file)
@@ -13,6 +13,7 @@ LBAManager::update_mappings(
 {
   return trans_intr::do_for_each(extents,
                                 [this, &t](auto &extent) {
+    assert(extent->get_last_committed_crc());
     return update_mapping(
       t,
       extent->get_laddr(),
@@ -20,6 +21,7 @@ LBAManager::update_mappings(
       extent->get_prior_paddr_and_reset(),
       extent->get_length(),
       extent->get_paddr(),
+      extent->get_last_committed_crc(),
       nullptr  // all the extents should have already been
                // added to the fixed_kv_btree
     ).discard_result();
index 3fa0987e150af8281a1bd7465f6e7c2694564dba..6082f110f0db8403a8a987996ce941b75653b2b7 100644 (file)
@@ -87,6 +87,7 @@ public:
     laddr_t hint,
     extent_len_t len,
     paddr_t addr,
+    uint32_t checksum,
     LogicalCachedExtent &nextent,
     extent_ref_count_t refcount = EXTENT_DEFAULT_REF_COUNT) = 0;
 
@@ -196,6 +197,7 @@ public:
     paddr_t prev_addr,
     extent_len_t len,
     paddr_t paddr,
+    uint32_t checksum,
     LogicalCachedExtent *nextent) = 0;
 
   /**
index d2a8b3dc0cbdddd5270bb827713d7445de6715db..e9fb620836b69728e056d43b280bef52a92ecd33 100644 (file)
@@ -307,6 +307,7 @@ BtreeLBAManager::_alloc_extent(
   extent_len_t len,
   pladdr_t addr,
   paddr_t actual_addr,
+  uint32_t checksum,
   LogicalCachedExtent* nextent,
   extent_ref_count_t refcount)
 {
@@ -331,7 +332,7 @@ BtreeLBAManager::_alloc_extent(
     c,
     hint,
     [this, FNAME, c, hint, len, addr, lookup_attempts,
-    &t, nextent, refcount](auto &btree, auto &state) {
+    &t, nextent, refcount, checksum](auto &btree, auto &state) {
       return LBABtree::iterate_repeat(
        c,
        btree.upper_bound_right(c, hint),
@@ -367,12 +368,13 @@ BtreeLBAManager::_alloc_extent(
              interruptible::ready_future_marker{},
              seastar::stop_iteration::no);
          }
-       }).si_then([FNAME, c, addr, len, hint, &btree, &state, nextent, refcount] {
+       }).si_then([FNAME, c, addr, len, hint, &btree,
+                   &state, nextent, refcount, checksum] {
          return btree.insert(
            c,
            *state.insert_iter,
            state.last_end,
-           lba_map_val_t{len, pladdr_t(addr), refcount, 0},
+           lba_map_val_t{len, pladdr_t(addr), refcount, checksum},
            nextent
          ).si_then([&state, FNAME, c, addr, len, hint, nextent](auto &&p) {
            auto [iter, inserted] = std::move(p);
@@ -535,6 +537,7 @@ BtreeLBAManager::update_mapping(
   paddr_t prev_addr,
   extent_len_t len,
   paddr_t addr,
+  uint32_t checksum,
   LogicalCachedExtent *nextent)
 {
   LOG_PREFIX(BtreeLBAManager::update_mapping);
@@ -542,7 +545,7 @@ BtreeLBAManager::update_mapping(
   return _update_mapping(
     t,
     laddr,
-    [prev_addr, addr, prev_len, len](
+    [prev_addr, addr, prev_len, len, checksum](
       const lba_map_val_t &in) {
       assert(!addr.is_null());
       lba_map_val_t ret = in;
@@ -551,6 +554,7 @@ BtreeLBAManager::update_mapping(
       ceph_assert(in.len == prev_len);
       ret.pladdr = addr;
       ret.len = len;
+      ret.checksum = checksum;
       return ret;
     },
     nextent
index 3ad6f9380c2c73c24ff00e9a0516477cee50438e..85ffe3b805cf2165d140753648e7e23d83d3c58c 100644 (file)
@@ -221,6 +221,7 @@ public:
       len,
       P_ADDR_ZERO,
       P_ADDR_NULL,
+      0,
       nullptr,
       EXTENT_DEFAULT_REF_COUNT);
   }
@@ -241,6 +242,8 @@ public:
       len,
       intermediate_key,
       actual_addr,
+      0,       // crc will only be used and checked with LBA direct mappings
+               // also see pin_to_extent(_by_type)
       nullptr,
       EXTENT_DEFAULT_REF_COUNT
     ).si_then([&t, this, intermediate_base](auto indirect_mapping) {
@@ -267,6 +270,7 @@ public:
     laddr_t hint,
     extent_len_t len,
     paddr_t addr,
+    uint32_t checksum,
     LogicalCachedExtent &ext,
     extent_ref_count_t refcount = EXTENT_DEFAULT_REF_COUNT) final
   {
@@ -276,6 +280,7 @@ public:
       len,
       addr,
       P_ADDR_NULL,
+      checksum,
       &ext,
       refcount);
   }
@@ -341,6 +346,7 @@ public:
     paddr_t prev_addr,
     extent_len_t len,
     paddr_t paddr,
+    uint32_t checksum,
     LogicalCachedExtent*) final;
 
   get_physical_extent_if_live_ret get_physical_extent_if_live(
@@ -410,6 +416,7 @@ private:
     extent_len_t len,
     pladdr_t addr,
     paddr_t actual_addr,
+    uint32_t checksum,
     LogicalCachedExtent*,
     extent_ref_count_t refcount);
 
index fc5d4e98fd9fe296d8fa4bd471aacf7e5fcf98e6..3b760ff06a24e95843d46aeb5f773b25c37fa35c 100644 (file)
@@ -63,8 +63,8 @@ using lba_node_meta_le_t = fixed_kv_node_meta_le_t<laddr_le_t>;
  * LBA Tree.
  *
  * Layout (4k):
+ *   checksum   :                            4b
  *   size       : uint32_t[1]                4b
- *   (padding)  :                            4b
  *   meta       : lba_node_meta_le_t[3]      (1*24)b
  *   keys       : laddr_t[255]               (254*8)b
  *   values     : paddr_t[255]               (254*8)b
@@ -101,8 +101,8 @@ using LBAInternalNodeRef = LBAInternalNode::Ref;
  * LBA Tree.
  *
  * Layout (4k):
+ *   checksum   :                            4b
  *   size       : uint32_t[1]                4b
- *   (padding)  :                            4b
  *   meta       : lba_node_meta_le_t[3]      (1*24)b
  *   keys       : laddr_t[170]               (140*8)b
  *   values     : lba_map_val_t[170]         (140*21)b
index a7802fda383d97dac9085b566774b5d26f7c232c..8ab8442acd9f7acc6b8073beb6bc5ce556fa47b3 100644 (file)
@@ -14,9 +14,9 @@ struct WritePipeline {
   struct ReserveProjectedUsage : OrderedExclusivePhaseT<ReserveProjectedUsage> {
     constexpr static auto type_name = "WritePipeline::reserve_projected_usage";
   } reserve_projected_usage;
-  struct OolWrites : UnorderedStageT<OolWrites> {
-    constexpr static auto type_name = "UnorderedStage::ool_writes_stage";
-  } ool_writes;
+  struct OolWritesAndLBAUpdates : UnorderedStageT<OolWritesAndLBAUpdates> {
+    constexpr static auto type_name = "UnorderedStage::ool_writes_and_update_lba_stage";
+  } ool_writes_and_lba_updates;
   struct Prepare : OrderedExclusivePhaseT<Prepare> {
     constexpr static auto type_name = "WritePipeline::prepare_phase";
   } prepare;
@@ -29,7 +29,7 @@ struct WritePipeline {
 
   using  BlockingEvents = std::tuple<
     ReserveProjectedUsage::BlockingEvent,
-    OolWrites::BlockingEvent,
+    OolWritesAndLBAUpdates::BlockingEvent,
     Prepare::BlockingEvent,
     DeviceSubmission::BlockingEvent,
     Finalize::BlockingEvent
@@ -70,7 +70,7 @@ struct OperationProxy {
   OperationProxy(OperationRef op) : op(std::move(op)) {}
 
   virtual seastar::future<> enter(WritePipeline::ReserveProjectedUsage&) = 0;
-  virtual seastar::future<> enter(WritePipeline::OolWrites&) = 0;
+  virtual seastar::future<> enter(WritePipeline::OolWritesAndLBAUpdates&) = 0;
   virtual seastar::future<> enter(WritePipeline::Prepare&) = 0;
   virtual seastar::future<> enter(WritePipeline::DeviceSubmission&) = 0;
   virtual seastar::future<> enter(WritePipeline::Finalize&) = 0;
@@ -95,7 +95,7 @@ struct OperationProxyT : OperationProxy {
   seastar::future<> enter(WritePipeline::ReserveProjectedUsage& s) final {
     return that()->enter_stage(s);
   }
-  seastar::future<> enter(WritePipeline::OolWrites& s) final {
+  seastar::future<> enter(WritePipeline::OolWritesAndLBAUpdates& s) final {
     return that()->enter_stage(s);
   }
   seastar::future<> enter(WritePipeline::Prepare& s) final {
index 691a569cdd602cb65c8fe65cf66ceea91a7fcccc..5e3f165560e58a4c4134a7501caee3975395d26a 100644 (file)
@@ -333,11 +333,16 @@ public:
   }
 
   template <typename F>
-  auto for_each_fresh_block(F &&f) const {
+  auto for_each_finalized_fresh_block(F &&f) const {
     std::for_each(written_ool_block_list.begin(), written_ool_block_list.end(), f);
     std::for_each(inline_block_list.begin(), inline_block_list.end(), f);
   }
 
+  template <typename F>
+  auto for_each_existing_block(F &&f) {
+    std::for_each(existing_block_list.begin(), existing_block_list.end(), f);
+  }
+
   const io_stat_t& get_fresh_block_stats() const {
     return fresh_block_stats;
   }
index c33d93169197e9b72da7e6cfaf5e3b145dbe4910..a65fcbe58a27320a57f6ce4766491bf32a0f0c79 100644 (file)
@@ -309,6 +309,67 @@ TransactionManager::submit_transaction_direct(
     trim_alloc_to);
 }
 
+TransactionManager::update_lba_mappings_ret
+TransactionManager::update_lba_mappings(
+  Transaction &t,
+  std::list<CachedExtentRef> &pre_allocated_extents)
+{
+  LOG_PREFIX(TransactionManager::update_lba_mappings);
+  SUBTRACET(seastore_t, "update extent lba mappings", t);
+  return seastar::do_with(
+    std::list<LogicalCachedExtentRef>(),
+    std::list<CachedExtentRef>(),
+    [this, &t, &pre_allocated_extents](auto &lextents, auto &pextents) {
+    auto chksum_func = [&lextents, &pextents](auto &extent) {
+      if (!extent->is_valid() ||
+          !extent->is_fully_loaded() ||
+          // EXIST_MUTATION_PENDING extents' crc will be calculated when
+          // preparing records
+          extent->is_exist_mutation_pending()) {
+        return;
+      }
+      if (extent->is_logical()) {
+        // for rewritten extents, last_committed_crc should have been set
+        // because the crc of the original extent may be reused.
+        // also see rewrite_logical_extent()
+        if (!extent->get_last_committed_crc()) {
+          extent->set_last_committed_crc(extent->calc_crc32c());
+        }
+        assert(extent->get_crc32c() == extent->get_last_committed_crc());
+        lextents.emplace_back(extent->template cast<LogicalCachedExtent>());
+      } else {
+        pextents.emplace_back(extent);
+      }
+    };
+
+    // For delayed-ool fresh logical extents, update lba-leaf crc and paddr.
+    // For other fresh logical extents, update lba-leaf crc.
+    t.for_each_finalized_fresh_block(chksum_func);
+    // For existing-clean logical extents, update lba-leaf crc.
+    t.for_each_existing_block(chksum_func);
+    // For pre-allocated fresh logical extents, update lba-leaf crc.
+    // For inplace-rewrite dirty logical extents, update lba-leaf crc.
+    std::for_each(
+      pre_allocated_extents.begin(),
+      pre_allocated_extents.end(),
+      chksum_func);
+
+    return lba_manager->update_mappings(
+      t, lextents
+    ).si_then([&pextents] {
+      for (auto &extent : pextents) {
+        assert(!extent->is_logical() && extent->is_valid());
+        // for non-logical extents, we update its last_committed_crc
+        // and in-extent checksum fields
+        // For pre-allocated fresh physical extents, update in-extent crc.
+        auto crc = extent->calc_crc32c();
+        extent->set_last_committed_crc(crc);
+        extent->update_in_extent_chksum_field(crc);
+      }
+    });
+  });
+}
+
 TransactionManager::submit_transaction_direct_ret
 TransactionManager::do_submit_transaction(
   Transaction &tref,
@@ -318,29 +379,32 @@ TransactionManager::do_submit_transaction(
   LOG_PREFIX(TransactionManager::do_submit_transaction);
   SUBTRACET(seastore_t, "start", tref);
   return trans_intr::make_interruptible(
-    tref.get_handle().enter(write_pipeline.ool_writes)
-  ).then_interruptible([this, FNAME, &tref,
+    tref.get_handle().enter(write_pipeline.ool_writes_and_lba_updates)
+  ).then_interruptible([this, &tref,
                        dispatch_result = std::move(dispatch_result)] {
     return seastar::do_with(std::move(dispatch_result),
-                           [this, FNAME, &tref](auto &dispatch_result) {
+                           [this, &tref](auto &dispatch_result) {
       return epm->write_delayed_ool_extents(tref, dispatch_result.alloc_map
-      ).si_then([this, FNAME, &tref, &dispatch_result] {
-        SUBTRACET(seastore_t, "update delayed extent mappings", tref);
-        return lba_manager->update_mappings(tref, dispatch_result.delayed_extents);
-      }).handle_error_interruptible(
+      ).handle_error_interruptible(
         crimson::ct_error::input_output_error::pass_further(),
         crimson::ct_error::assert_all("invalid error")
       );
     });
-  }).si_then([this, FNAME, &tref] {
-    auto allocated_extents = tref.get_valid_pre_alloc_list();
-    auto num_extents = allocated_extents.size();
-    SUBTRACET(seastore_t, "process {} allocated extents", tref, num_extents);
-    return epm->write_preallocated_ool_extents(tref, allocated_extents
-    ).handle_error_interruptible(
-      crimson::ct_error::input_output_error::pass_further(),
-      crimson::ct_error::assert_all("invalid error")
-    );
+  }).si_then([&tref, FNAME, this] {
+    return seastar::do_with(
+      tref.get_valid_pre_alloc_list(),
+      [this, FNAME, &tref](auto &allocated_extents) {
+      return update_lba_mappings(tref, allocated_extents
+      ).si_then([this, FNAME, &tref, &allocated_extents] {
+        auto num_extents = allocated_extents.size();
+        SUBTRACET(seastore_t, "process {} allocated extents", tref, num_extents);
+        return epm->write_preallocated_ool_extents(tref, allocated_extents
+        ).handle_error_interruptible(
+          crimson::ct_error::input_output_error::pass_further(),
+          crimson::ct_error::assert_all("invalid error")
+        );
+      });
+    });
   }).si_then([this, FNAME, &tref] {
     SUBTRACET(seastore_t, "about to prepare", tref);
     return tref.get_handle().enter(write_pipeline.prepare);
@@ -401,7 +465,7 @@ seastar::future<> TransactionManager::flush(OrderingHandle &handle)
   SUBDEBUG(seastore_t, "H{} start", (void*)&handle);
   return handle.enter(write_pipeline.reserve_projected_usage
   ).then([this, &handle] {
-    return handle.enter(write_pipeline.ool_writes);
+    return handle.enter(write_pipeline.ool_writes_and_lba_updates);
   }).then([this, &handle] {
     return handle.enter(write_pipeline.prepare);
   }).then([this, &handle] {
@@ -454,6 +518,8 @@ TransactionManager::rewrite_logical_extent(
 
     DEBUGT("rewriting logical extent -- {} to {}", t, *lextent, *nlextent);
 
+    assert(lextent->get_last_committed_crc() == lextent->calc_crc32c());
+    nlextent->set_last_committed_crc(lextent->get_last_committed_crc());
     /* This update_mapping is, strictly speaking, unnecessary for delayed_alloc
      * extents since we're going to do it again once we either do the ool write
      * or allocate a relative inline addr.  TODO: refactor AsyncCleaner to
@@ -465,6 +531,7 @@ TransactionManager::rewrite_logical_extent(
       lextent->get_paddr(),
       nlextent->get_length(),
       nlextent->get_paddr(),
+      nlextent->get_last_committed_crc(),
       nlextent.get()).discard_result();
   } else {
     assert(get_extent_category(lextent->get_type()) == data_category_t::DATA);
@@ -494,6 +561,7 @@ TransactionManager::rewrite_logical_extent(
           nlextent->get_bptr().c_str());
         nlextent->set_laddr(lextent->get_laddr() + off);
         nlextent->set_modify_time(lextent->get_modify_time());
+        nlextent->set_last_committed_crc(nlextent->calc_crc32c());
         DEBUGT("rewriting logical extent -- {} to {}", t, *lextent, *nlextent);
 
         /* This update_mapping is, strictly speaking, unnecessary for delayed_alloc
@@ -509,6 +577,7 @@ TransactionManager::rewrite_logical_extent(
             lextent->get_paddr(),
             nlextent->get_length(),
             nlextent->get_paddr(),
+            nlextent->get_last_committed_crc(),
             nlextent.get()
          ).si_then([&refcount](auto c) {
            refcount = c;
@@ -520,6 +589,7 @@ TransactionManager::rewrite_logical_extent(
             lextent->get_laddr() + off,
             nlextent->get_length(),
             nlextent->get_paddr(),
+            nlextent->get_last_committed_crc(),
             *nlextent,
            refcount
           ).si_then([lextent, nlextent, off](auto mapping) {
index 371e7f96c4253ee40e38c0917aa499533f1f20cd..94b6bd5d2dd9323fa2d8d9d7dca01908ec9ba476 100644 (file)
@@ -311,6 +311,7 @@ public:
       laddr_hint,
       len,
       ext->get_paddr(),
+      0, // checksum will be updated upon transaction commit
       *ext
     ).si_then([ext=std::move(ext), laddr_hint, &t](auto &&) mutable {
       LOG_PREFIX(TransactionManager::alloc_non_data_extent);
@@ -360,6 +361,7 @@ public:
           laddr_hint,
           ext->get_length(),
           ext->get_paddr(),
+         0, // checksum will be updated upon trans commit
           *ext
         ).si_then([&ext, &laddr_hint, &t](auto &&) mutable {
           LOG_PREFIX(TransactionManager::alloc_extents);
@@ -833,6 +835,11 @@ private:
     laddr_t offset,
     bool cascade_remove);
 
+  using update_lba_mappings_ret = LBAManager::update_mappings_ret;
+  update_lba_mappings_ret update_lba_mappings(
+    Transaction &t,
+    std::list<CachedExtentRef> &pre_allocated_extents);
+
   /**
    * pin_to_extent
    *
@@ -963,7 +970,10 @@ private:
        original_laddr,
        std::move(original_bptr));
       fut = lba_manager->alloc_extent(
-       t, remap_laddr, remap_length, remap_paddr, *ext);
+       t, remap_laddr, remap_length, remap_paddr,
+       //TODO: oringal_bptr must be present if crc is enabled
+       (original_bptr.has_value() ? ext->get_crc32c() : 0),
+       *ext);
     } else {
       fut = lba_manager->clone_mapping(
        t,
index 5199f963dc95997c44a9e8919f9097c772e72b74..0be7bdd604951102b775a4f4d281059d46cc2cec 100644 (file)
@@ -150,6 +150,26 @@ struct btree_test_base :
            return cache->mkfs(t
            ).si_then([this, &t] {
              return test_structure_setup(t);
+           }).si_then([&t] {
+             auto chksum_func = [](auto &extent) {
+               if (!extent->is_valid()) {
+                 return;
+               }
+               if (!extent->is_logical() ||
+                   !extent->get_last_committed_crc()) {
+                 auto crc = extent->calc_crc32c();
+                 extent->set_last_committed_crc(crc);
+                 extent->update_in_extent_chksum_field(crc);
+               }
+               assert(extent->get_crc32c() == extent->get_last_committed_crc());
+             };
+             t.for_each_finalized_fresh_block(chksum_func);
+             t.for_each_existing_block(chksum_func);
+             auto pre_allocated_extents = t.get_valid_pre_alloc_list();
+             std::for_each(
+               pre_allocated_extents.begin(),
+               pre_allocated_extents.end(),
+               chksum_func);
            });
          }).safe_then([this, &ref_t] {
            return submit_transaction(std::move(ref_t));
@@ -221,6 +241,27 @@ struct lba_btree_test : btree_test_base {
                std::move(f), btree, t
              );
            });
+       }).si_then([t=tref.get()]() mutable {
+         auto chksum_func = [](auto &extent) {
+           if (!extent->is_valid()) {
+             return;
+           }
+           if (!extent->is_logical() ||
+               !extent->get_last_committed_crc()) {
+             auto crc = extent->calc_crc32c();
+             extent->set_last_committed_crc(crc);
+             extent->update_in_extent_chksum_field(crc);
+           }
+           assert(extent->get_crc32c() == extent->get_last_committed_crc());
+         };
+
+         t->for_each_finalized_fresh_block(chksum_func);
+         t->for_each_existing_block(chksum_func);
+         auto pre_allocated_extents = t->get_valid_pre_alloc_list();
+         std::for_each(
+           pre_allocated_extents.begin(),
+           pre_allocated_extents.end(),
+           chksum_func);
        }).si_then([this, tref=std::move(tref)]() mutable {
          return submit_transaction(std::move(tref));
        });
@@ -395,6 +436,50 @@ struct btree_lba_manager_test : btree_test_base {
   }
 
   void submit_test_transaction(test_transaction_t t) {
+    with_trans_intr(
+      *t.t,
+      [this](auto &t) {
+       return seastar::do_with(
+         std::list<LogicalCachedExtentRef>(),
+         std::list<CachedExtentRef>(),
+         [this, &t](auto &lextents, auto &pextents) {
+         auto chksum_func = [&lextents, &pextents](auto &extent) {
+           if (!extent->is_valid()) {
+             return;
+           }
+           if (extent->is_logical()) {
+             if (!extent->get_last_committed_crc()) {
+               auto crc = extent->calc_crc32c();
+               extent->set_last_committed_crc(crc);
+               extent->update_in_extent_chksum_field(crc);
+             }
+             assert(extent->get_crc32c() == extent->get_last_committed_crc());
+             lextents.emplace_back(extent->template cast<LogicalCachedExtent>());
+           } else {
+             pextents.push_back(extent);
+           }
+         };
+
+         t.for_each_finalized_fresh_block(chksum_func);
+         t.for_each_existing_block(chksum_func);
+         auto pre_allocated_extents = t.get_valid_pre_alloc_list();
+         std::for_each(
+           pre_allocated_extents.begin(),
+           pre_allocated_extents.end(),
+           chksum_func);
+
+         return lba_manager->update_mappings(
+           t, lextents
+         ).si_then([&pextents] {
+           for (auto &extent : pextents) {
+             assert(!extent->is_logical() && extent->is_valid());
+             auto crc = extent->calc_crc32c();
+             extent->set_last_committed_crc(crc);
+             extent->update_in_extent_chksum_field(crc);
+           }
+         });
+       });
+      }).unsafe_get0();
     submit_transaction(std::move(t.t)).get();
     test_lba_mappings.swap(t.mappings);
   }
@@ -436,7 +521,7 @@ struct btree_lba_manager_test : btree_test_base {
        assert(extents.size() == 1);
        auto extent = extents.front();
        return lba_manager->alloc_extent(
-         t, hint, len, extent->get_paddr(), *extent);
+         t, hint, len, extent->get_paddr(), 0, *extent);
       }).unsafe_get0();
     logger().debug("alloc'd: {}", *ret);
     EXPECT_EQ(len, ret->get_length());
index b099ddb684a23bac5a74ef8a7b911f11faba3dd9..03f4af17c7ba2be2bd270f23400bb610f19b29ad 100644 (file)
@@ -30,6 +30,27 @@ struct cache_test_t : public seastar_test_suite_t {
 
   seastar::future<paddr_t> submit_transaction(
     TransactionRef t) {
+    auto chksum_func = [](auto &extent) {
+      if (!extent->is_valid()) {
+       return;
+      }
+      if (!extent->is_logical() ||
+         !extent->get_last_committed_crc()) {
+       auto crc = extent->calc_crc32c();
+       extent->set_last_committed_crc(crc);
+       extent->update_in_extent_chksum_field(crc);
+      }
+      assert(extent->get_crc32c() == extent->get_last_committed_crc());
+    };
+
+    t->for_each_finalized_fresh_block(chksum_func);
+    t->for_each_existing_block(chksum_func);
+    auto pre_allocated_extents = t->get_valid_pre_alloc_list();
+    std::for_each(
+      pre_allocated_extents.begin(),
+      pre_allocated_extents.end(),
+      chksum_func);
+
     auto record = cache->prepare_record(*t, JOURNAL_SEQ_NULL, JOURNAL_SEQ_NULL);
 
     bufferlist bl;