From f4b570da86f77dfa08f33a0adc6135a1aa608233 Mon Sep 17 00:00:00 2001 From: Xuehan Xu Date: Sun, 4 Feb 2024 17:54:26 +0800 Subject: [PATCH] crimson/os/seastore: record extents' chksums in the lba tree Signed-off-by: Xuehan Xu --- src/crimson/common/fixed_kv_node_layout.h | 47 ++++++-- .../os/seastore/btree/fixed_kv_btree.h | 21 ++++ src/crimson/os/seastore/btree/fixed_kv_node.h | 32 +++++- src/crimson/os/seastore/cache.cc | 4 +- src/crimson/os/seastore/cache.h | 15 ++- src/crimson/os/seastore/cached_extent.h | 19 +++- .../os/seastore/extent_placement_manager.cc | 8 ++ src/crimson/os/seastore/lba_manager.cc | 2 + src/crimson/os/seastore/lba_manager.h | 2 + .../lba_manager/btree/btree_lba_manager.cc | 12 +- .../lba_manager/btree/btree_lba_manager.h | 7 ++ .../lba_manager/btree/lba_btree_node.h | 4 +- src/crimson/os/seastore/ordering_handle.h | 12 +- src/crimson/os/seastore/transaction.h | 7 +- .../os/seastore/transaction_manager.cc | 104 +++++++++++++++--- src/crimson/os/seastore/transaction_manager.h | 12 +- .../seastore/test_btree_lba_manager.cc | 87 ++++++++++++++- .../crimson/seastore/test_seastore_cache.cc | 21 ++++ 18 files changed, 360 insertions(+), 56 deletions(-) diff --git a/src/crimson/common/fixed_kv_node_layout.h b/src/crimson/common/fixed_kv_node_layout.h index 676563594e0..1b379f843fb 100644 --- a/src/crimson/common/fixed_kv_node_layout.h +++ b/src/crimson/common/fixed_kv_node_layout.h @@ -9,6 +9,7 @@ #include #include "include/byteorder.h" +#include "include/crc32c.h" #include "crimson/common/layout.h" @@ -52,8 +53,9 @@ template < class FixedKVNodeLayout { char *buf = nullptr; - using L = absl::container_internal::Layout; - static constexpr L layout{1, 1, CAPACITY, CAPACITY}; + using L = absl::container_internal::Layout< + ceph_le32, ceph_le32, MetaInt, KINT, VINT>; + static constexpr L layout{1, 1, 1, CAPACITY, CAPACITY}; public: template @@ -431,7 +433,7 @@ public: } uint16_t get_size() const { - return *layout.template Pointer<0>(buf); + return *layout.template Pointer<1>(buf); } /** @@ -440,7 +442,19 @@ public: * Set size representation to match size */ void set_size(uint16_t size) { - *layout.template Pointer<0>(buf) = size; + *layout.template Pointer<1>(buf) = size; + } + + uint32_t get_phy_checksum() const { + return *layout.template Pointer<0>(buf); + } + + void set_phy_checksum(uint32_t checksum) { + *layout.template Pointer<0>(buf) = checksum; + } + + uint32_t calc_phy_checksum() const { + return calc_phy_checksum_iteratively<4>(1); } /** @@ -451,11 +465,11 @@ public: * in delta_t */ Meta get_meta() const { - MetaInt &metaint = *layout.template Pointer<1>(buf); + MetaInt &metaint = *layout.template Pointer<2>(buf); return Meta(metaint); } void set_meta(const Meta &meta) { - *layout.template Pointer<1>(buf) = MetaInt(meta); + *layout.template Pointer<2>(buf) = MetaInt(meta); } constexpr static size_t get_capacity() { @@ -652,10 +666,23 @@ private: * Get pointer to start of key array */ KINT *get_key_ptr() { - return layout.template Pointer<2>(buf); + return layout.template Pointer<3>(buf); } const KINT *get_key_ptr() const { - return layout.template Pointer<2>(buf); + return layout.template Pointer<3>(buf); + } + + template + uint32_t calc_phy_checksum_iteratively(uint32_t crc) const { + if constexpr (N == 0) { + return crc; + } else { + uint32_t r = ceph_crc32c( + crc, + (unsigned char const *)layout.template Pointer(buf), + layout.template Size()); + return calc_phy_checksum_iteratively(r); + } } /** @@ -664,10 +691,10 @@ private: * Get pointer to start of val array */ VINT *get_val_ptr() { - return layout.template Pointer<3>(buf); + return layout.template Pointer<4>(buf); } const VINT *get_val_ptr() const { - return layout.template Pointer<3>(buf); + return layout.template Pointer<4>(buf); } /** diff --git a/src/crimson/os/seastore/btree/fixed_kv_btree.h b/src/crimson/os/seastore/btree/fixed_kv_btree.h index f15682621fb..b0901213716 100644 --- a/src/crimson/os/seastore/btree/fixed_kv_btree.h +++ b/src/crimson/os/seastore/btree/fixed_kv_btree.h @@ -1275,6 +1275,17 @@ private: init_internal ).si_then([FNAME, c, offset, init_internal, depth, begin, end]( typename internal_node_t::Ref ret) { + if (unlikely(ret->get_in_extent_checksum() + != ret->get_last_committed_crc())) { + SUBERRORT( + seastore_fixedkv_tree, + "internal fixedkv extent checksum inconsistent, " + "recorded: {}, actually: {}", + c.trans, + ret->get_in_extent_checksum(), + ret->get_last_committed_crc()); + ceph_abort(); + } SUBTRACET( seastore_fixedkv_tree, "read internal at offset {} {}", @@ -1349,6 +1360,16 @@ private: init_leaf ).si_then([FNAME, c, offset, init_leaf, begin, end] (typename leaf_node_t::Ref ret) { + if (unlikely(ret->get_in_extent_checksum() + != ret->get_last_committed_crc())) { + SUBERRORT( + seastore_fixedkv_tree, + "leaf fixedkv extent checksum inconsistent, recorded: {}, actually: {}", + c.trans, + ret->get_in_extent_checksum(), + ret->get_last_committed_crc()); + ceph_abort(); + } SUBTRACET( seastore_fixedkv_tree, "read leaf at offset {} {}", diff --git a/src/crimson/os/seastore/btree/fixed_kv_node.h b/src/crimson/os/seastore/btree/fixed_kv_node.h index e9ffeed452e..6d7db2c4102 100644 --- a/src/crimson/os/seastore/btree/fixed_kv_node.h +++ b/src/crimson/os/seastore/btree/fixed_kv_node.h @@ -662,6 +662,18 @@ struct FixedKVInternalNode return this->get_size(); } + uint32_t get_crc32c() const final { + return this->calc_phy_checksum(); + } + + void update_in_extent_chksum_field(uint32_t crc) final { + this->set_phy_checksum(crc); + } + + uint32_t get_in_extent_checksum() const { + return this->get_phy_checksum(); + } + typename node_layout_t::delta_buffer_t delta_buffer; typename node_layout_t::delta_buffer_t *maybe_get_delta_buffer() { return this->is_mutation_pending() @@ -887,7 +899,9 @@ struct FixedKVInternalNode typename node_layout_t::delta_buffer_t buffer; buffer.copy_in(bl.front().c_str(), bl.front().length()); buffer.replay(*this); - this->set_last_committed_crc(this->get_crc32c()); + auto crc = calc_crc32c(); + this->set_last_committed_crc(crc); + this->update_in_extent_chksum_field(crc); resolve_relative_addrs(base); } @@ -1086,6 +1100,18 @@ struct FixedKVLeafNode return this->get_size(); } + uint32_t get_crc32c() const final { + return this->calc_phy_checksum(); + } + + void update_in_extent_chksum_field(uint32_t crc) final { + this->set_phy_checksum(crc); + } + + uint32_t get_in_extent_checksum() const { + return this->get_phy_checksum(); + } + typename node_layout_t::delta_buffer_t delta_buffer; virtual typename node_layout_t::delta_buffer_t *maybe_get_delta_buffer() { return this->is_mutation_pending() ? &delta_buffer : nullptr; @@ -1189,7 +1215,9 @@ struct FixedKVLeafNode typename node_layout_t::delta_buffer_t buffer; buffer.copy_in(bl.front().c_str(), bl.front().length()); buffer.replay(*this); - this->set_last_committed_crc(this->get_crc32c()); + auto crc = calc_crc32c(); + this->set_last_committed_crc(crc); + this->update_in_extent_chksum_field(crc); this->resolve_relative_addrs(base); } diff --git a/src/crimson/os/seastore/cache.cc b/src/crimson/os/seastore/cache.cc index 88096703154..537f7bea52f 100644 --- a/src/crimson/os/seastore/cache.cc +++ b/src/crimson/os/seastore/cache.cc @@ -1520,7 +1520,7 @@ void Cache::complete_commit( t, final_block_start, start_seq); std::vector backref_list; - t.for_each_fresh_block([&](const CachedExtentRef &i) { + t.for_each_finalized_fresh_block([&](const CachedExtentRef &i) { if (!i->is_valid()) { return; } @@ -1530,7 +1530,7 @@ void Cache::complete_commit( is_inline = true; i->set_paddr(final_block_start.add_relative(i->get_paddr())); } - i->last_committed_crc = i->get_crc32c(); + assert(i->get_last_committed_crc() == i->get_crc32c()); i->pending_for_transaction = TRANS_ID_NULL; i->on_initial_write(); diff --git a/src/crimson/os/seastore/cache.h b/src/crimson/os/seastore/cache.h index c06193cbbf5..b6a8cddde51 100644 --- a/src/crimson/os/seastore/cache.h +++ b/src/crimson/os/seastore/cache.h @@ -1652,16 +1652,15 @@ private: LOG_PREFIX(Cache::read_extent); if (likely(extent->state == CachedExtent::extent_state_t::CLEAN_PENDING)) { extent->state = CachedExtent::extent_state_t::CLEAN; - /* TODO: crc should be checked against LBA manager */ + } + ceph_assert(extent->state == CachedExtent::extent_state_t::EXIST_CLEAN + || extent->state == CachedExtent::extent_state_t::CLEAN + || !extent->is_valid()); + if (extent->is_valid()) { + // crc will be checked against LBA leaf entry for logical extents, + // or check against in-extent crc for physical extents. extent->last_committed_crc = extent->get_crc32c(); - extent->on_clean_read(); - } else if (extent->state == CachedExtent::extent_state_t::EXIST_CLEAN || - extent->state == CachedExtent::extent_state_t::CLEAN) { - /* TODO: crc should be checked against LBA manager */ - extent->last_committed_crc = extent->get_crc32c(); - } else { - ceph_assert(!extent->is_valid()); } extent->complete_io(); SUBDEBUG(seastore_cache, "read extent done -- {}", *extent); diff --git a/src/crimson/os/seastore/cached_extent.h b/src/crimson/os/seastore/cached_extent.h index 214e1283601..6fc430d039a 100644 --- a/src/crimson/os/seastore/cached_extent.h +++ b/src/crimson/os/seastore/cached_extent.h @@ -17,6 +17,9 @@ #include "crimson/os/seastore/seastore_types.h" struct btree_lba_manager_test; +struct lba_btree_test; +struct btree_test_base; +struct cache_test_t; namespace crimson::os::seastore { @@ -539,7 +542,7 @@ public: } /// Returns crc32c of buffer - uint32_t get_crc32c() { + virtual uint32_t get_crc32c() const { return ceph_crc32c( 1, reinterpret_cast(get_bptr().c_str()), @@ -600,7 +603,9 @@ public: } paddr_t get_prior_paddr_and_reset() { - assert(prior_poffset); + if (!prior_poffset) { + return poffset; + } auto ret = *prior_poffset; prior_poffset.reset(); return ret; @@ -781,6 +786,13 @@ protected: prior_instance.reset(); } + /** + * Called when updating extents' last_committed_crc, some extents may + * have in-extent checksum fields, like LBA/backref nodes, which are + * supposed to be updated in this method. + */ + virtual void update_in_extent_chksum_field(uint32_t) {} + /// Sets last_committed_crc void set_last_committed_crc(uint32_t crc) { last_committed_crc = crc; @@ -832,6 +844,9 @@ protected: template friend class BtreeNodeMapping; friend class ::btree_lba_manager_test; + friend class ::lba_btree_test; + friend class ::btree_test_base; + friend class ::cache_test_t; }; std::ostream &operator<<(std::ostream &, CachedExtent::extent_state_t); diff --git a/src/crimson/os/seastore/extent_placement_manager.cc b/src/crimson/os/seastore/extent_placement_manager.cc index 4d09e8da459..77bf1c5184c 100644 --- a/src/crimson/os/seastore/extent_placement_manager.cc +++ b/src/crimson/os/seastore/extent_placement_manager.cc @@ -335,6 +335,14 @@ ExtentPlacementManager::write_delayed_ool_extents( return trans_intr::do_for_each(alloc_map, [&t](auto& p) { auto writer = p.first; auto& extents = p.second; +#ifndef NDEBUG + std::for_each( + extents.begin(), + extents.end(), + [](auto &extent) { + assert(extent->is_valid()); + }); +#endif return writer->alloc_write_ool_extents(t, extents); }); } diff --git a/src/crimson/os/seastore/lba_manager.cc b/src/crimson/os/seastore/lba_manager.cc index 466fa8bd062..fc0b72a9814 100644 --- a/src/crimson/os/seastore/lba_manager.cc +++ b/src/crimson/os/seastore/lba_manager.cc @@ -13,6 +13,7 @@ LBAManager::update_mappings( { return trans_intr::do_for_each(extents, [this, &t](auto &extent) { + assert(extent->get_last_committed_crc()); return update_mapping( t, extent->get_laddr(), @@ -20,6 +21,7 @@ LBAManager::update_mappings( extent->get_prior_paddr_and_reset(), extent->get_length(), extent->get_paddr(), + extent->get_last_committed_crc(), nullptr // all the extents should have already been // added to the fixed_kv_btree ).discard_result(); diff --git a/src/crimson/os/seastore/lba_manager.h b/src/crimson/os/seastore/lba_manager.h index 3fa0987e150..6082f110f0d 100644 --- a/src/crimson/os/seastore/lba_manager.h +++ b/src/crimson/os/seastore/lba_manager.h @@ -87,6 +87,7 @@ public: laddr_t hint, extent_len_t len, paddr_t addr, + uint32_t checksum, LogicalCachedExtent &nextent, extent_ref_count_t refcount = EXTENT_DEFAULT_REF_COUNT) = 0; @@ -196,6 +197,7 @@ public: paddr_t prev_addr, extent_len_t len, paddr_t paddr, + uint32_t checksum, LogicalCachedExtent *nextent) = 0; /** diff --git a/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.cc b/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.cc index d2a8b3dc0cb..e9fb620836b 100644 --- a/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.cc +++ b/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.cc @@ -307,6 +307,7 @@ BtreeLBAManager::_alloc_extent( extent_len_t len, pladdr_t addr, paddr_t actual_addr, + uint32_t checksum, LogicalCachedExtent* nextent, extent_ref_count_t refcount) { @@ -331,7 +332,7 @@ BtreeLBAManager::_alloc_extent( c, hint, [this, FNAME, c, hint, len, addr, lookup_attempts, - &t, nextent, refcount](auto &btree, auto &state) { + &t, nextent, refcount, checksum](auto &btree, auto &state) { return LBABtree::iterate_repeat( c, btree.upper_bound_right(c, hint), @@ -367,12 +368,13 @@ BtreeLBAManager::_alloc_extent( interruptible::ready_future_marker{}, seastar::stop_iteration::no); } - }).si_then([FNAME, c, addr, len, hint, &btree, &state, nextent, refcount] { + }).si_then([FNAME, c, addr, len, hint, &btree, + &state, nextent, refcount, checksum] { return btree.insert( c, *state.insert_iter, state.last_end, - lba_map_val_t{len, pladdr_t(addr), refcount, 0}, + lba_map_val_t{len, pladdr_t(addr), refcount, checksum}, nextent ).si_then([&state, FNAME, c, addr, len, hint, nextent](auto &&p) { auto [iter, inserted] = std::move(p); @@ -535,6 +537,7 @@ BtreeLBAManager::update_mapping( paddr_t prev_addr, extent_len_t len, paddr_t addr, + uint32_t checksum, LogicalCachedExtent *nextent) { LOG_PREFIX(BtreeLBAManager::update_mapping); @@ -542,7 +545,7 @@ BtreeLBAManager::update_mapping( return _update_mapping( t, laddr, - [prev_addr, addr, prev_len, len]( + [prev_addr, addr, prev_len, len, checksum]( const lba_map_val_t &in) { assert(!addr.is_null()); lba_map_val_t ret = in; @@ -551,6 +554,7 @@ BtreeLBAManager::update_mapping( ceph_assert(in.len == prev_len); ret.pladdr = addr; ret.len = len; + ret.checksum = checksum; return ret; }, nextent diff --git a/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.h b/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.h index 3ad6f9380c2..85ffe3b805c 100644 --- a/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.h +++ b/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.h @@ -221,6 +221,7 @@ public: len, P_ADDR_ZERO, P_ADDR_NULL, + 0, nullptr, EXTENT_DEFAULT_REF_COUNT); } @@ -241,6 +242,8 @@ public: len, intermediate_key, actual_addr, + 0, // crc will only be used and checked with LBA direct mappings + // also see pin_to_extent(_by_type) nullptr, EXTENT_DEFAULT_REF_COUNT ).si_then([&t, this, intermediate_base](auto indirect_mapping) { @@ -267,6 +270,7 @@ public: laddr_t hint, extent_len_t len, paddr_t addr, + uint32_t checksum, LogicalCachedExtent &ext, extent_ref_count_t refcount = EXTENT_DEFAULT_REF_COUNT) final { @@ -276,6 +280,7 @@ public: len, addr, P_ADDR_NULL, + checksum, &ext, refcount); } @@ -341,6 +346,7 @@ public: paddr_t prev_addr, extent_len_t len, paddr_t paddr, + uint32_t checksum, LogicalCachedExtent*) final; get_physical_extent_if_live_ret get_physical_extent_if_live( @@ -410,6 +416,7 @@ private: extent_len_t len, pladdr_t addr, paddr_t actual_addr, + uint32_t checksum, LogicalCachedExtent*, extent_ref_count_t refcount); diff --git a/src/crimson/os/seastore/lba_manager/btree/lba_btree_node.h b/src/crimson/os/seastore/lba_manager/btree/lba_btree_node.h index fc5d4e98fd9..3b760ff06a2 100644 --- a/src/crimson/os/seastore/lba_manager/btree/lba_btree_node.h +++ b/src/crimson/os/seastore/lba_manager/btree/lba_btree_node.h @@ -63,8 +63,8 @@ using lba_node_meta_le_t = fixed_kv_node_meta_le_t; * LBA Tree. * * Layout (4k): + * checksum : 4b * size : uint32_t[1] 4b - * (padding) : 4b * meta : lba_node_meta_le_t[3] (1*24)b * keys : laddr_t[255] (254*8)b * values : paddr_t[255] (254*8)b @@ -101,8 +101,8 @@ using LBAInternalNodeRef = LBAInternalNode::Ref; * LBA Tree. * * Layout (4k): + * checksum : 4b * size : uint32_t[1] 4b - * (padding) : 4b * meta : lba_node_meta_le_t[3] (1*24)b * keys : laddr_t[170] (140*8)b * values : lba_map_val_t[170] (140*21)b diff --git a/src/crimson/os/seastore/ordering_handle.h b/src/crimson/os/seastore/ordering_handle.h index a7802fda383..8ab8442acd9 100644 --- a/src/crimson/os/seastore/ordering_handle.h +++ b/src/crimson/os/seastore/ordering_handle.h @@ -14,9 +14,9 @@ struct WritePipeline { struct ReserveProjectedUsage : OrderedExclusivePhaseT { constexpr static auto type_name = "WritePipeline::reserve_projected_usage"; } reserve_projected_usage; - struct OolWrites : UnorderedStageT { - constexpr static auto type_name = "UnorderedStage::ool_writes_stage"; - } ool_writes; + struct OolWritesAndLBAUpdates : UnorderedStageT { + constexpr static auto type_name = "UnorderedStage::ool_writes_and_update_lba_stage"; + } ool_writes_and_lba_updates; struct Prepare : OrderedExclusivePhaseT { constexpr static auto type_name = "WritePipeline::prepare_phase"; } prepare; @@ -29,7 +29,7 @@ struct WritePipeline { using BlockingEvents = std::tuple< ReserveProjectedUsage::BlockingEvent, - OolWrites::BlockingEvent, + OolWritesAndLBAUpdates::BlockingEvent, Prepare::BlockingEvent, DeviceSubmission::BlockingEvent, Finalize::BlockingEvent @@ -70,7 +70,7 @@ struct OperationProxy { OperationProxy(OperationRef op) : op(std::move(op)) {} virtual seastar::future<> enter(WritePipeline::ReserveProjectedUsage&) = 0; - virtual seastar::future<> enter(WritePipeline::OolWrites&) = 0; + virtual seastar::future<> enter(WritePipeline::OolWritesAndLBAUpdates&) = 0; virtual seastar::future<> enter(WritePipeline::Prepare&) = 0; virtual seastar::future<> enter(WritePipeline::DeviceSubmission&) = 0; virtual seastar::future<> enter(WritePipeline::Finalize&) = 0; @@ -95,7 +95,7 @@ struct OperationProxyT : OperationProxy { seastar::future<> enter(WritePipeline::ReserveProjectedUsage& s) final { return that()->enter_stage(s); } - seastar::future<> enter(WritePipeline::OolWrites& s) final { + seastar::future<> enter(WritePipeline::OolWritesAndLBAUpdates& s) final { return that()->enter_stage(s); } seastar::future<> enter(WritePipeline::Prepare& s) final { diff --git a/src/crimson/os/seastore/transaction.h b/src/crimson/os/seastore/transaction.h index 8d77ee728a7..81d5d146629 100644 --- a/src/crimson/os/seastore/transaction.h +++ b/src/crimson/os/seastore/transaction.h @@ -333,11 +333,16 @@ public: } template - auto for_each_fresh_block(F &&f) const { + auto for_each_finalized_fresh_block(F &&f) const { std::for_each(written_ool_block_list.begin(), written_ool_block_list.end(), f); std::for_each(inline_block_list.begin(), inline_block_list.end(), f); } + template + auto for_each_existing_block(F &&f) { + std::for_each(existing_block_list.begin(), existing_block_list.end(), f); + } + const io_stat_t& get_fresh_block_stats() const { return fresh_block_stats; } diff --git a/src/crimson/os/seastore/transaction_manager.cc b/src/crimson/os/seastore/transaction_manager.cc index 5869df86340..3b68e4069de 100644 --- a/src/crimson/os/seastore/transaction_manager.cc +++ b/src/crimson/os/seastore/transaction_manager.cc @@ -309,6 +309,67 @@ TransactionManager::submit_transaction_direct( trim_alloc_to); } +TransactionManager::update_lba_mappings_ret +TransactionManager::update_lba_mappings( + Transaction &t, + std::list &pre_allocated_extents) +{ + LOG_PREFIX(TransactionManager::update_lba_mappings); + SUBTRACET(seastore_t, "update extent lba mappings", t); + return seastar::do_with( + std::list(), + std::list(), + [this, &t, &pre_allocated_extents](auto &lextents, auto &pextents) { + auto chksum_func = [&lextents, &pextents](auto &extent) { + if (!extent->is_valid() || + !extent->is_fully_loaded() || + // EXIST_MUTATION_PENDING extents' crc will be calculated when + // preparing records + extent->is_exist_mutation_pending()) { + return; + } + if (extent->is_logical()) { + // for rewritten extents, last_committed_crc should have been set + // because the crc of the original extent may be reused. + // also see rewrite_logical_extent() + if (!extent->get_last_committed_crc()) { + extent->set_last_committed_crc(extent->calc_crc32c()); + } + assert(extent->get_crc32c() == extent->get_last_committed_crc()); + lextents.emplace_back(extent->template cast()); + } else { + pextents.emplace_back(extent); + } + }; + + // For delayed-ool fresh logical extents, update lba-leaf crc and paddr. + // For other fresh logical extents, update lba-leaf crc. + t.for_each_finalized_fresh_block(chksum_func); + // For existing-clean logical extents, update lba-leaf crc. + t.for_each_existing_block(chksum_func); + // For pre-allocated fresh logical extents, update lba-leaf crc. + // For inplace-rewrite dirty logical extents, update lba-leaf crc. + std::for_each( + pre_allocated_extents.begin(), + pre_allocated_extents.end(), + chksum_func); + + return lba_manager->update_mappings( + t, lextents + ).si_then([&pextents] { + for (auto &extent : pextents) { + assert(!extent->is_logical() && extent->is_valid()); + // for non-logical extents, we update its last_committed_crc + // and in-extent checksum fields + // For pre-allocated fresh physical extents, update in-extent crc. + auto crc = extent->calc_crc32c(); + extent->set_last_committed_crc(crc); + extent->update_in_extent_chksum_field(crc); + } + }); + }); +} + TransactionManager::submit_transaction_direct_ret TransactionManager::do_submit_transaction( Transaction &tref, @@ -318,29 +379,32 @@ TransactionManager::do_submit_transaction( LOG_PREFIX(TransactionManager::do_submit_transaction); SUBTRACET(seastore_t, "start", tref); return trans_intr::make_interruptible( - tref.get_handle().enter(write_pipeline.ool_writes) - ).then_interruptible([this, FNAME, &tref, + tref.get_handle().enter(write_pipeline.ool_writes_and_lba_updates) + ).then_interruptible([this, &tref, dispatch_result = std::move(dispatch_result)] { return seastar::do_with(std::move(dispatch_result), - [this, FNAME, &tref](auto &dispatch_result) { + [this, &tref](auto &dispatch_result) { return epm->write_delayed_ool_extents(tref, dispatch_result.alloc_map - ).si_then([this, FNAME, &tref, &dispatch_result] { - SUBTRACET(seastore_t, "update delayed extent mappings", tref); - return lba_manager->update_mappings(tref, dispatch_result.delayed_extents); - }).handle_error_interruptible( + ).handle_error_interruptible( crimson::ct_error::input_output_error::pass_further(), crimson::ct_error::assert_all("invalid error") ); }); - }).si_then([this, FNAME, &tref] { - auto allocated_extents = tref.get_valid_pre_alloc_list(); - auto num_extents = allocated_extents.size(); - SUBTRACET(seastore_t, "process {} allocated extents", tref, num_extents); - return epm->write_preallocated_ool_extents(tref, allocated_extents - ).handle_error_interruptible( - crimson::ct_error::input_output_error::pass_further(), - crimson::ct_error::assert_all("invalid error") - ); + }).si_then([&tref, FNAME, this] { + return seastar::do_with( + tref.get_valid_pre_alloc_list(), + [this, FNAME, &tref](auto &allocated_extents) { + return update_lba_mappings(tref, allocated_extents + ).si_then([this, FNAME, &tref, &allocated_extents] { + auto num_extents = allocated_extents.size(); + SUBTRACET(seastore_t, "process {} allocated extents", tref, num_extents); + return epm->write_preallocated_ool_extents(tref, allocated_extents + ).handle_error_interruptible( + crimson::ct_error::input_output_error::pass_further(), + crimson::ct_error::assert_all("invalid error") + ); + }); + }); }).si_then([this, FNAME, &tref] { SUBTRACET(seastore_t, "about to prepare", tref); return tref.get_handle().enter(write_pipeline.prepare); @@ -403,7 +467,7 @@ seastar::future<> TransactionManager::flush(OrderingHandle &handle) SUBDEBUG(seastore_t, "H{} start", (void*)&handle); return handle.enter(write_pipeline.reserve_projected_usage ).then([this, &handle] { - return handle.enter(write_pipeline.ool_writes); + return handle.enter(write_pipeline.ool_writes_and_lba_updates); }).then([this, &handle] { return handle.enter(write_pipeline.prepare); }).then([this, &handle] { @@ -456,6 +520,8 @@ TransactionManager::rewrite_logical_extent( DEBUGT("rewriting logical extent -- {} to {}", t, *lextent, *nlextent); + assert(lextent->get_last_committed_crc() == lextent->calc_crc32c()); + nlextent->set_last_committed_crc(lextent->get_last_committed_crc()); /* This update_mapping is, strictly speaking, unnecessary for delayed_alloc * extents since we're going to do it again once we either do the ool write * or allocate a relative inline addr. TODO: refactor AsyncCleaner to @@ -467,6 +533,7 @@ TransactionManager::rewrite_logical_extent( lextent->get_paddr(), nlextent->get_length(), nlextent->get_paddr(), + nlextent->get_last_committed_crc(), nlextent.get()).discard_result(); } else { assert(get_extent_category(lextent->get_type()) == data_category_t::DATA); @@ -496,6 +563,7 @@ TransactionManager::rewrite_logical_extent( nlextent->get_bptr().c_str()); nlextent->set_laddr(lextent->get_laddr() + off); nlextent->set_modify_time(lextent->get_modify_time()); + nlextent->set_last_committed_crc(nlextent->calc_crc32c()); DEBUGT("rewriting logical extent -- {} to {}", t, *lextent, *nlextent); /* This update_mapping is, strictly speaking, unnecessary for delayed_alloc @@ -511,6 +579,7 @@ TransactionManager::rewrite_logical_extent( lextent->get_paddr(), nlextent->get_length(), nlextent->get_paddr(), + nlextent->get_last_committed_crc(), nlextent.get() ).si_then([&refcount](auto c) { refcount = c; @@ -522,6 +591,7 @@ TransactionManager::rewrite_logical_extent( lextent->get_laddr() + off, nlextent->get_length(), nlextent->get_paddr(), + nlextent->get_last_committed_crc(), *nlextent, refcount ).si_then([lextent, nlextent, off](auto mapping) { diff --git a/src/crimson/os/seastore/transaction_manager.h b/src/crimson/os/seastore/transaction_manager.h index 371e7f96c42..94b6bd5d2dd 100644 --- a/src/crimson/os/seastore/transaction_manager.h +++ b/src/crimson/os/seastore/transaction_manager.h @@ -311,6 +311,7 @@ public: laddr_hint, len, ext->get_paddr(), + 0, // checksum will be updated upon transaction commit *ext ).si_then([ext=std::move(ext), laddr_hint, &t](auto &&) mutable { LOG_PREFIX(TransactionManager::alloc_non_data_extent); @@ -360,6 +361,7 @@ public: laddr_hint, ext->get_length(), ext->get_paddr(), + 0, // checksum will be updated upon trans commit *ext ).si_then([&ext, &laddr_hint, &t](auto &&) mutable { LOG_PREFIX(TransactionManager::alloc_extents); @@ -833,6 +835,11 @@ private: laddr_t offset, bool cascade_remove); + using update_lba_mappings_ret = LBAManager::update_mappings_ret; + update_lba_mappings_ret update_lba_mappings( + Transaction &t, + std::list &pre_allocated_extents); + /** * pin_to_extent * @@ -963,7 +970,10 @@ private: original_laddr, std::move(original_bptr)); fut = lba_manager->alloc_extent( - t, remap_laddr, remap_length, remap_paddr, *ext); + t, remap_laddr, remap_length, remap_paddr, + //TODO: oringal_bptr must be present if crc is enabled + (original_bptr.has_value() ? ext->get_crc32c() : 0), + *ext); } else { fut = lba_manager->clone_mapping( t, diff --git a/src/test/crimson/seastore/test_btree_lba_manager.cc b/src/test/crimson/seastore/test_btree_lba_manager.cc index 5199f963dc9..0be7bdd6049 100644 --- a/src/test/crimson/seastore/test_btree_lba_manager.cc +++ b/src/test/crimson/seastore/test_btree_lba_manager.cc @@ -150,6 +150,26 @@ struct btree_test_base : return cache->mkfs(t ).si_then([this, &t] { return test_structure_setup(t); + }).si_then([&t] { + auto chksum_func = [](auto &extent) { + if (!extent->is_valid()) { + return; + } + if (!extent->is_logical() || + !extent->get_last_committed_crc()) { + auto crc = extent->calc_crc32c(); + extent->set_last_committed_crc(crc); + extent->update_in_extent_chksum_field(crc); + } + assert(extent->get_crc32c() == extent->get_last_committed_crc()); + }; + t.for_each_finalized_fresh_block(chksum_func); + t.for_each_existing_block(chksum_func); + auto pre_allocated_extents = t.get_valid_pre_alloc_list(); + std::for_each( + pre_allocated_extents.begin(), + pre_allocated_extents.end(), + chksum_func); }); }).safe_then([this, &ref_t] { return submit_transaction(std::move(ref_t)); @@ -221,6 +241,27 @@ struct lba_btree_test : btree_test_base { std::move(f), btree, t ); }); + }).si_then([t=tref.get()]() mutable { + auto chksum_func = [](auto &extent) { + if (!extent->is_valid()) { + return; + } + if (!extent->is_logical() || + !extent->get_last_committed_crc()) { + auto crc = extent->calc_crc32c(); + extent->set_last_committed_crc(crc); + extent->update_in_extent_chksum_field(crc); + } + assert(extent->get_crc32c() == extent->get_last_committed_crc()); + }; + + t->for_each_finalized_fresh_block(chksum_func); + t->for_each_existing_block(chksum_func); + auto pre_allocated_extents = t->get_valid_pre_alloc_list(); + std::for_each( + pre_allocated_extents.begin(), + pre_allocated_extents.end(), + chksum_func); }).si_then([this, tref=std::move(tref)]() mutable { return submit_transaction(std::move(tref)); }); @@ -395,6 +436,50 @@ struct btree_lba_manager_test : btree_test_base { } void submit_test_transaction(test_transaction_t t) { + with_trans_intr( + *t.t, + [this](auto &t) { + return seastar::do_with( + std::list(), + std::list(), + [this, &t](auto &lextents, auto &pextents) { + auto chksum_func = [&lextents, &pextents](auto &extent) { + if (!extent->is_valid()) { + return; + } + if (extent->is_logical()) { + if (!extent->get_last_committed_crc()) { + auto crc = extent->calc_crc32c(); + extent->set_last_committed_crc(crc); + extent->update_in_extent_chksum_field(crc); + } + assert(extent->get_crc32c() == extent->get_last_committed_crc()); + lextents.emplace_back(extent->template cast()); + } else { + pextents.push_back(extent); + } + }; + + t.for_each_finalized_fresh_block(chksum_func); + t.for_each_existing_block(chksum_func); + auto pre_allocated_extents = t.get_valid_pre_alloc_list(); + std::for_each( + pre_allocated_extents.begin(), + pre_allocated_extents.end(), + chksum_func); + + return lba_manager->update_mappings( + t, lextents + ).si_then([&pextents] { + for (auto &extent : pextents) { + assert(!extent->is_logical() && extent->is_valid()); + auto crc = extent->calc_crc32c(); + extent->set_last_committed_crc(crc); + extent->update_in_extent_chksum_field(crc); + } + }); + }); + }).unsafe_get0(); submit_transaction(std::move(t.t)).get(); test_lba_mappings.swap(t.mappings); } @@ -436,7 +521,7 @@ struct btree_lba_manager_test : btree_test_base { assert(extents.size() == 1); auto extent = extents.front(); return lba_manager->alloc_extent( - t, hint, len, extent->get_paddr(), *extent); + t, hint, len, extent->get_paddr(), 0, *extent); }).unsafe_get0(); logger().debug("alloc'd: {}", *ret); EXPECT_EQ(len, ret->get_length()); diff --git a/src/test/crimson/seastore/test_seastore_cache.cc b/src/test/crimson/seastore/test_seastore_cache.cc index b099ddb684a..03f4af17c7b 100644 --- a/src/test/crimson/seastore/test_seastore_cache.cc +++ b/src/test/crimson/seastore/test_seastore_cache.cc @@ -30,6 +30,27 @@ struct cache_test_t : public seastar_test_suite_t { seastar::future submit_transaction( TransactionRef t) { + auto chksum_func = [](auto &extent) { + if (!extent->is_valid()) { + return; + } + if (!extent->is_logical() || + !extent->get_last_committed_crc()) { + auto crc = extent->calc_crc32c(); + extent->set_last_committed_crc(crc); + extent->update_in_extent_chksum_field(crc); + } + assert(extent->get_crc32c() == extent->get_last_committed_crc()); + }; + + t->for_each_finalized_fresh_block(chksum_func); + t->for_each_existing_block(chksum_func); + auto pre_allocated_extents = t->get_valid_pre_alloc_list(); + std::for_each( + pre_allocated_extents.begin(), + pre_allocated_extents.end(), + chksum_func); + auto record = cache->prepare_record(*t, JOURNAL_SEQ_NULL, JOURNAL_SEQ_NULL); bufferlist bl; -- 2.39.5