From b87f6141a8865c41943ba9d76cf0f55e675f4d0d Mon Sep 17 00:00:00 2001 From: Xuehan Xu Date: Sat, 29 Jun 2024 21:23:33 +0800 Subject: [PATCH] crimson/os/seastore/btree: add copy_source->pending links to FixedKVNode Signed-off-by: Xuehan Xu --- .../os/seastore/btree/fixed_kv_btree.h | 2 +- src/crimson/os/seastore/btree/fixed_kv_node.h | 148 ++++++++++++++---- src/crimson/os/seastore/cached_extent.h | 13 +- src/crimson/os/seastore/root_block.h | 2 +- src/crimson/os/seastore/transaction.h | 13 ++ .../os/seastore/transaction_manager.cc | 4 +- src/test/crimson/seastore/test_block.h | 2 +- 7 files changed, 143 insertions(+), 41 deletions(-) diff --git a/src/crimson/os/seastore/btree/fixed_kv_btree.h b/src/crimson/os/seastore/btree/fixed_kv_btree.h index d5f8ecb8f42..159ea269b74 100644 --- a/src/crimson/os/seastore/btree/fixed_kv_btree.h +++ b/src/crimson/os/seastore/btree/fixed_kv_btree.h @@ -1047,7 +1047,7 @@ public: fixed_kv_extent.get_user_hint(), // get target rewrite generation fixed_kv_extent.get_rewrite_generation()); - n_fixed_kv_extent->rewrite(fixed_kv_extent, 0); + n_fixed_kv_extent->rewrite(c.trans, fixed_kv_extent, 0); SUBTRACET( seastore_fixedkv_tree, diff --git a/src/crimson/os/seastore/btree/fixed_kv_node.h b/src/crimson/os/seastore/btree/fixed_kv_node.h index 8b31d82aad5..536c11c479e 100644 --- a/src/crimson/os/seastore/btree/fixed_kv_node.h +++ b/src/crimson/os/seastore/btree/fixed_kv_node.h @@ -32,7 +32,7 @@ struct FixedKVNode : ChildableCachedExtent { using FixedKVNodeRef = TCachedExtentRef; fixed_kv_node_meta_t range; - struct copy_source_cmp_t { + struct fixedkv_node_cmp_t { using is_transparent = node_key_t; bool operator()(const FixedKVNodeRef &l, const FixedKVNodeRef &r) const { assert(l->range.end <= r->range.begin @@ -84,11 +84,56 @@ struct FixedKVNode : ChildableCachedExtent { * cannot be rewritten) because their parents must be mutated upon remapping. */ std::vector children; - std::set copy_sources; + std::set copy_sources; uint16_t capacity = 0; parent_tracker_t* my_tracker = nullptr; RootBlockRef root_block; + // copy dests points from a stable node back to its pending nodes + // having copy sources at the same tree level, it serves as a two-level index: + // transaction-id then node-key to the pending node. + // + // The copy dest pointers must be symmetric to the copy source pointers. + // + // copy_dests_t will be automatically unregisterred upon transaction destruction, + // see Transaction::views + struct copy_dests_t : trans_spec_view_t { + std::set dests_by_key; + copy_dests_t(Transaction &t) : trans_spec_view_t{t.get_trans_id()} {} + ~copy_dests_t() { + LOG_PREFIX(~copy_dests_t); + SUBTRACE(seastore_fixedkv_tree, "copy_dests_t destroyed"); + } + }; + + trans_view_set_t copy_dests_by_trans; + + void add_copy_dest(Transaction &t, FixedKVNodeRef dest) { + ceph_assert(is_stable()); + ceph_assert(dest->is_pending()); + auto tid = t.get_trans_id(); + auto iter = copy_dests_by_trans.lower_bound( + tid, trans_spec_view_t::cmp_t()); + if (iter == copy_dests_by_trans.end() || + iter->pending_for_transaction != tid) { + iter = copy_dests_by_trans.insert_before( + iter, t.add_transactional_view(t)); + } + auto ©_dests = static_cast(*iter); + auto [it, inserted] = copy_dests.dests_by_key.insert(dest); + assert(inserted || it->get() == dest.get()); + } + + void del_copy_dest(Transaction &t, FixedKVNodeRef dest) { + auto iter = copy_dests_by_trans.find( + t.get_trans_id(), trans_spec_view_t::cmp_t()); + ceph_assert(iter != copy_dests_by_trans.end()); + auto ©_dests = static_cast(*iter); + auto it = copy_dests.dests_by_key.find(dest); + ceph_assert(it != copy_dests.dests_by_key.end()); + copy_dests.dests_by_key.erase(dest); + } + bool is_linked() { assert(!has_parent_tracker() || !(bool)root_block); return (bool)has_parent_tracker() || (bool)root_block; @@ -161,7 +206,7 @@ struct FixedKVNode : ChildableCachedExtent { virtual bool have_children() const = 0; - void on_rewrite(CachedExtent &extent, extent_len_t off) final { + void on_rewrite(Transaction &t, CachedExtent &extent, extent_len_t off) final { assert(get_type() == extent.get_type()); assert(off == 0); auto &foreign_extent = (FixedKVNode&)extent; @@ -169,11 +214,14 @@ struct FixedKVNode : ChildableCachedExtent { if (have_children()) { if (!foreign_extent.is_pending()) { + foreign_extent.add_copy_dest(t, this); copy_sources.emplace(&foreign_extent); } else { ceph_assert(foreign_extent.is_mutation_pending()); - copy_sources.emplace( - foreign_extent.get_prior_instance()->template cast()); + auto copy_source = + foreign_extent.get_prior_instance()->template cast(); + copy_source->add_copy_dest(t, this); + copy_sources.emplace(copy_source); children = std::move(foreign_extent.children); adjust_ptracker_for_children(); } @@ -211,17 +259,24 @@ struct FixedKVNode : ChildableCachedExtent { } static void push_copy_sources( + Transaction &t, FixedKVNode &dest, FixedKVNode &src) { ceph_assert(dest.is_initial_pending()); if (!src.is_pending()) { + src.add_copy_dest(t, &dest); dest.copy_sources.emplace(&src); } else if (src.is_mutation_pending()) { - dest.copy_sources.emplace( - src.get_prior_instance()->template cast()); + auto copy_src = + src.get_prior_instance()->template cast(); + copy_src->add_copy_dest(t, &dest); + dest.copy_sources.emplace(copy_src); } else { ceph_assert(src.is_initial_pending()); + for (auto &cs : src.copy_sources) { + cs->add_copy_dest(t, &dest); + } dest.copy_sources.insert( src.copy_sources.begin(), src.copy_sources.end()); @@ -306,13 +361,20 @@ struct FixedKVNode : ChildableCachedExtent { } void split_child_ptrs( + Transaction &t, FixedKVNode &left, FixedKVNode &right) { assert(!left.my_tracker); assert(!right.my_tracker); - push_copy_sources(left, *this); - push_copy_sources(right, *this); + if (is_initial_pending()) { + for (auto &cs : copy_sources) { + cs->del_copy_dest(t, this); + } + } + + push_copy_sources(t, left, *this); + push_copy_sources(t, right, *this); if (is_pending()) { uint16_t pivot = get_node_split_pivot(); move_child_ptrs(left, *this, 0, 0, pivot); @@ -322,12 +384,24 @@ struct FixedKVNode : ChildableCachedExtent { } void merge_child_ptrs( + Transaction &t, FixedKVNode &left, FixedKVNode &right) { ceph_assert(!my_tracker); - push_copy_sources(*this, left); - push_copy_sources(*this, right); + + if (left.is_initial_pending()) { + for (auto &cs : left.copy_sources) { + cs->del_copy_dest(t, &left); + } + } + if (right.is_initial_pending()) { + for (auto &cs : right.copy_sources) { + cs->del_copy_dest(t, &right); + } + } + push_copy_sources(t, *this, left); + push_copy_sources(t, *this, right); if (left.is_pending()) { move_child_ptrs(*this, left, 0, 0, left.get_node_size()); @@ -341,6 +415,7 @@ struct FixedKVNode : ChildableCachedExtent { } static void balance_child_ptrs( + Transaction &t, FixedKVNode &left, FixedKVNode &right, bool prefer_left, @@ -355,12 +430,23 @@ struct FixedKVNode : ChildableCachedExtent { pivot_idx++; } + if (left.is_initial_pending()) { + for (auto &cs : left.copy_sources) { + cs->del_copy_dest(t, &left); + } + } + if (right.is_initial_pending()) { + for (auto &cs : right.copy_sources) { + cs->del_copy_dest(t, &right); + } + } + assert(!replacement_left.my_tracker); assert(!replacement_right.my_tracker); if (pivot_idx < l_size) { // deal with left - push_copy_sources(replacement_left, left); - push_copy_sources(replacement_right, left); + push_copy_sources(t, replacement_left, left); + push_copy_sources(t, replacement_right, left); if (left.is_pending()) { move_child_ptrs(replacement_left, left, 0, 0, pivot_idx); move_child_ptrs(replacement_right, left, 0, pivot_idx, l_size); @@ -368,22 +454,22 @@ struct FixedKVNode : ChildableCachedExtent { } // deal with right - push_copy_sources(replacement_right, right); + push_copy_sources(t, replacement_right, right); if (right.is_pending()) { move_child_ptrs(replacement_right, right, l_size - pivot_idx, 0, r_size); right.my_tracker= nullptr; } } else { // deal with left - push_copy_sources(replacement_left, left); + push_copy_sources(t, replacement_left, left); if (left.is_pending()) { move_child_ptrs(replacement_left, left, 0, 0, l_size); left.my_tracker = nullptr; } // deal with right - push_copy_sources(replacement_left, right); - push_copy_sources(replacement_right, right); + push_copy_sources(t, replacement_left, right); + push_copy_sources(t, replacement_right, right); if (right.is_pending()) { move_child_ptrs(replacement_left, right, l_size, 0, pivot_idx - l_size); move_child_ptrs(replacement_right, right, 0, pivot_idx - l_size, r_size); @@ -821,10 +907,10 @@ struct FixedKVInternalNode c.trans, node_size, placement_hint_t::HOT, INIT_GENERATION); auto right = c.cache.template alloc_new_non_data_extent( c.trans, node_size, placement_hint_t::HOT, INIT_GENERATION); - this->split_child_ptrs(*left, *right); auto pivot = this->split_into(*left, *right); left->range = left->get_meta(); right->range = right->get_meta(); + this->split_child_ptrs(c.trans, *left, *right); return std::make_tuple( left, right, @@ -836,9 +922,9 @@ struct FixedKVInternalNode Ref &right) { auto replacement = c.cache.template alloc_new_non_data_extent( c.trans, node_size, placement_hint_t::HOT, INIT_GENERATION); - replacement->merge_child_ptrs(*this, *right); replacement->merge_from(*this, *right->template cast()); replacement->range = replacement->get_meta(); + replacement->merge_child_ptrs(c.trans, *this, *right); return replacement; } @@ -860,15 +946,15 @@ struct FixedKVInternalNode prefer_left, *replacement_left, *replacement_right); + replacement_left->range = replacement_left->get_meta(); + replacement_right->range = replacement_right->get_meta(); this->balance_child_ptrs( + c.trans, *this, right, prefer_left, *replacement_left, *replacement_right); - - replacement_left->range = replacement_left->get_meta(); - replacement_right->range = replacement_right->get_meta(); return std::make_tuple( replacement_left, replacement_right, @@ -1231,12 +1317,12 @@ struct FixedKVLeafNode c.trans, node_size, placement_hint_t::HOT, INIT_GENERATION); auto right = c.cache.template alloc_new_non_data_extent( c.trans, node_size, placement_hint_t::HOT, INIT_GENERATION); - if constexpr (has_children) { - this->split_child_ptrs(*left, *right); - } auto pivot = this->split_into(*left, *right); left->range = left->get_meta(); right->range = right->get_meta(); + if constexpr (has_children) { + this->split_child_ptrs(c.trans, *left, *right); + } return std::make_tuple( left, right, @@ -1248,11 +1334,11 @@ struct FixedKVLeafNode Ref &right) { auto replacement = c.cache.template alloc_new_non_data_extent( c.trans, node_size, placement_hint_t::HOT, INIT_GENERATION); - if constexpr (has_children) { - replacement->merge_child_ptrs(*this, *right); - } replacement->merge_from(*this, *right->template cast()); replacement->range = replacement->get_meta(); + if constexpr (has_children) { + replacement->merge_child_ptrs(c.trans, *this, *right); + } return replacement; } @@ -1274,17 +1360,17 @@ struct FixedKVLeafNode prefer_left, *replacement_left, *replacement_right); + replacement_left->range = replacement_left->get_meta(); + replacement_right->range = replacement_right->get_meta(); if constexpr (has_children) { this->balance_child_ptrs( + c.trans, *this, right, prefer_left, *replacement_left, *replacement_right); } - - replacement_left->range = replacement_left->get_meta(); - replacement_right->range = replacement_right->get_meta(); return std::make_tuple( replacement_left, replacement_right, diff --git a/src/crimson/os/seastore/cached_extent.h b/src/crimson/os/seastore/cached_extent.h index e78a0d95028..0214399426d 100644 --- a/src/crimson/os/seastore/cached_extent.h +++ b/src/crimson/os/seastore/cached_extent.h @@ -112,6 +112,9 @@ struct trans_spec_view_t { // if the extent is pending, contains the id of the owning transaction; // TRANS_ID_NULL otherwise transaction_id_t pending_for_transaction = TRANS_ID_NULL; + trans_spec_view_t() = default; + trans_spec_view_t(transaction_id_t id) : pending_for_transaction(id) {} + virtual ~trans_spec_view_t() = default; struct cmp_t { bool operator()( @@ -307,7 +310,7 @@ public: return true; } - void rewrite(CachedExtent &e, extent_len_t o) { + void rewrite(Transaction &t, CachedExtent &e, extent_len_t o) { assert(is_initial_pending()); if (!e.is_pending()) { prior_instance = &e; @@ -321,7 +324,7 @@ public: get_bptr().c_str()); set_modify_time(e.get_modify_time()); set_last_committed_crc(e.get_last_committed_crc()); - on_rewrite(e, o); + on_rewrite(t, e, o); } /** @@ -330,7 +333,7 @@ public: * Called when this extent is rewriting another one. * */ - virtual void on_rewrite(CachedExtent &, extent_len_t) = 0; + virtual void on_rewrite(Transaction &, CachedExtent &, extent_len_t) = 0; friend std::ostream &operator<<(std::ostream &, extent_state_t); virtual std::ostream &print_detail(std::ostream &out) const { return out; } @@ -1221,7 +1224,7 @@ public: return false; } - void on_rewrite(CachedExtent&, extent_len_t) final {} + void on_rewrite(Transaction &, CachedExtent&, extent_len_t) final {} std::ostream &print_detail(std::ostream &out) const final { return out << ", RetiredExtentPlaceholder"; @@ -1308,7 +1311,7 @@ public: : ChildableCachedExtent(std::forward(t)...) {} - void on_rewrite(CachedExtent &extent, extent_len_t off) final { + void on_rewrite(Transaction&, CachedExtent &extent, extent_len_t off) final { assert(get_type() == extent.get_type()); auto &lextent = (LogicalCachedExtent&)extent; set_laddr(lextent.get_laddr() + off); diff --git a/src/crimson/os/seastore/root_block.h b/src/crimson/os/seastore/root_block.h index a9a7cd19822..942434dd596 100644 --- a/src/crimson/os/seastore/root_block.h +++ b/src/crimson/os/seastore/root_block.h @@ -50,7 +50,7 @@ struct RootBlock : CachedExtent { backref_root_node(nullptr) {} - void on_rewrite(CachedExtent&, extent_len_t) final {} + void on_rewrite(Transaction&, CachedExtent&, extent_len_t) final {} CachedExtentRef duplicate_for_write(Transaction&) final { return CachedExtentRef(new RootBlock(*this)); diff --git a/src/crimson/os/seastore/transaction.h b/src/crimson/os/seastore/transaction.h index f6af7cfc350..d7cdb30098a 100644 --- a/src/crimson/os/seastore/transaction.h +++ b/src/crimson/os/seastore/transaction.h @@ -391,6 +391,7 @@ public: get_handle().exit(); on_destruct(*this); invalidate_clear_write_set(); + views.clear(); } friend class crimson::os::seastore::SeaStore; @@ -427,6 +428,7 @@ public: has_reset = true; } get_handle().exit(); + views.clear(); } bool did_reset() const { @@ -512,6 +514,15 @@ public: return trans_id; } + using view_ref = std::unique_ptr; + template , int> = 0> + T& add_transactional_view(Args&&... args) { + auto &view = views.emplace_back( + std::make_unique(std::forward(args)...)); + return static_cast(*view); + } + private: friend class Cache; friend Ref make_test_transaction(); @@ -577,6 +588,8 @@ private: std::list existing_block_list; existing_block_stats_t existing_block_stats; + std::list views; + /** * retire_set * diff --git a/src/crimson/os/seastore/transaction_manager.cc b/src/crimson/os/seastore/transaction_manager.cc index 0a4e59ebd26..1e0e98288c1 100644 --- a/src/crimson/os/seastore/transaction_manager.cc +++ b/src/crimson/os/seastore/transaction_manager.cc @@ -512,7 +512,7 @@ TransactionManager::rewrite_logical_extent( lextent->get_user_hint(), // get target rewrite generation lextent->get_rewrite_generation())->cast(); - nlextent->rewrite(*lextent, 0); + nlextent->rewrite(t, *lextent, 0); DEBUGT("rewriting logical extent -- {} to {}", t, *lextent, *nlextent); @@ -553,7 +553,7 @@ TransactionManager::rewrite_logical_extent( bool first_extent = (off == 0); ceph_assert(left >= nextent->get_length()); auto nlextent = nextent->template cast(); - nlextent->rewrite(*lextent, off); + nlextent->rewrite(t, *lextent, off); DEBUGT("rewriting logical extent -- {} to {}", t, *lextent, *nlextent); /* This update_mapping is, strictly speaking, unnecessary for delayed_alloc diff --git a/src/test/crimson/seastore/test_block.h b/src/test/crimson/seastore/test_block.h index 76891076f33..fde6ad99c41 100644 --- a/src/test/crimson/seastore/test_block.h +++ b/src/test/crimson/seastore/test_block.h @@ -111,7 +111,7 @@ struct TestBlockPhysical : crimson::os::seastore::CachedExtent{ std::vector delta = {}; - void on_rewrite(CachedExtent&, extent_len_t) final {} + void on_rewrite(Transaction&, CachedExtent&, extent_len_t) final {} TestBlockPhysical(ceph::bufferptr &&ptr) : CachedExtent(std::move(ptr)) {} -- 2.39.5