From: Xuehan Xu Date: Sun, 12 Jan 2025 07:15:46 +0000 (+0800) Subject: crimson/os/seastore: remove fixed-kv-btree parent<->child pointer codes X-Git-Tag: v20.0.0~123^2~1 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=18d787a32b9b0899964664ce189be6b3281ce54c;p=ceph.git crimson/os/seastore: remove fixed-kv-btree parent<->child pointer codes from FixedKVNodes FixedKV(Internal/Leaf)Node inherit parent<->child pointer functions from linked tree nodes instead. Signed-off-by: Xuehan Xu --- diff --git a/src/crimson/os/seastore/CMakeLists.txt b/src/crimson/os/seastore/CMakeLists.txt index 3da5e65ceec..4aa075ec18e 100644 --- a/src/crimson/os/seastore/CMakeLists.txt +++ b/src/crimson/os/seastore/CMakeLists.txt @@ -1,6 +1,7 @@ set(crimson_seastore_srcs cached_extent.cc lba_mapping.cc + logical_child_node.cc seastore_types.cc segment_manager.cc segment_manager/ephemeral.cc @@ -20,7 +21,6 @@ set(crimson_seastore_srcs omap_manager.cc omap_manager/btree/btree_omap_manager.cc omap_manager/btree/omap_btree_node_impl.cc - btree/fixed_kv_node.cc onode.cc onode_manager/staged-fltree/node.cc onode_manager/staged-fltree/node_extent_manager.cc diff --git a/src/crimson/os/seastore/backref/backref_tree_node.h b/src/crimson/os/seastore/backref/backref_tree_node.h index aee6e2a67ea..0d4f8406960 100644 --- a/src/crimson/os/seastore/backref/backref_tree_node.h +++ b/src/crimson/os/seastore/backref/backref_tree_node.h @@ -5,6 +5,10 @@ #include "crimson/os/seastore/btree/fixed_kv_node.h" +namespace crimson::os::seastore { +class LogicalChildNode; +} + namespace crimson::os::seastore::backref { using backref_node_meta_t = fixed_kv_node_meta_t; @@ -86,6 +90,7 @@ class BackrefInternalNode check_capacity(BACKREF_NODE_SIZE), "INTERNAL_NODE_CAPACITY doesn't fit in BACKREF_NODE_SIZE"); public: + using key_type = paddr_t; template BackrefInternalNode(T&&... t) : FixedKVInternalNode(std::forward(t)...) {} @@ -104,12 +109,15 @@ class BackrefLeafNode paddr_t, paddr_le_t, backref_map_val_t, backref_map_val_le_t, BACKREF_NODE_SIZE, + BackrefInternalNode, BackrefLeafNode, + LogicalChildNode, false> { static_assert( check_capacity(BACKREF_NODE_SIZE), "LEAF_NODE_CAPACITY doesn't fit in BACKREF_NODE_SIZE"); public: + using key_type = paddr_t; template BackrefLeafNode(T&&... t) : FixedKVLeafNode(std::forward(t)...) {} @@ -124,7 +132,7 @@ public: const_iterator iter, paddr_t key, backref_map_val_t val, - LogicalCachedExtent*) final { + LogicalChildNode*) final { journal_insert( iter, key, @@ -136,7 +144,7 @@ public: void update( const_iterator iter, backref_map_val_t val, - LogicalCachedExtent*) final { + LogicalChildNode*) final { return journal_update( iter, val, diff --git a/src/crimson/os/seastore/backref/btree_backref_manager.cc b/src/crimson/os/seastore/backref/btree_backref_manager.cc index 9cbf65f4033..8a30b1ee94d 100644 --- a/src/crimson/os/seastore/backref/btree_backref_manager.cc +++ b/src/crimson/os/seastore/backref/btree_backref_manager.cc @@ -47,24 +47,21 @@ const get_phy_tree_root_node_ret get_phy_tree_root_node< } } -template -void link_phy_tree_root_node(RootBlockRef &root_block, ROOT* backref_root) { - root_block->backref_root_node = backref_root; - ceph_assert(backref_root != nullptr); - backref_root->root_block = root_block; -} - -template void link_phy_tree_root_node( - RootBlockRef &root_block, backref::BackrefInternalNode* backref_root); -template void link_phy_tree_root_node( - RootBlockRef &root_block, backref::BackrefLeafNode* backref_root); -template void link_phy_tree_root_node( - RootBlockRef &root_block, backref::BackrefNode* backref_root); +template +class TreeRootLinker { +public: + static void link_root(RootBlockRef &root_block, RootT* backref_root) { + root_block->backref_root_node = backref_root; + ceph_assert(backref_root != nullptr); + backref_root->parent_of_root = root_block; + } + static void unlink_root(RootBlockRef &root_block) { + root_block->backref_root_node = nullptr; + } +}; -template <> -void unlink_phy_tree_root_node(RootBlockRef &root_block) { - root_block->backref_root_node = nullptr; -} +template class TreeRootLinker; +template class TreeRootLinker; } diff --git a/src/crimson/os/seastore/backref/btree_backref_manager.h b/src/crimson/os/seastore/backref/btree_backref_manager.h index 24897dd55da..a747ef6d3bd 100644 --- a/src/crimson/os/seastore/backref/btree_backref_manager.h +++ b/src/crimson/os/seastore/backref/btree_backref_manager.h @@ -15,7 +15,7 @@ public: : BackrefMapping(ctx) {} BtreeBackrefMapping( op_context_t ctx, - CachedExtentRef parent, + BackrefLeafNodeRef parent, uint16_t pos, backref_map_val_t &val, backref_node_meta_t &&meta) diff --git a/src/crimson/os/seastore/btree/fixed_kv_btree.h b/src/crimson/os/seastore/btree/fixed_kv_btree.h index 88f7cea9ed8..1dd666e8a24 100644 --- a/src/crimson/os/seastore/btree/fixed_kv_btree.h +++ b/src/crimson/os/seastore/btree/fixed_kv_btree.h @@ -14,6 +14,7 @@ #include "crimson/os/seastore/seastore_types.h" #include "crimson/os/seastore/btree/btree_range_pin.h" #include "crimson/os/seastore/root_block.h" +#include "crimson/os/seastore/linked_tree_node.h" namespace crimson::os::seastore::lba_manager::btree { struct lba_map_val_t; @@ -21,14 +22,6 @@ struct lba_map_val_t; namespace crimson::os::seastore { -bool is_valid_child_ptr(ChildableCachedExtent* child); - -bool is_reserved_ptr(ChildableCachedExtent* child); - -inline ChildableCachedExtent* get_reserved_ptr() { - return (ChildableCachedExtent*)0x1; -} - template phy_tree_root_t& get_phy_tree_root(root_t& r); @@ -40,12 +33,6 @@ const get_phy_tree_root_node_ret get_phy_tree_root_node( const RootBlockRef &root_block, op_context_t c); -template -void link_phy_tree_root_node(RootBlockRef &root_block, ROOT_T* root_node); - -template -void unlink_phy_tree_root_node(RootBlockRef &root_block); - template Transaction::tree_stats_t& get_tree_stats(Transaction &t); @@ -351,7 +338,7 @@ public: template void set_root_node(const TCachedExtentRef &root_node) { static_assert(std::is_base_of_v); - link_phy_tree_root_node(root_block, root_node.get()); + TreeRootLinker::link_root(root_block, root_node.get()); } auto get_root_node(op_context_t c) const { @@ -373,7 +360,7 @@ public: root_leaf->range = meta; get_tree_stats(c.trans).depth = 1u; get_tree_stats(c.trans).extents_num_delta++; - link_phy_tree_root_node(root_block, root_leaf.get()); + TreeRootLinker::link_root(root_block, root_leaf.get()); return phy_tree_root_t{root_leaf->get_paddr(), 1u}; } @@ -495,6 +482,7 @@ public: return upper_bound(c, min_max_t::max); } +#ifdef UNIT_TESTS_BUILT template ::type = 0> void check_node( @@ -532,8 +520,8 @@ public: if (node->is_pending()) { auto &n = node->get_stable_for_key(i->get_key()); assert(cnode->get_parent_node().get() == &n); - auto pos = n.lower_bound_offset(i->get_key()); - assert(pos < n.get_node_size()); + auto pos = n.lower_bound(i->get_key()).get_offset(); + assert(pos < n.get_size()); assert(n.children[pos] == cnode.get()); } else { assert(cnode->get_parent_node().get() == node.get()); @@ -547,8 +535,8 @@ public: if (node->is_mutation_pending()) { auto &n = node->get_stable_for_key(i->get_key()); assert(prior.get_parent_node().get() == &n); - auto pos = n.lower_bound_offset(i->get_key()); - assert(pos < n.get_node_size()); + auto pos = n.lower_bound(i->get_key()).get_offset(); + assert(pos < n.get_size()); assert(n.children[pos] == &prior); } else { assert(prior.get_parent_node().get() == node.get()); @@ -567,11 +555,12 @@ public: ceph_abort("impossible"); } } else if (ret == Transaction::get_extent_ret::ABSENT) { - ChildableCachedExtent* child = nullptr; + BaseChildNode, + node_key_t>* child = nullptr; if (node->is_pending()) { auto &n = node->get_stable_for_key(i->get_key()); - auto pos = n.lower_bound_offset(i->get_key()); - assert(pos < n.get_node_size()); + auto pos = n.lower_bound(i->get_key()).get_offset(); + assert(pos < n.get_size()); child = n.children[pos]; } else { child = node->children[i->get_offset()]; @@ -601,7 +590,7 @@ public: } } } else { - auto c = (child_node_t*)child; + auto c = static_cast(child); assert(c->has_parent_tracker()); assert(c->get_parent_node().get() == node.get() || (node->is_pending() && c->is_stable() @@ -634,12 +623,16 @@ public: if (depth > 1) { auto &node = iter.get_internal(depth).node; assert(node->is_valid()); - check_node(c, node); + if (depth > 2 ) { + check_node(c, node); + } else { + check_node(c, node); + } } else { assert(depth == 1); auto &node = iter.leaf.node; assert(node->is_valid()); - check_node(c, node); + check_node(c, node); } return seastar::now(); }; @@ -666,6 +659,7 @@ public: &checker); }); } +#endif using iterate_repeat_ret_inner = base_iertr::future< seastar::stop_iteration>; @@ -730,7 +724,7 @@ public: iterator iter, node_key_t laddr, node_val_t val, - LogicalCachedExtent* nextent + leaf_node_t::base_child_node_t* nextent ) { LOG_PREFIX(FixedKVBtree::insert); SUBTRACET( @@ -780,7 +774,7 @@ public: op_context_t c, node_key_t laddr, node_val_t val, - LogicalCachedExtent* nextent) { + leaf_node_t::base_child_node_t* nextent) { return lower_bound( c, laddr ).si_then([this, c, laddr, val, nextent](auto iter) { @@ -804,7 +798,7 @@ public: op_context_t c, iterator iter, node_val_t val, - LogicalCachedExtent* nextent) + leaf_node_t::base_child_node_t* nextent) { LOG_PREFIX(FixedKVBtree::update); SUBTRACET( @@ -1082,13 +1076,15 @@ public: using update_internal_mapping_iertr = base_iertr; using update_internal_mapping_ret = update_internal_mapping_iertr::future<>; + template + requires std::is_same_v || std::is_same_v update_internal_mapping_ret update_internal_mapping( op_context_t c, depth_t depth, node_key_t laddr, paddr_t old_addr, paddr_t new_addr, - typename internal_node_t::base_ref nextent) + TCachedExtentRef nextent) { LOG_PREFIX(FixedKVBtree::update_internal_mapping); SUBTRACET( @@ -1226,6 +1222,7 @@ private: auto init_internal = [c, depth, begin, end, parent_pos=std::move(parent_pos)] (internal_node_t &node) { + using tree_root_linker_t = TreeRootLinker; assert(!node.is_pending()); assert(!node.is_linked()); node.range = fixed_kv_node_meta_t{begin, end, depth}; @@ -1237,10 +1234,10 @@ private: auto root_block = c.cache.get_root_fast(c.trans); if (root_block->is_mutation_pending()) { auto &stable_root = (RootBlockRef&)*root_block->get_prior_instance(); - link_phy_tree_root_node(stable_root, &node); + tree_root_linker_t::link_root(stable_root, &node); } else { assert(!root_block->is_pending()); - link_phy_tree_root_node(root_block, &node); + tree_root_linker_t::link_root(root_block, &node); } } }; @@ -1311,6 +1308,7 @@ private: auto init_leaf = [c, begin, end, parent_pos=std::move(parent_pos)] (leaf_node_t &node) { + using tree_root_linker_t = TreeRootLinker; assert(!node.is_pending()); assert(!node.is_linked()); node.range = fixed_kv_node_meta_t{begin, end, 1}; @@ -1322,10 +1320,10 @@ private: auto root_block = c.cache.get_root_fast(c.trans); if (root_block->is_mutation_pending()) { auto &stable_root = (RootBlockRef&)*root_block->get_prior_instance(); - link_phy_tree_root_node(stable_root, &node); + tree_root_linker_t::link_root(stable_root, &node); } else { assert(!root_block->is_pending()); - link_phy_tree_root_node(root_block, &node); + tree_root_linker_t::link_root(root_block, &node); } } }; @@ -1493,7 +1491,8 @@ private: return seastar::now(); }; - auto v = parent->template get_child(c, node_iter); + auto v = parent->template get_child( + c.trans, c.cache, node_iter.get_offset(), node_iter.get_key()); // checking the lba child must be atomic with creating // and linking the absent child if (v.has_child()) { @@ -1527,7 +1526,7 @@ private: begin, end, std::make_optional>( - child_pos.template get_parent(), + child_pos.get_parent(), child_pos.get_pos()) ).si_then([on_found=std::move(on_found)](InternalNodeRef node) { return on_found(node); @@ -1563,7 +1562,8 @@ private: return seastar::now(); }; - auto v = parent->template get_child(c, node_iter); + auto v = parent->template get_child( + c.trans, c.cache, node_iter.get_offset(), node_iter.get_key()); // checking the lba child must be atomic with creating // and linking the absent child if (v.has_child()) { @@ -1597,7 +1597,7 @@ private: begin, end, std::make_optional>( - child_pos.template get_parent(), + child_pos.get_parent(), child_pos.get_pos()) ).si_then([on_found=std::move(on_found)](LeafNodeRef node) { return on_found(node); @@ -2118,7 +2118,8 @@ private: return seastar::now(); }; - auto v = parent_pos.node->template get_child(c, donor_iter); + auto v = parent_pos.node->template get_child( + c.trans, c.cache, donor_iter.get_offset(), donor_iter.get_key()); // checking the lba child must be atomic with creating // and linking the absent child if (v.has_child()) { @@ -2150,7 +2151,7 @@ private: begin, end, std::make_optional>( - child_pos.template get_parent(), + child_pos.get_parent(), child_pos.get_pos()) ).si_then([do_merge=std::move(do_merge)](typename NodeType::Ref donor) { return do_merge(donor); diff --git a/src/crimson/os/seastore/btree/fixed_kv_node.cc b/src/crimson/os/seastore/btree/fixed_kv_node.cc deleted file mode 100644 index 94783a01091..00000000000 --- a/src/crimson/os/seastore/btree/fixed_kv_node.cc +++ /dev/null @@ -1,16 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab - -#include "crimson/os/seastore/btree/fixed_kv_node.h" - -namespace crimson::os::seastore { - -bool is_valid_child_ptr(ChildableCachedExtent* child) { - return child != nullptr && child != get_reserved_ptr(); -} - -bool is_reserved_ptr(ChildableCachedExtent* child) { - return child == get_reserved_ptr(); -} - -} // namespace crimson::os::seastore diff --git a/src/crimson/os/seastore/btree/fixed_kv_node.h b/src/crimson/os/seastore/btree/fixed_kv_node.h index 63e2ca38c42..f593df3bbaa 100644 --- a/src/crimson/os/seastore/btree/fixed_kv_node.h +++ b/src/crimson/os/seastore/btree/fixed_kv_node.h @@ -28,231 +28,32 @@ namespace crimson::os::seastore { * Base class enabling recursive lookup between internal and leaf nodes. */ template -struct FixedKVNode : ChildableCachedExtent { +struct FixedKVNode : CachedExtent { using FixedKVNodeRef = TCachedExtentRef; fixed_kv_node_meta_t range; - struct fixedkv_node_cmp_t { - using is_transparent = node_key_t; - bool operator()(const FixedKVNodeRef &l, const FixedKVNodeRef &r) const { - assert(l->range.end <= r->range.begin - || r->range.end <= l->range.begin - || (l->range.begin == r->range.begin - && l->range.end == r->range.end)); - return l->range.begin < r->range.begin; - } - bool operator()(const node_key_t &l, const FixedKVNodeRef &r) const { - return l < r->range.begin; - } - bool operator()(const FixedKVNodeRef &l, const node_key_t &r) const { - return l->range.begin < r; - } - }; - - /* - * - * Nodes of fixed-kv-btree connect to their child nodes by pointers following - * invariants below: - * - * 1. if nodes are stable: - * a. parent points at the node's stable parent - * b. prior_instance is empty - * c. child pointers point at stable children. Child resolution is done - * directly via this array. - * d. copy_sources is empty - * 2. if nodes are mutation_pending: - * a. parent is empty and needs to be fixed upon commit - * b. prior_instance points to its stable version - * c. child pointers are null except for initial_pending() children of - * this transaction. Child resolution is done by first checking this - * array, and then recursively resolving via the parent. We copy child - * pointers from parent on commit. - * d. copy_sources is empty - * 3. if nodes are initial_pending - * a. parent points at its pending parent on this transaction (must exist) - * b. prior_instance is empty or, if it's the result of rewrite, points to - * its stable predecessor - * c. child pointers are null except for initial_pending() children of - * this transaction (live due to 3a below). Child resolution is done - * by first checking this array, and then recursively resolving via - * the correct copy_sources entry. We copy child pointers from copy_sources - * on commit. - * d. copy_sources contains the set of stable nodes at the same tree-level(only - * its "prior_instance" if the node is the result of a rewrite), with which - * the lba range of this node overlaps. - * 4. EXIST_CLEAN and EXIST_MUTATION_PENDING belong to 3 above (except that they - * cannot be rewritten) because their parents must be mutated upon remapping. - */ - std::vector children; - std::set copy_sources; - uint16_t capacity = 0; - parent_tracker_t* my_tracker = nullptr; - RootBlockRef root_block; - - // copy dests points from a stable node back to its pending nodes - // having copy sources at the same tree level, it serves as a two-level index: - // transaction-id then node-key to the pending node. - // - // The copy dest pointers must be symmetric to the copy source pointers. - // - // copy_dests_t will be automatically unregisterred upon transaction destruction, - // see Transaction::views - struct copy_dests_t : trans_spec_view_t { - std::set dests_by_key; - copy_dests_t(Transaction &t) : trans_spec_view_t{t.get_trans_id()} {} - ~copy_dests_t() { - LOG_PREFIX(~copy_dests_t); - SUBTRACE(seastore_fixedkv_tree, "copy_dests_t destroyed"); - } - }; - - trans_view_set_t copy_dests_by_trans; - - void add_copy_dest(Transaction &t, FixedKVNodeRef dest) { - ceph_assert(is_stable()); - ceph_assert(dest->is_pending()); - auto tid = t.get_trans_id(); - auto iter = copy_dests_by_trans.lower_bound( - tid, trans_spec_view_t::cmp_t()); - if (iter == copy_dests_by_trans.end() || - iter->pending_for_transaction != tid) { - iter = copy_dests_by_trans.insert_before( - iter, t.add_transactional_view(t)); - } - auto ©_dests = static_cast(*iter); - auto [it, inserted] = copy_dests.dests_by_key.insert(dest); - assert(inserted || it->get() == dest.get()); - } - - void del_copy_dest(Transaction &t, FixedKVNodeRef dest) { - auto iter = copy_dests_by_trans.find( - t.get_trans_id(), trans_spec_view_t::cmp_t()); - ceph_assert(iter != copy_dests_by_trans.end()); - auto ©_dests = static_cast(*iter); - auto it = copy_dests.dests_by_key.find(dest); - ceph_assert(it != copy_dests.dests_by_key.end()); - copy_dests.dests_by_key.erase(dest); - } - - FixedKVNodeRef find_pending_version(Transaction &t, node_key_t key) { - assert(is_stable()); - auto mut_iter = mutation_pendings.find( - t.get_trans_id(), trans_spec_view_t::cmp_t()); - if (mut_iter != mutation_pendings.end()) { - assert(copy_dests_by_trans.find(t.get_trans_id()) == - copy_dests_by_trans.end()); - return (FixedKVNode*)(&(*mut_iter)); - } - auto iter = copy_dests_by_trans.find( - t.get_trans_id(), trans_spec_view_t::cmp_t()); - ceph_assert(iter != copy_dests_by_trans.end()); - auto ©_dests = static_cast(*iter); - auto it = copy_dests.dests_by_key.lower_bound(key); - if (it == copy_dests.dests_by_key.end() || (*it)->range.begin > key) { - ceph_assert(it != copy_dests.dests_by_key.begin()); - --it; - } - ceph_assert((*it)->range.begin <= key && key < (*it)->range.end); - return *it; - } - - bool is_linked() { - assert(!has_parent_tracker() || !(bool)root_block); - return (bool)has_parent_tracker() || (bool)root_block; + bool is_btree_root() const { + return range.is_root(); } - FixedKVNode(uint16_t capacity, ceph::bufferptr &&ptr) - : ChildableCachedExtent(std::move(ptr)), - children(capacity, nullptr), - capacity(capacity) {} - // Must be identical with FixedKVNode(capacity, ptr) after on_fully_loaded() - explicit FixedKVNode(uint16_t capacity, extent_len_t length) - : ChildableCachedExtent(length), - children(capacity, nullptr), - capacity(capacity) {} + FixedKVNode(ceph::bufferptr &&ptr) + : CachedExtent(std::move(ptr)) {} + // Must be identical with FixedKVNode(ptr) after on_fully_loaded() + explicit FixedKVNode(extent_len_t length) + : CachedExtent(length) {} FixedKVNode(const FixedKVNode &rhs) - : ChildableCachedExtent(rhs), - range(rhs.range), - children(rhs.capacity, nullptr), - capacity(rhs.capacity) {} + : CachedExtent(rhs), + range(rhs.range) {} virtual fixed_kv_node_meta_t get_node_meta() const = 0; - virtual uint16_t get_node_size() const = 0; - virtual ~FixedKVNode() = default; - virtual node_key_t get_key_from_idx(uint16_t idx) const = 0; - - template - void update_child_ptr(iter_t iter, ChildableCachedExtent* child) { - children[iter.get_offset()] = child; - set_child_ptracker(child); - } - - virtual bool is_leaf_and_has_children() const = 0; - - template - void insert_child_ptr(iter_t iter, ChildableCachedExtent* child) { - auto raw_children = children.data(); - auto offset = iter.get_offset(); - std::memmove( - &raw_children[offset + 1], - &raw_children[offset], - (get_node_size() - offset) * sizeof(ChildableCachedExtent*)); - if (child) { - children[offset] = child; - set_child_ptracker(child); - } else { - // this can happen when reserving lba spaces and cloning mappings - ceph_assert(is_leaf_and_has_children()); - // this is to avoid mistakenly copying pointers from - // copy sources when committing this lba node, because - // we rely on pointers' "nullness" to avoid copying - // pointers for updated values - children[offset] = get_reserved_ptr(); - } - } - - template - void remove_child_ptr(iter_t iter) { - LOG_PREFIX(FixedKVNode::remove_child_ptr); - auto raw_children = children.data(); - auto offset = iter.get_offset(); - SUBTRACE(seastore_fixedkv_tree, "trans.{}, pos {}, total size {}, extent {}", - this->pending_for_transaction, - offset, - get_node_size(), - (void*)raw_children[offset]); - // parent tracker of the child being removed will be - // reset when the child is invalidated, so no need to - // reset it here - std::memmove( - &raw_children[offset], - &raw_children[offset + 1], - (get_node_size() - offset - 1) * sizeof(ChildableCachedExtent*)); - } - - virtual bool have_children() const = 0; + virtual void do_on_rewrite(Transaction &t, CachedExtent &extent) = 0; void on_rewrite(Transaction &t, CachedExtent &extent, extent_len_t off) final { assert(get_type() == extent.get_type()); assert(off == 0); - auto &foreign_extent = (FixedKVNode&)extent; range = get_node_meta(); - - if (have_children()) { - if (!foreign_extent.is_pending()) { - foreign_extent.add_copy_dest(t, this); - copy_sources.emplace(&foreign_extent); - } else { - ceph_assert(foreign_extent.is_mutation_pending()); - auto copy_source = - foreign_extent.get_prior_instance()->template cast(); - copy_source->add_copy_dest(t, this); - copy_sources.emplace(copy_source); - children = std::move(foreign_extent.children); - adjust_ptracker_for_children(); - } - } + do_on_rewrite(t, extent); /* This is a bit underhanded. Any relative addrs here must necessarily * be record relative as we are rewriting a dirty extent. Thus, we @@ -271,302 +72,6 @@ struct FixedKVNode : ChildableCachedExtent { } // else: backend_type_t::RANDOM_BLOCK } - FixedKVNode& get_stable_for_key(node_key_t key) const { - ceph_assert(is_pending()); - if (is_mutation_pending()) { - return (FixedKVNode&)*get_prior_instance(); - } else { - ceph_assert(!copy_sources.empty()); - auto it = copy_sources.upper_bound(key); - it--; - auto ©_source = *it; - ceph_assert(copy_source->get_node_meta().is_in_range(key)); - return *copy_source; - } - } - - static void push_copy_sources( - Transaction &t, - FixedKVNode &dest, - FixedKVNode &src) - { - ceph_assert(dest.is_initial_pending()); - if (!src.is_pending()) { - src.add_copy_dest(t, &dest); - dest.copy_sources.emplace(&src); - } else if (src.is_mutation_pending()) { - auto copy_src = - src.get_prior_instance()->template cast(); - copy_src->add_copy_dest(t, &dest); - dest.copy_sources.emplace(copy_src); - } else { - ceph_assert(src.is_initial_pending()); - for (auto &cs : src.copy_sources) { - cs->add_copy_dest(t, &dest); - } - dest.copy_sources.insert( - src.copy_sources.begin(), - src.copy_sources.end()); - } - } - - virtual uint16_t get_node_split_pivot() = 0; - - static void move_child_ptrs( - FixedKVNode &dest, - FixedKVNode &src, - size_t dest_start, - size_t src_start, - size_t src_end) - { - std::memmove( - dest.children.data() + dest_start, - src.children.data() + src_start, - (src_end - src_start) * sizeof(ChildableCachedExtent*)); - - ceph_assert(src_start < src_end); - ceph_assert(src.children.size() >= src_end); - for (auto it = src.children.begin() + src_start; - it != src.children.begin() + src_end; - it++) - { - auto child = *it; - if (is_valid_child_ptr(child)) { - dest.set_child_ptracker(child); - } - } - } - - void link_child(ChildableCachedExtent* child, uint16_t pos) { - assert(pos < get_node_size()); - assert(child); - ceph_assert(!is_pending()); - ceph_assert(child->is_valid() && !child->is_pending()); - assert(!children[pos]); - children[pos] = child; - set_child_ptracker(child); - } - - virtual bool is_child_stable( - op_context_t, - uint16_t pos, - node_key_t key) const = 0; - virtual bool is_child_data_stable( - op_context_t, - uint16_t pos, - node_key_t key) const = 0; - - template - get_child_ret_t get_child( - op_context_t c, - uint16_t pos, - node_key_t key) - { - assert(children.capacity()); - assert(key == get_key_from_idx(pos)); - auto child = children[pos]; - ceph_assert(!is_reserved_ptr(child)); - if (is_valid_child_ptr(child)) { - return c.cache.template get_extent_viewable_by_trans(c.trans, (T*)child); - } else if (is_pending()) { - auto &sparent = get_stable_for_key(key); - auto spos = sparent.lower_bound_offset(key); - auto child = sparent.children[spos]; - if (is_valid_child_ptr(child)) { - return c.cache.template get_extent_viewable_by_trans(c.trans, (T*)child); - } else { - c.cache.account_absent_access(c.trans.get_src()); - return child_pos_t(&sparent, spos); - } - } else { - c.cache.account_absent_access(c.trans.get_src()); - return child_pos_t(this, pos); - } - } - - template - get_child_ret_t get_child(op_context_t c, iter_t iter) { - return get_child(c, iter.get_offset(), iter.get_key()); - } - - void split_child_ptrs( - Transaction &t, - FixedKVNode &left, - FixedKVNode &right) - { - assert(!left.my_tracker); - assert(!right.my_tracker); - if (is_initial_pending()) { - for (auto &cs : copy_sources) { - cs->del_copy_dest(t, this); - } - } - - push_copy_sources(t, left, *this); - push_copy_sources(t, right, *this); - if (is_pending()) { - uint16_t pivot = get_node_split_pivot(); - move_child_ptrs(left, *this, 0, 0, pivot); - move_child_ptrs(right, *this, 0, pivot, get_node_size()); - my_tracker = nullptr; - } - } - - void merge_child_ptrs( - Transaction &t, - FixedKVNode &left, - FixedKVNode &right) - { - ceph_assert(!my_tracker); - - if (left.is_initial_pending()) { - for (auto &cs : left.copy_sources) { - cs->del_copy_dest(t, &left); - } - } - if (right.is_initial_pending()) { - for (auto &cs : right.copy_sources) { - cs->del_copy_dest(t, &right); - } - } - push_copy_sources(t, *this, left); - push_copy_sources(t, *this, right); - - if (left.is_pending()) { - move_child_ptrs(*this, left, 0, 0, left.get_node_size()); - left.my_tracker = nullptr; - } - - if (right.is_pending()) { - move_child_ptrs(*this, right, left.get_node_size(), 0, right.get_node_size()); - right.my_tracker = nullptr; - } - } - - static void balance_child_ptrs( - Transaction &t, - FixedKVNode &left, - FixedKVNode &right, - bool prefer_left, - FixedKVNode &replacement_left, - FixedKVNode &replacement_right) - { - size_t l_size = left.get_node_size(); - size_t r_size = right.get_node_size(); - size_t total = l_size + r_size; - size_t pivot_idx = (l_size + r_size) / 2; - if (total % 2 && prefer_left) { - pivot_idx++; - } - - if (left.is_initial_pending()) { - for (auto &cs : left.copy_sources) { - cs->del_copy_dest(t, &left); - } - } - if (right.is_initial_pending()) { - for (auto &cs : right.copy_sources) { - cs->del_copy_dest(t, &right); - } - } - - assert(!replacement_left.my_tracker); - assert(!replacement_right.my_tracker); - if (pivot_idx < l_size) { - // deal with left - push_copy_sources(t, replacement_left, left); - push_copy_sources(t, replacement_right, left); - if (left.is_pending()) { - move_child_ptrs(replacement_left, left, 0, 0, pivot_idx); - move_child_ptrs(replacement_right, left, 0, pivot_idx, l_size); - left.my_tracker = nullptr; - } - - // deal with right - push_copy_sources(t, replacement_right, right); - if (right.is_pending()) { - move_child_ptrs(replacement_right, right, l_size - pivot_idx, 0, r_size); - right.my_tracker= nullptr; - } - } else { - // deal with left - push_copy_sources(t, replacement_left, left); - if (left.is_pending()) { - move_child_ptrs(replacement_left, left, 0, 0, l_size); - left.my_tracker = nullptr; - } - - // deal with right - push_copy_sources(t, replacement_left, right); - push_copy_sources(t, replacement_right, right); - if (right.is_pending()) { - move_child_ptrs(replacement_left, right, l_size, 0, pivot_idx - l_size); - move_child_ptrs(replacement_right, right, 0, pivot_idx - l_size, r_size); - right.my_tracker= nullptr; - } - } - } - - void set_parent_tracker_from_prior_instance() { - assert(is_mutation_pending()); - auto &prior = (FixedKVNode&)(*get_prior_instance()); - if (range.is_root()) { - ceph_assert(prior.root_block); - ceph_assert(pending_for_transaction); - root_block = prior.root_block; - link_phy_tree_root_node(root_block, this); - return; - } - ceph_assert(!root_block); - take_prior_parent_tracker(); - assert(is_parent_valid()); - auto parent = get_parent_node(); - //TODO: can this search be avoided? - auto off = parent->lower_bound_offset(get_node_meta().begin); - assert(parent->get_key_from_idx(off) == get_node_meta().begin); - parent->children[off] = this; - } - - bool is_children_empty() const { - for (auto it = children.begin(); - it != children.begin() + get_node_size(); - it++) { - if (is_valid_child_ptr(*it) - && (*it)->is_valid()) { - return false; - } - } - return true; - } - - void set_children_from_prior_instance() { - assert(get_prior_instance()); - auto &prior = (FixedKVNode&)(*get_prior_instance()); - assert(prior.my_tracker || prior.is_children_empty()); - - if (prior.my_tracker) { - prior.my_tracker->reset_parent(this); - my_tracker = prior.my_tracker; - // All my initial pending children is pointing to the original - // tracker which has been dropped by the above line, so need - // to adjust them to point to the new tracker - adjust_ptracker_for_children(); - } - assert(my_tracker || is_children_empty()); - } - - void adjust_ptracker_for_children() { - auto begin = children.begin(); - auto end = begin + get_node_size(); - ceph_assert(end <= children.end()); - for (auto it = begin; it != end; it++) { - auto child = *it; - if (is_valid_child_ptr(child)) { - set_child_ptracker(child); - } - } - } - void on_delta_write(paddr_t record_block_offset) final { // All in-memory relative addrs are necessarily record-relative assert(get_prior_instance()); @@ -574,102 +79,20 @@ struct FixedKVNode : ChildableCachedExtent { resolve_relative_addrs(record_block_offset); } - virtual uint16_t lower_bound_offset(node_key_t) const = 0; - virtual uint16_t upper_bound_offset(node_key_t) const = 0; - - virtual bool validate_stable_children() = 0; - - template - uint16_t copy_children_from_stable_source( - FixedKVNode &source, - iter_t foreign_start_it, - iter_t foreign_end_it, - iter_t local_start_it) { - auto foreign_it = foreign_start_it, local_it = local_start_it; - while (foreign_it != foreign_end_it - && local_it.get_offset() < get_node_size()) - { - auto &child = children[local_it.get_offset()]; - if (foreign_it.get_key() == local_it.get_key()) { - // the foreign key is preserved - if (!child) { - child = source.children[foreign_it.get_offset()]; - // child can be either valid if present, nullptr if absent, - // or reserved ptr. - } - foreign_it++; - local_it++; - } else if (foreign_it.get_key() < local_it.get_key()) { - // the foreign key has been removed, because, if it hasn't, - // there must have been a local key before the one pointed - // by the current "local_it" that's equal to this foreign key - // and has pushed the foreign_it forward. - foreign_it++; - } else { - // the local key must be a newly inserted one. - local_it++; - } - } - return local_it.get_offset(); - } - - template - void copy_children_from_stable_sources(Func &&get_iter) { - if (!copy_sources.empty()) { - auto it = --copy_sources.upper_bound(get_node_meta().begin); - auto &cs = *it; - uint16_t start_pos = cs->lower_bound_offset( - get_node_meta().begin); - if (start_pos == cs->get_node_size()) { - it++; - start_pos = 0; - } - uint16_t local_next_pos = 0; - for (; it != copy_sources.end(); it++) { - auto& copy_source = *it; - auto end_pos = copy_source->get_node_size(); - if (copy_source->get_node_meta().is_in_range(get_node_meta().end)) { - end_pos = copy_source->upper_bound_offset(get_node_meta().end); - } - auto local_start_iter = get_iter(*this, local_next_pos); - auto foreign_start_iter = get_iter(*copy_source, start_pos); - auto foreign_end_iter = get_iter(*copy_source, end_pos); - local_next_pos = copy_children_from_stable_source( - *copy_source, foreign_start_iter, foreign_end_iter, local_start_iter); - if (end_pos != copy_source->get_node_size()) { - break; - } - start_pos = 0; - } - } - } - - void on_invalidated(Transaction &t) final { - reset_parent_tracker(); - } - - void on_initial_write() final { - // All in-memory relative addrs are necessarily block-relative - resolve_relative_addrs(get_paddr()); - if (range.is_root()) { - reset_parent_tracker(); - } - assert(has_parent_tracker() ? (is_parent_valid()) : true); - } - - void set_child_ptracker(ChildableCachedExtent *child) { - if (!this->my_tracker) { - this->my_tracker = new parent_tracker_t(this); - } - child->reset_parent_tracker(this->my_tracker); - } - void on_clean_read() final { // From initial write of block, relative addrs are necessarily block-relative resolve_relative_addrs(get_paddr()); } + node_key_t get_begin() const { + return this->range.begin; + } + node_key_t get_end() const { + return this->range.end; + } virtual void resolve_relative_addrs(paddr_t base) = 0; + virtual bool is_linked() const = 0; + virtual uint16_t get_node_split_pivot() const = 0; }; /** @@ -691,7 +114,10 @@ struct FixedKVInternalNode fixed_kv_node_meta_t, fixed_kv_node_meta_le_t, NODE_KEY, NODE_KEY_LE, - paddr_t, paddr_le_t> { + paddr_t, paddr_le_t>, + RootChildNode, + ParentNode, + ChildNode { using Ref = TCachedExtentRef; using base_t = FixedKVNode; using base_ref = typename FixedKVNode::FixedKVNodeRef; @@ -712,125 +138,73 @@ struct FixedKVInternalNode NODE_KEY_LE, node_size, node_type_t>; + using parent_node_t = ParentNode; + using base_child_node_t = BaseChildNode; + using child_node_t = ChildNode; + using root_node_t = RootChildNode; + + bool is_linked() const final { + return this->has_parent_tracker() || + (this->is_btree_root() && this->has_root_parent()); + } + + void do_on_rewrite(Transaction &t, CachedExtent &extent) final { + this->parent_node_t::on_rewrite(t, static_cast(extent)); + } explicit FixedKVInternalNode(ceph::bufferptr &&ptr) - : FixedKVNode(CAPACITY, std::move(ptr)) { + : FixedKVNode(std::move(ptr)), + ParentNode(CAPACITY) { this->set_layout_buf(this->get_bptr().c_str()); } // Must be identical with FixedKVInternalNode(ptr) after on_fully_loaded() explicit FixedKVInternalNode(extent_len_t length) - : FixedKVNode(CAPACITY, length) {} + : FixedKVNode(length), + ParentNode(CAPACITY) {} FixedKVInternalNode(const FixedKVInternalNode &rhs) - : FixedKVNode(rhs) { + : FixedKVNode(rhs), + ParentNode(rhs) { this->set_layout_buf(this->get_bptr().c_str()); } - bool have_children() const final { - return true; - } - - bool is_leaf_and_has_children() const final { + bool is_leaf_and_has_children() const { return false; } - uint16_t get_node_split_pivot() final { + uint16_t get_node_split_pivot() const final{ return this->get_split_pivot().get_offset(); } void prepare_commit() final { - if (this->is_initial_pending()) { - if (this->is_rewrite()) { - this->set_children_from_prior_instance(); - } - this->copy_children_from_stable_sources( - [this](base_t &node, uint16_t pos) { - ceph_assert(node.get_type() == this->get_type()); - auto &n = static_cast(node); - return n.iter_idx(pos); - } - ); - if (this->is_rewrite()) { - this->reset_prior_instance(); - } else { - this->adjust_ptracker_for_children(); - } - assert(this->validate_stable_children()); - this->copy_sources.clear(); - } - } - - bool is_child_stable( - op_context_t, - uint16_t pos, - NODE_KEY key) const final { - ceph_abort("impossible"); - return false; - } - bool is_child_data_stable( - op_context_t, - uint16_t pos, - NODE_KEY key) const final { - ceph_abort("impossible"); - return false; - } - - bool validate_stable_children() final { - LOG_PREFIX(FixedKVInternalNode::validate_stable_children); - if (this->children.empty()) { - return false; - } - - for (auto i : *this) { - auto child = (FixedKVNode*)this->children[i.get_offset()]; - if (child && child->range.begin != i.get_key()) { - SUBERROR(seastore_fixedkv_tree, - "stable child not valid: child {}, child meta{}, key {}", - *child, - child->get_node_meta(), - i.get_key()); - ceph_abort(); - return false; - } - } - return true; + parent_node_t::prepare_commit(); } virtual ~FixedKVInternalNode() { if (this->is_valid() && !this->is_pending()) { - if (this->range.is_root()) { - ceph_assert(this->root_block); - unlink_phy_tree_root_node(this->root_block); + if (this->is_btree_root()) { + this->root_node_t::destroy(); } else { - ceph_assert(this->is_parent_valid()); - auto parent = this->template get_parent_node>(); - auto off = parent->lower_bound_offset(this->get_meta().begin); - assert(parent->get_key_from_idx(off) == this->get_meta().begin); - assert(parent->children[off] == this); - parent->children[off] = nullptr; + this->child_node_t::destroy(); } } } - uint16_t lower_bound_offset(NODE_KEY key) const final { - return this->lower_bound(key).get_offset(); - } - - uint16_t upper_bound_offset(NODE_KEY key) const final { - return this->upper_bound(key).get_offset(); + void on_initial_write() final { + // All in-memory relative addrs are necessarily block-relative + resolve_relative_addrs(this->get_paddr()); + if (this->is_btree_root()) { + this->root_node_t::on_initial_write(); + } } - NODE_KEY get_key_from_idx(uint16_t idx) const final { - return this->iter_idx(idx).get_key(); + void on_invalidated(Transaction &t) final { + this->child_node_t::on_invalidated(); } fixed_kv_node_meta_t get_node_meta() const { return this->get_meta(); } - uint16_t get_node_size() const final { - return this->get_size(); - } - uint32_t calc_crc32c() const final { return this->calc_phy_checksum(); } @@ -855,29 +229,24 @@ struct FixedKVInternalNode }; void on_replace_prior() final { - ceph_assert(!this->is_rewrite()); - this->set_children_from_prior_instance(); - auto &prior = (this_type_t&)(*this->get_prior_instance()); - auto copied = this->copy_children_from_stable_source( - prior, - prior.begin(), - prior.end(), - this->begin()); - ceph_assert(copied <= get_node_size()); - assert(this->validate_stable_children()); - this->set_parent_tracker_from_prior_instance(); + this->parent_node_t::on_replace_prior(); + if (this->is_btree_root()) { + this->root_node_t::on_replace_prior(); + } else { + this->child_node_t::on_replace_prior(); + } } void update( internal_const_iterator_t iter, paddr_t addr, - FixedKVNode* nextent) { + base_child_node_t* nextent) { LOG_PREFIX(FixedKVInternalNode::update); SUBTRACE(seastore_fixedkv_tree, "trans.{}, pos {}, {}", this->pending_for_transaction, iter.get_offset(), - *nextent); - this->update_child_ptr(iter, nextent); + (void*)nextent); + this->update_child_ptr(iter.get_offset(), nextent); return this->journal_update( iter, this->maybe_generate_relative(addr), @@ -888,14 +257,14 @@ struct FixedKVInternalNode internal_const_iterator_t iter, NODE_KEY pivot, paddr_t addr, - FixedKVNode* nextent) { + base_child_node_t* nextent) { LOG_PREFIX(FixedKVInternalNode::insert); SUBTRACE(seastore_fixedkv_tree, "trans.{}, pos {}, key {}, {}", this->pending_for_transaction, iter.get_offset(), pivot, - *nextent); - this->insert_child_ptr(iter, nextent); + (void*)nextent); + this->insert_child_ptr(iter.get_offset(), nextent); return this->journal_insert( iter, pivot, @@ -909,7 +278,7 @@ struct FixedKVInternalNode this->pending_for_transaction, iter.get_offset(), iter.get_key()); - this->remove_child_ptr(iter); + this->remove_child_ptr(iter.get_offset()); return this->journal_remove( iter, maybe_get_delta_buffer()); @@ -919,15 +288,15 @@ struct FixedKVInternalNode internal_const_iterator_t iter, NODE_KEY pivot, paddr_t addr, - FixedKVNode* nextent) { + base_child_node_t* nextent) { LOG_PREFIX(FixedKVInternalNode::replace); SUBTRACE(seastore_fixedkv_tree, "trans.{}, pos {}, old key {}, key {}, {}", this->pending_for_transaction, iter.get_offset(), iter.get_key(), pivot, - *nextent); - this->update_child_ptr(iter, nextent); + (void*)nextent); + this->update_child_ptr(iter.get_offset(), nextent); return this->journal_replace( iter, pivot, @@ -941,10 +310,11 @@ struct FixedKVInternalNode c.trans, node_size, placement_hint_t::HOT, INIT_GENERATION); auto right = c.cache.template alloc_new_non_data_extent( c.trans, node_size, placement_hint_t::HOT, INIT_GENERATION); + this->split_child_ptrs(c.trans, *left, *right); auto pivot = this->split_into(*left, *right); left->range = left->get_meta(); right->range = right->get_meta(); - this->split_child_ptrs(c.trans, *left, *right); + this->adjust_copy_src_dest_on_split(c.trans, *left, *right); return std::make_tuple( left, right, @@ -956,9 +326,12 @@ struct FixedKVInternalNode Ref &right) { auto replacement = c.cache.template alloc_new_non_data_extent( c.trans, node_size, placement_hint_t::HOT, INIT_GENERATION); + replacement->merge_child_ptrs( + c.trans, static_cast(*this), *right); replacement->merge_from(*this, *right->template cast()); replacement->range = replacement->get_meta(); - replacement->merge_child_ptrs(c.trans, *this, *right); + replacement->adjust_copy_src_dest_on_merge( + c.trans, static_cast(*this), *right); return replacement; } @@ -974,6 +347,13 @@ struct FixedKVInternalNode auto replacement_right = c.cache.template alloc_new_non_data_extent( c.trans, node_size, placement_hint_t::HOT, INIT_GENERATION); + this->balance_child_ptrs( + c.trans, + static_cast(*this), + right, + prefer_left, + *replacement_left, + *replacement_right); auto pivot = this->balance_into_new_nodes( *this, right, @@ -982,9 +362,9 @@ struct FixedKVInternalNode *replacement_right); replacement_left->range = replacement_left->get_meta(); replacement_right->range = replacement_right->get_meta(); - this->balance_child_ptrs( + this->adjust_copy_src_dest_on_balance( c.trans, - *this, + static_cast(*this), right, prefer_left, *replacement_left, @@ -1044,7 +424,7 @@ struct FixedKVInternalNode } } - std::ostream &_print_detail(std::ostream &out) const + std::ostream &print_detail(std::ostream &out) const { out << ", size=" << this->get_size() << ", meta=" << this->get_meta() @@ -1052,7 +432,7 @@ struct FixedKVInternalNode if (this->my_tracker) { out << ", my_tracker->parent=" << (void*)this->my_tracker->get_parent().get(); } - return out << ", root_block=" << (void*)this->root_block.get(); + return out << ", root_block=" << (void*)this->parent_of_root.get(); } ceph::bufferlist get_delta() { @@ -1104,7 +484,9 @@ template < typename VAL, typename VAL_LE, size_t node_size, + typename internal_node_type_t, typename node_type_t, + typename child_t, bool has_children> struct FixedKVLeafNode : FixedKVNode, @@ -1113,7 +495,10 @@ struct FixedKVLeafNode fixed_kv_node_meta_t, fixed_kv_node_meta_le_t, NODE_KEY, NODE_KEY_LE, - VAL, VAL_LE> { + VAL, VAL_LE>, + RootChildNode, + ParentNode, + ChildNode { using Ref = TCachedExtentRef; using node_layout_t = common::FixedKVNodeLayout< @@ -1132,31 +517,58 @@ struct FixedKVLeafNode VAL, VAL_LE, node_size, + internal_node_type_t, node_type_t, + child_t, has_children>; using base_t = FixedKVNode; + using parent_node_t = ParentNode; + using base_child_node_t = child_t; + using child_node_t = ChildNode; + using root_node_t = RootChildNode; explicit FixedKVLeafNode(ceph::bufferptr &&ptr) - : FixedKVNode(has_children ? CAPACITY : 0, std::move(ptr)) { + : FixedKVNode(std::move(ptr)), + ParentNode(has_children ? CAPACITY : 0){ this->set_layout_buf(this->get_bptr().c_str()); } // Must be identical with FixedKVLeafNode(ptr) after on_fully_loaded() explicit FixedKVLeafNode(extent_len_t length) - : FixedKVNode(has_children ? CAPACITY : 0, length) {} + : FixedKVNode(length), + ParentNode(has_children ? CAPACITY : 0) {} FixedKVLeafNode(const FixedKVLeafNode &rhs) : FixedKVNode(rhs), + ParentNode(rhs), modifications(rhs.modifications) { this->set_layout_buf(this->get_bptr().c_str()); } + bool is_linked() const final { + return this->has_parent_tracker() || + (this->is_btree_root() && this->has_root_parent()); + } + static constexpr bool do_has_children = has_children; // for the stable extent, modifications is always 0; // it will increase for each transaction-local change, so that // modifications can be detected (see BtreeLBAMapping.parent_modifications) uint64_t modifications = 0; + void on_invalidated(Transaction &t) final { + this->child_node_t::on_invalidated(); + } - bool have_children() const final { - return do_has_children; + void on_initial_write() final { + // All in-memory relative addrs are necessarily block-relative + this->resolve_relative_addrs(this->get_paddr()); + if (this->is_btree_root()) { + this->root_node_t::on_initial_write(); + } + } + + void _on_rewrite(Transaction &t, CachedExtent &extent) final { + if (do_has_children) { + this->parent_node_t::on_rewrite(t, static_cast(extent)); + } } void on_modify() { @@ -1168,87 +580,33 @@ struct FixedKVLeafNode return v != modifications; } - bool is_leaf_and_has_children() const final { + bool is_leaf_and_has_children() const { return has_children; } - uint16_t get_node_split_pivot() final { + uint16_t get_node_split_pivot() const final{ return this->get_split_pivot().get_offset(); } - // children are considered stable if any of the following case is true: - // 1. The child extent is absent in cache - // 2. The child extent is stable - // - // For reserved mappings, the return values are undefined. bool is_child_stable( op_context_t c, uint16_t pos, - NODE_KEY key) const final { - return _is_child_stable(c, pos, key); + NODE_KEY key) const { + return parent_node_t::_is_child_stable(c.trans, pos, key); } bool is_child_data_stable( op_context_t c, uint16_t pos, - NODE_KEY key) const final { - return _is_child_stable(c, pos, key, true); - } - - bool _is_child_stable( - op_context_t c, - uint16_t pos, - NODE_KEY key, - bool data_only = false) const { - assert(key == get_key_from_idx(pos)); - auto child = this->children[pos]; - if (is_reserved_ptr(child)) { - return true; - } else if (is_valid_child_ptr(child)) { - ceph_assert(child->is_logical()); - ceph_assert( - child->is_pending_in_trans(c.trans.get_trans_id()) - || this->is_stable_written()); - if (data_only) { - return c.cache.is_viewable_extent_data_stable(c.trans, child); - } else { - return c.cache.is_viewable_extent_stable(c.trans, child); - } - } else if (this->is_pending()) { - auto key = this->iter_idx(pos).get_key(); - auto &sparent = this->get_stable_for_key(key); - auto spos = sparent.lower_bound_offset(key); - auto child = sparent.children[spos]; - if (is_valid_child_ptr(child)) { - ceph_assert(child->is_logical()); - if (data_only) { - return c.cache.is_viewable_extent_data_stable(c.trans, child); - } else { - return c.cache.is_viewable_extent_stable(c.trans, child); - } - } else { - return true; - } - } else { - return true; - } - } - - bool validate_stable_children() override { - return true; + NODE_KEY key) const { + return parent_node_t::_is_child_stable(c.trans, pos, key, true); } virtual ~FixedKVLeafNode() { if (this->is_valid() && !this->is_pending()) { - if (this->range.is_root()) { - ceph_assert(this->root_block); - unlink_phy_tree_root_node(this->root_block); + if (this->is_btree_root()) { + this->root_node_t::destroy(); } else { - ceph_assert(this->is_parent_valid()); - auto parent = this->template get_parent_node>(); - auto off = parent->lower_bound_offset(this->get_meta().begin); - assert(parent->get_key_from_idx(off) == this->get_meta().begin); - assert(parent->children[off] == this); - parent->children[off] = nullptr; + this->child_node_t::destroy(); } } } @@ -1259,71 +617,28 @@ struct FixedKVLeafNode void prepare_commit() final { if constexpr (has_children) { - if (this->is_initial_pending()) { - if (this->is_rewrite()) { - this->set_children_from_prior_instance(); - } - this->copy_children_from_stable_sources( - [this](base_t &node, uint16_t pos) { - ceph_assert(node.get_type() == this->get_type()); - auto &n = static_cast(node); - return n.iter_idx(pos); - } - ); - if (this->is_rewrite()) { - this->reset_prior_instance(); - } else { - this->adjust_ptracker_for_children(); - } - assert(this->validate_stable_children()); - this->copy_sources.clear(); - } + parent_node_t::prepare_commit(); } modifications = 0; - assert(this->is_initial_pending() - ? this->copy_sources.empty(): - true); } void on_replace_prior() final { ceph_assert(!this->is_rewrite()); if constexpr (has_children) { - this->set_children_from_prior_instance(); - auto &prior = (this_type_t&)(*this->get_prior_instance()); - auto copied = this->copy_children_from_stable_source( - prior, - prior.begin(), - prior.end(), - this->begin()); - ceph_assert(copied <= get_node_size()); - assert(this->validate_stable_children()); - this->set_parent_tracker_from_prior_instance(); + this->parent_node_t::on_replace_prior(); + } + if (this->is_btree_root()) { + this->root_node_t::on_replace_prior(); } else { - this->set_parent_tracker_from_prior_instance(); + this->child_node_t::on_replace_prior(); } modifications = 0; } - uint16_t lower_bound_offset(NODE_KEY key) const final { - return this->lower_bound(key).get_offset(); - } - - uint16_t upper_bound_offset(NODE_KEY key) const final { - return this->upper_bound(key).get_offset(); - } - - NODE_KEY get_key_from_idx(uint16_t idx) const final { - return this->iter_idx(idx).get_key(); - } - fixed_kv_node_meta_t get_node_meta() const { return this->get_meta(); } - uint16_t get_node_size() const final { - return this->get_size(); - } - uint32_t calc_crc32c() const final { return this->calc_phy_checksum(); } @@ -1343,31 +658,38 @@ struct FixedKVLeafNode CachedExtentRef duplicate_for_write(Transaction&) override { assert(delta_buffer.empty()); - return CachedExtentRef(new node_type_t(*this)); + auto extent = new node_type_t(*this); + extent->set_cache_proxy(this->get_cache_proxy()); + return CachedExtentRef(extent); }; virtual void update( internal_const_iterator_t iter, VAL val, - LogicalCachedExtent* nextent) = 0; + base_child_node_t* nextent) = 0; virtual internal_const_iterator_t insert( internal_const_iterator_t iter, NODE_KEY addr, VAL val, - LogicalCachedExtent* nextent) = 0; + base_child_node_t* nextent) = 0; virtual void remove(internal_const_iterator_t iter) = 0; std::tuple make_split_children(op_context_t c) { auto left = c.cache.template alloc_new_non_data_extent( c.trans, node_size, placement_hint_t::HOT, INIT_GENERATION); + left->set_cache_proxy(this->get_cache_proxy()); auto right = c.cache.template alloc_new_non_data_extent( c.trans, node_size, placement_hint_t::HOT, INIT_GENERATION); + if constexpr (has_children) { + this->split_child_ptrs(c.trans, *left, *right); + } + right->set_cache_proxy(this->get_cache_proxy()); auto pivot = this->split_into(*left, *right); left->range = left->get_meta(); right->range = right->get_meta(); if constexpr (has_children) { - this->split_child_ptrs(c.trans, *left, *right); + this->adjust_copy_src_dest_on_split(c.trans, *left, *right); } return std::make_tuple( left, @@ -1380,10 +702,16 @@ struct FixedKVLeafNode Ref &right) { auto replacement = c.cache.template alloc_new_non_data_extent( c.trans, node_size, placement_hint_t::HOT, INIT_GENERATION); + if constexpr (has_children) { + replacement->merge_child_ptrs( + c.trans, static_cast(*this), *right); + } + replacement->set_cache_proxy(this->get_cache_proxy()); replacement->merge_from(*this, *right->template cast()); replacement->range = replacement->get_meta(); if constexpr (has_children) { - replacement->merge_child_ptrs(c.trans, *this, *right); + replacement->adjust_copy_src_dest_on_merge( + c.trans, static_cast(*this), *right); } return replacement; } @@ -1397,9 +725,20 @@ struct FixedKVLeafNode auto &right = *_right->template cast(); auto replacement_left = c.cache.template alloc_new_non_data_extent( c.trans, node_size, placement_hint_t::HOT, INIT_GENERATION); + replacement_left->set_cache_proxy(this->get_cache_proxy()); auto replacement_right = c.cache.template alloc_new_non_data_extent( c.trans, node_size, placement_hint_t::HOT, INIT_GENERATION); + replacement_right->set_cache_proxy(this->get_cache_proxy()); + if constexpr (has_children) { + this->balance_child_ptrs( + c.trans, + static_cast(*this), + right, + prefer_left, + *replacement_left, + *replacement_right); + } auto pivot = this->balance_into_new_nodes( *this, right, @@ -1409,9 +748,9 @@ struct FixedKVLeafNode replacement_left->range = replacement_left->get_meta(); replacement_right->range = replacement_right->get_meta(); if constexpr (has_children) { - this->balance_child_ptrs( + this->adjust_copy_src_dest_on_balance( c.trans, - *this, + static_cast(*this), right, prefer_left, *replacement_left, @@ -1445,7 +784,7 @@ struct FixedKVLeafNode this->resolve_relative_addrs(base); } - std::ostream &_print_detail(std::ostream &out) const + std::ostream &print_detail(std::ostream &out) const { return out << ", size=" << this->get_size() << ", meta=" << this->get_meta(); diff --git a/src/crimson/os/seastore/cache.h b/src/crimson/os/seastore/cache.h index a239b861726..63e9670b8ce 100644 --- a/src/crimson/os/seastore/cache.h +++ b/src/crimson/os/seastore/cache.h @@ -18,6 +18,7 @@ #include "crimson/os/seastore/seastore_types.h" #include "crimson/os/seastore/segment_manager.h" #include "crimson/os/seastore/transaction.h" +#include "crimson/os/seastore/linked_tree_node.h" namespace crimson::os::seastore::backref { class BtreeBackrefManager; @@ -109,7 +110,7 @@ class SegmentProvider; * - TRACE: DEBUG details * - seastore_t logs */ -class Cache { +class Cache : public ExtentTransViewRetriever { public: using base_ertr = crimson::errorator< crimson::ct_error::input_output_error>; @@ -194,7 +195,7 @@ public: return t.root; } - void account_absent_access(Transaction::src_t src) { + void account_absent_access(Transaction::src_t src) final { ++(get_by_src(stats.cache_absent_by_src, src)); ++stats.access.cache_absent; } @@ -432,7 +433,7 @@ public: bool is_viewable_extent_stable( Transaction &t, - CachedExtentRef extent) + CachedExtentRef extent) final { assert(extent); auto view = extent->get_transactional_view(t); @@ -441,7 +442,7 @@ public: bool is_viewable_extent_data_stable( Transaction &t, - CachedExtentRef extent) + CachedExtentRef extent) final { assert(extent); auto view = extent->get_transactional_view(t); @@ -451,7 +452,7 @@ public: get_extent_iertr::future get_extent_viewable_by_trans( Transaction &t, - CachedExtentRef extent) + CachedExtentRef extent) final { assert(extent->is_valid()); @@ -530,18 +531,6 @@ public: }); } - template - get_extent_iertr::future> - get_extent_viewable_by_trans( - Transaction &t, - TCachedExtentRef extent) - { - return get_extent_viewable_by_trans(t, CachedExtentRef(extent.get()) - ).si_then([](auto p_extent) { - return p_extent->template cast(); - }); - } - // wait extent io or do partial reads template get_extent_iertr::future> diff --git a/src/crimson/os/seastore/cached_extent.cc b/src/crimson/os/seastore/cached_extent.cc index 49fede1d9a8..8a00a6d4d83 100644 --- a/src/crimson/os/seastore/cached_extent.cc +++ b/src/crimson/os/seastore/cached_extent.cc @@ -8,6 +8,7 @@ #include "crimson/os/seastore/btree/fixed_kv_node.h" #include "crimson/os/seastore/lba_mapping.h" +#include "crimson/os/seastore/logical_child_node.h" namespace { [[maybe_unused]] seastar::logger& logger() { @@ -88,31 +89,12 @@ CachedExtent* CachedExtent::get_transactional_view(transaction_id_t tid) { } } -std::ostream &operator<<(std::ostream &out, const parent_tracker_t &tracker) { - return out << "tracker_ptr=" << (void*)&tracker - << ", parent_ptr=" << (void*)tracker.get_parent().get(); -} - -std::ostream &ChildableCachedExtent::print_detail(std::ostream &out) const { - if (parent_tracker) { - out << ", parent_tracker(" << *parent_tracker << ")"; - } else { - out << ", parent_tracker(nullptr)"; - } - _print_detail(out); - return out; -} - -std::ostream &LogicalCachedExtent::_print_detail(std::ostream &out) const +std::ostream &LogicalCachedExtent::print_detail(std::ostream &out) const { out << ", laddr=" << laddr; return print_detail_l(out); } -void child_pos_t::link_child(ChildableCachedExtent *c) { - get_parent>()->link_child(c, pos); -} - void CachedExtent::set_invalid(Transaction &t) { state = extent_state_t::INVALID; if (trans_view_hook.is_linked()) { @@ -121,42 +103,12 @@ void CachedExtent::set_invalid(Transaction &t) { on_invalidated(t); } -LogicalCachedExtent::~LogicalCachedExtent() { - if (has_parent_tracker() && is_valid() && !is_pending()) { - assert(get_parent_node()); - auto parent = get_parent_node>(); - auto off = parent->lower_bound_offset(laddr); - assert(parent->get_key_from_idx(off) == laddr); - assert(parent->children[off] == this); - parent->children[off] = nullptr; - } -} - -void LogicalCachedExtent::on_replace_prior() { - assert(is_mutation_pending()); - take_prior_parent_tracker(); - assert(get_parent_node()); - auto parent = get_parent_node>(); - //TODO: can this search be avoided? - auto off = parent->lower_bound_offset(laddr); - assert(parent->get_key_from_idx(off) == laddr); - parent->children[off] = this; -} - void LogicalCachedExtent::maybe_set_intermediate_laddr(LBAMapping &mapping) { laddr = mapping.is_indirect() ? mapping.get_intermediate_base() : mapping.get_key(); } -parent_tracker_t::~parent_tracker_t() { - // this is parent's tracker, reset it - auto &p = (FixedKVNode&)*parent; - if (p.my_tracker == this) { - p.my_tracker = nullptr; - } -} - bool BufferSpace::is_range_loaded(extent_len_t offset, extent_len_t length) const { assert(length > 0); diff --git a/src/crimson/os/seastore/cached_extent.h b/src/crimson/os/seastore/cached_extent.h index 9dc60d719eb..0604982f7e8 100644 --- a/src/crimson/os/seastore/cached_extent.h +++ b/src/crimson/os/seastore/cached_extent.h @@ -29,9 +29,6 @@ class SegmentedAllocator; class TransactionManager; class ExtentPlacementManager; -template -class BtreeNodeMapping; - // #define DEBUG_CACHED_EXTENT_REF #ifdef DEBUG_CACHED_EXTENT_REF @@ -1092,6 +1089,8 @@ protected: friend class ::lba_btree_test; friend class ::btree_test_base; friend class ::cache_test_t; + template + friend class ParentNode; }; std::ostream &operator<<(std::ostream &, CachedExtent::extent_state_t); @@ -1278,55 +1277,6 @@ private: uint64_t bytes = 0; }; -class ChildableCachedExtent; - -class child_pos_t { -public: - child_pos_t(CachedExtentRef stable_parent, uint16_t pos) - : stable_parent(stable_parent), pos(pos) {} - - template - TCachedExtentRef get_parent() { - ceph_assert(stable_parent); - return stable_parent->template cast(); - } - uint16_t get_pos() { - return pos; - } - void link_child(ChildableCachedExtent *c); -private: - CachedExtentRef stable_parent; - uint16_t pos = std::numeric_limits::max(); -}; - -using get_child_iertr = trans_iertr>; -template -using get_child_ifut = get_child_iertr::future>; - -template -struct get_child_ret_t { - std::variant> ret; - get_child_ret_t(child_pos_t pos) - : ret(std::move(pos)) {} - get_child_ret_t(get_child_ifut child) - : ret(std::move(child)) {} - - bool has_child() const { - return ret.index() == 1; - } - - child_pos_t &get_child_pos() { - ceph_assert(ret.index() == 0); - return std::get<0>(ret); - } - - get_child_ifut &get_child_fut() { - ceph_assert(ret.index() == 1); - return std::get<1>(ret); - } -}; - template class PhysicalNodeMapping; @@ -1414,68 +1364,6 @@ public: } }; -class parent_tracker_t - : public boost::intrusive_ref_counter< - parent_tracker_t, boost::thread_unsafe_counter> { -public: - parent_tracker_t(CachedExtentRef parent) - : parent(parent) {} - parent_tracker_t(CachedExtent* parent) - : parent(parent) {} - ~parent_tracker_t(); - template - TCachedExtentRef get_parent() const { - ceph_assert(parent); - if constexpr (std::is_same_v) { - return parent; - } else { - return parent->template cast(); - } - } - void reset_parent(CachedExtentRef p) { - parent = p; - } - bool is_valid() const { - return parent && parent->is_valid(); - } -private: - CachedExtentRef parent; -}; - -std::ostream &operator<<(std::ostream &, const parent_tracker_t &); - -using parent_tracker_ref = boost::intrusive_ptr; - -class ChildableCachedExtent : public CachedExtent { -public: - template - ChildableCachedExtent(T&&... t) : CachedExtent(std::forward(t)...) {} - bool has_parent_tracker() const { - return (bool)parent_tracker; - } - void reset_parent_tracker(parent_tracker_t *p = nullptr) { - parent_tracker.reset(p); - } - bool is_parent_valid() const { - return parent_tracker && parent_tracker->is_valid(); - } - template - TCachedExtentRef get_parent_node() const { - assert(parent_tracker); - return parent_tracker->template get_parent(); - } - void take_prior_parent_tracker() { - auto &prior = (ChildableCachedExtent&)(*get_prior_instance()); - parent_tracker = prior.parent_tracker; - } - std::ostream &print_detail(std::ostream &out) const final; -private: - parent_tracker_ref parent_tracker; - virtual std::ostream &_print_detail(std::ostream &out) const { - return out; - } -}; - class LBAMapping; /** * LogicalCachedExtent @@ -1485,12 +1373,10 @@ class LBAMapping; * Users of TransactionManager should be using extents derived from * LogicalCachedExtent. */ -class LogicalCachedExtent : public ChildableCachedExtent { +class LogicalCachedExtent : public CachedExtent { public: template - LogicalCachedExtent(T&&... t) - : ChildableCachedExtent(std::forward(t)...) - {} + LogicalCachedExtent(T&&... t) : CachedExtent(std::forward(t)...) {} void on_rewrite(Transaction&, CachedExtent &extent, extent_len_t off) final { assert(get_type() == extent.get_type()); @@ -1523,7 +1409,7 @@ public: return true; } - std::ostream &_print_detail(std::ostream &out) const final; + std::ostream &print_detail(std::ostream &out) const final; struct modified_region_t { extent_len_t offset; @@ -1535,10 +1421,9 @@ public: virtual void clear_modified_region() {} - virtual ~LogicalCachedExtent(); + virtual ~LogicalCachedExtent() {} protected: - void on_replace_prior() final; virtual void apply_delta(const ceph::bufferlist &bl) = 0; diff --git a/src/crimson/os/seastore/collection_manager/collection_flat_node.h b/src/crimson/os/seastore/collection_manager/collection_flat_node.h index 1f4de652bba..162b07e2379 100644 --- a/src/crimson/os/seastore/collection_manager/collection_flat_node.h +++ b/src/crimson/os/seastore/collection_manager/collection_flat_node.h @@ -6,6 +6,7 @@ #include "crimson/os/seastore/seastore_types.h" #include "crimson/os/seastore/transaction_manager.h" #include "crimson/os/seastore/collection_manager.h" +#include "crimson/os/seastore/logical_child_node.h" namespace crimson::os::seastore::collection_manager { struct coll_context_t { @@ -90,16 +91,15 @@ WRITE_CLASS_DENC(crimson::os::seastore::collection_manager::delta_buffer_t) namespace crimson::os::seastore::collection_manager { -struct CollectionNode - : LogicalCachedExtent { +struct CollectionNode : LogicalChildNode { using CollectionNodeRef = TCachedExtentRef; explicit CollectionNode(ceph::bufferptr &&ptr) - : LogicalCachedExtent(std::move(ptr)) {} + : LogicalChildNode(std::move(ptr)) {} explicit CollectionNode(extent_len_t length) - : LogicalCachedExtent(length) {} + : LogicalChildNode(length) {} explicit CollectionNode(const CollectionNode &other) - : LogicalCachedExtent(other), + : LogicalChildNode(other), decoded(other.decoded) {} static constexpr extent_types_t type = extent_types_t::COLL_BLOCK; diff --git a/src/crimson/os/seastore/lba_manager.cc b/src/crimson/os/seastore/lba_manager.cc index 6a029efc66e..7d32855ee45 100644 --- a/src/crimson/os/seastore/lba_manager.cc +++ b/src/crimson/os/seastore/lba_manager.cc @@ -9,7 +9,7 @@ namespace crimson::os::seastore { LBAManager::update_mappings_ret LBAManager::update_mappings( Transaction& t, - const std::list& extents) + const std::list& extents) { return trans_intr::do_for_each(extents, [this, &t](auto &extent) { diff --git a/src/crimson/os/seastore/lba_manager.h b/src/crimson/os/seastore/lba_manager.h index 9a34bf56157..2d347e504c9 100644 --- a/src/crimson/os/seastore/lba_manager.h +++ b/src/crimson/os/seastore/lba_manager.h @@ -20,6 +20,7 @@ #include "crimson/os/seastore/cache.h" #include "crimson/os/seastore/seastore_types.h" #include "crimson/os/seastore/lba_mapping.h" +#include "crimson/os/seastore/logical_child_node.h" namespace crimson::os::seastore { @@ -86,7 +87,7 @@ public: virtual alloc_extent_ret alloc_extent( Transaction &t, laddr_t hint, - LogicalCachedExtent &nextent, + LogicalChildNode &nextent, extent_ref_count_t refcount = EXTENT_DEFAULT_REF_COUNT) = 0; using alloc_extents_ret = alloc_extent_iertr::future< @@ -94,7 +95,7 @@ public: virtual alloc_extents_ret alloc_extents( Transaction &t, laddr_t hint, - std::vector extents, + std::vector extents, extent_ref_count_t refcount) = 0; virtual alloc_extent_ret clone_mapping( @@ -161,14 +162,14 @@ public: Transaction &t, LBAMappingRef orig_mapping, std::vector remaps, - std::vector extents // Required if and only + std::vector extents // Required if and only // if pin isn't indirect ) = 0; /** * Should be called after replay on each cached extent. * Implementation must initialize the LBAMapping on any - * LogicalCachedExtent's and may also read in any dependent + * LogicalChildNode's and may also read in any dependent * structures, etc. * * @return returns whether the extent is alive @@ -179,8 +180,10 @@ public: Transaction &t, CachedExtentRef e) = 0; +#ifdef UNIT_TESTS_BUILT using check_child_trackers_ret = base_iertr::future<>; virtual check_child_trackers_ret check_child_trackers(Transaction &t) = 0; +#endif /** * Calls f for each mapping in [begin, end) @@ -221,7 +224,7 @@ public: extent_len_t len, paddr_t paddr, uint32_t checksum, - LogicalCachedExtent *nextent) = 0; + LogicalChildNode *nextent) = 0; /** * update_mappings @@ -232,7 +235,7 @@ public: using update_mappings_ret = update_mappings_iertr::future<>; update_mappings_ret update_mappings( Transaction& t, - const std::list& extents); + const std::list& extents); /** * get_physical_extent_if_live diff --git a/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.cc b/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.cc index 888d3c359ac..363919f6eed 100644 --- a/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.cc +++ b/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.cc @@ -71,30 +71,27 @@ const get_phy_tree_root_node_ret get_phy_tree_root_node< } } -template -void link_phy_tree_root_node(RootBlockRef &root_block, ROOT* lba_root) { - root_block->lba_root_node = lba_root; - ceph_assert(lba_root != nullptr); - lba_root->root_block = root_block; -} - -template void link_phy_tree_root_node( - RootBlockRef &root_block, lba_manager::btree::LBAInternalNode* lba_root); -template void link_phy_tree_root_node( - RootBlockRef &root_block, lba_manager::btree::LBALeafNode* lba_root); -template void link_phy_tree_root_node( - RootBlockRef &root_block, lba_manager::btree::LBANode* lba_root); +template +class TreeRootLinker { +public: + static void link_root(RootBlockRef &root_block, RootT* lba_root) { + root_block->lba_root_node = lba_root; + ceph_assert(lba_root != nullptr); + lba_root->parent_of_root = root_block; + } + static void unlink_root(RootBlockRef &root_block) { + root_block->lba_root_node = nullptr; + } +}; -template <> -void unlink_phy_tree_root_node(RootBlockRef &root_block) { - root_block->lba_root_node = nullptr; -} +template class TreeRootLinker; +template class TreeRootLinker; } namespace crimson::os::seastore::lba_manager::btree { -get_child_ret_t +get_child_ret_t BtreeLBAMapping::get_logical_extent(Transaction &t) { ceph_assert(is_parent_viewable()); @@ -104,7 +101,7 @@ BtreeLBAMapping::get_logical_extent(Transaction &t) auto k = this->is_indirect() ? this->get_intermediate_base() : get_key(); - auto v = p.template get_child(ctx, pos, k); + auto v = p.template get_child(ctx.trans, ctx.cache, pos, k); if (!v.has_child()) { this->child_pos = v.get_child_pos(); } @@ -113,23 +110,19 @@ BtreeLBAMapping::get_logical_extent(Transaction &t) bool BtreeLBAMapping::is_stable() const { - assert(!this->parent_modified()); + assert(!parent_modified()); assert(pos != std::numeric_limits::max()); - auto &p = static_cast(*parent); - auto k = this->is_indirect() - ? this->get_intermediate_base() - : get_key(); + auto &p = (LBALeafNode&)*parent; + auto k = is_indirect() ? get_intermediate_base() : get_key(); return p.is_child_stable(ctx, pos, k); } bool BtreeLBAMapping::is_data_stable() const { - assert(!this->parent_modified()); + assert(!parent_modified()); assert(pos != std::numeric_limits::max()); - auto &p = static_cast(*parent); - auto k = this->is_indirect() - ? this->get_intermediate_base() - : get_key(); + auto &p = (LBALeafNode&)*parent; + auto k = is_indirect() ? get_intermediate_base() : get_key(); return p.is_child_data_stable(ctx, pos, k); } @@ -493,7 +486,7 @@ _init_cached_extent( bool &ret) { if (e->is_logical()) { - auto logn = e->cast(); + auto logn = e->cast(); return btree.lower_bound( c, logn->get_laddr() @@ -542,6 +535,7 @@ BtreeLBAManager::init_cached_extent( }); } +#ifdef UNIT_TESTS_BUILT BtreeLBAManager::check_child_trackers_ret BtreeLBAManager::check_child_trackers( Transaction &t) { @@ -552,6 +546,7 @@ BtreeLBAManager::check_child_trackers( return btree.check_child_trackers(c); }); } +#endif BtreeLBAManager::scan_mappings_ret BtreeLBAManager::scan_mappings( @@ -624,7 +619,7 @@ BtreeLBAManager::update_mapping( extent_len_t len, paddr_t addr, uint32_t checksum, - LogicalCachedExtent *nextent) + LogicalChildNode *nextent) { LOG_PREFIX(BtreeLBAManager::update_mapping); TRACET("laddr={}, paddr {} => {}", t, laddr, prev_addr, addr); @@ -823,7 +818,7 @@ BtreeLBAManager::_update_mapping( Transaction &t, laddr_t addr, update_func_t &&f, - LogicalCachedExtent* nextent) + LogicalChildNode* nextent) { auto c = get_context(t); return with_btree_ret( diff --git a/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.h b/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.h index e0902053d0e..22232b9d97c 100644 --- a/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.h +++ b/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.h @@ -29,8 +29,6 @@ class LogicalCachedExtent; namespace crimson::os::seastore::lba_manager::btree { -struct LBALeafNode; - class BtreeLBAMapping : public LBAMapping { // To support cloning, there are two kinds of lba mappings: // 1. physical lba mapping: the pladdr in the value of which is the paddr of @@ -86,7 +84,11 @@ public: raw_val(val.pladdr), map_val(val), parent_modifications(parent->modifications) - {} + { + if (!parent->is_pending()) { + this->child_pos = {parent, pos}; + } + } lba_map_val_t get_map_val() const { return map_val; @@ -196,7 +198,8 @@ public: } bool is_stable() const final; bool is_data_stable() const final; - get_child_ret_t get_logical_extent(Transaction &t); + get_child_ret_t + get_logical_extent(Transaction &t); protected: LBAMappingRef _duplicate( @@ -281,7 +284,7 @@ public: extent_len_t len = 0; pladdr_t val; uint32_t checksum = 0; - LogicalCachedExtent* extent = nullptr; + LogicalChildNode* extent = nullptr; static alloc_mapping_info_t create_zero(extent_len_t len) { return {L_ADDR_NULL, len, P_ADDR_ZERO, 0, nullptr}; @@ -303,7 +306,7 @@ public: extent_len_t len, paddr_t paddr, uint32_t checksum, - LogicalCachedExtent *extent) { + LogicalChildNode *extent) { return {laddr, len, paddr, checksum, extent}; } }; @@ -369,7 +372,7 @@ public: alloc_extent_ret alloc_extent( Transaction &t, laddr_t hint, - LogicalCachedExtent &ext, + LogicalChildNode &ext, extent_ref_count_t refcount = EXTENT_DEFAULT_REF_COUNT) final { // The real checksum will be updated upon transaction commit @@ -401,7 +404,7 @@ public: alloc_extents_ret alloc_extents( Transaction &t, laddr_t hint, - std::vector extents, + std::vector extents, extent_ref_count_t refcount) final { std::vector alloc_infos; @@ -443,7 +446,7 @@ public: Transaction &t, LBAMappingRef orig_mapping, std::vector remaps, - std::vector extents) final { + std::vector extents) final { LOG_PREFIX(BtreeLBAManager::remap_mappings); assert((orig_mapping->is_indirect()) == (remaps.size() != extents.size())); @@ -566,7 +569,9 @@ public: Transaction &t, CachedExtentRef e) final; +#ifdef UNIT_TESTS_BUILT check_child_trackers_ret check_child_trackers(Transaction &t) final; +#endif scan_mappings_ret scan_mappings( Transaction &t, @@ -586,7 +591,7 @@ public: extent_len_t len, paddr_t paddr, uint32_t checksum, - LogicalCachedExtent*) final; + LogicalChildNode*) final; get_physical_extent_if_live_ret get_physical_extent_if_live( Transaction &t, @@ -647,7 +652,7 @@ private: Transaction &t, laddr_t addr, update_func_t &&f, - LogicalCachedExtent*); + LogicalChildNode*); alloc_extents_ret _alloc_extents( Transaction &t, diff --git a/src/crimson/os/seastore/lba_manager/btree/lba_btree_node.cc b/src/crimson/os/seastore/lba_manager/btree/lba_btree_node.cc index 8bcd494efff..fc1f3710041 100644 --- a/src/crimson/os/seastore/lba_manager/btree/lba_btree_node.cc +++ b/src/crimson/os/seastore/lba_manager/btree/lba_btree_node.cc @@ -12,6 +12,7 @@ #include "crimson/os/seastore/lba_manager/btree/btree_lba_manager.h" #include "crimson/os/seastore/logging.h" +#include "crimson/os/seastore/logical_child_node.h" SET_SUBSYS(seastore_lba); @@ -27,7 +28,7 @@ std::ostream& operator<<(std::ostream& out, const lba_map_val_t& v) << ")"; } -std::ostream &LBALeafNode::_print_detail(std::ostream &out) const +std::ostream &LBALeafNode::print_detail(std::ostream &out) const { out << ", size=" << this->get_size() << ", meta=" << this->get_meta() @@ -36,7 +37,7 @@ std::ostream &LBALeafNode::_print_detail(std::ostream &out) const if (this->my_tracker) { out << ", my_tracker->parent=" << (void*)this->my_tracker->get_parent().get(); } - return out << ", root_block=" << (void*)this->root_block.get(); + return out << ", root_block=" << (void*)this->parent_of_root.get(); } void LBALeafNode::resolve_relative_addrs(paddr_t base) @@ -86,4 +87,57 @@ BtreeLBAMappingRef LBALeafNode::get_mapping( lba_node_meta_t{laddr, (laddr + val.len).checked_to_laddr(), 0}); } +void LBALeafNode::update( + internal_const_iterator_t iter, + lba_map_val_t val, + LogicalChildNode* nextent) +{ + LOG_PREFIX(LBALeafNode::update); + if (nextent) { + SUBTRACE(seastore_fixedkv_tree, "trans.{}, pos {}, {}", + this->pending_for_transaction, + iter.get_offset(), + *nextent); + // child-ptr may already be correct, see LBAManager::update_mappings() + if (!nextent->has_parent_tracker()) { + this->update_child_ptr(iter.get_offset(), nextent); + } + assert(nextent->has_parent_tracker() + && nextent->get_parent_node().get() == this); + } + this->on_modify(); + if (val.pladdr.is_paddr()) { + val.pladdr = maybe_generate_relative(val.pladdr.get_paddr()); + } + return this->journal_update( + iter, + val, + this->maybe_get_delta_buffer()); +} + +LBALeafNode::internal_const_iterator_t LBALeafNode::insert( + internal_const_iterator_t iter, + laddr_t addr, + lba_map_val_t val, + LogicalChildNode* nextent) +{ + LOG_PREFIX(LBALeafNode::insert); + SUBTRACE(seastore_fixedkv_tree, "trans.{}, pos {}, key {}, extent {}", + this->pending_for_transaction, + iter.get_offset(), + addr, + (void*)nextent); + this->on_modify(); + this->insert_child_ptr(iter.get_offset(), nextent); + if (val.pladdr.is_paddr()) { + val.pladdr = maybe_generate_relative(val.pladdr.get_paddr()); + } + this->journal_insert( + iter, + addr, + val, + this->maybe_get_delta_buffer()); + return iter; +} + } diff --git a/src/crimson/os/seastore/lba_manager/btree/lba_btree_node.h b/src/crimson/os/seastore/lba_manager/btree/lba_btree_node.h index 524bf23dd58..7d8a567435d 100644 --- a/src/crimson/os/seastore/lba_manager/btree/lba_btree_node.h +++ b/src/crimson/os/seastore/lba_manager/btree/lba_btree_node.h @@ -12,7 +12,6 @@ #include "crimson/common/fixed_kv_node_layout.h" #include "crimson/common/errorator.h" -#include "crimson/os/seastore/lba_manager.h" #include "crimson/os/seastore/seastore_types.h" #include "crimson/os/seastore/cache.h" #include "crimson/os/seastore/cached_extent.h" @@ -21,9 +20,13 @@ #include "crimson/os/seastore/btree/fixed_kv_btree.h" #include "crimson/os/seastore/btree/fixed_kv_node.h" +namespace crimson::os::seastore { +class LogicalChildNode; +} + namespace crimson::os::seastore::lba_manager::btree { -using base_iertr = LBAManager::base_iertr; +using base_iertr = Cache::base_iertr; using LBANode = FixedKVNode; class BtreeLBAMapping; @@ -87,6 +90,7 @@ struct LBAInternalNode "INTERNAL_NODE_CAPACITY doesn't fit in LBA_BLOCK_SIZE"); using Ref = TCachedExtentRef; using internal_iterator_t = const_iterator; + using key_type = laddr_t; template LBAInternalNode(T&&... t) : FixedKVInternalNode(std::forward(t)...) {} @@ -148,7 +152,9 @@ struct LBALeafNode laddr_t, laddr_le_t, lba_map_val_t, lba_map_val_le_t, LBA_BLOCK_SIZE, + LBAInternalNode, LBALeafNode, + LogicalChildNode, true> { static_assert( check_capacity(LBA_BLOCK_SIZE), @@ -159,90 +165,31 @@ struct LBALeafNode laddr_t, laddr_le_t, lba_map_val_t, lba_map_val_le_t, LBA_BLOCK_SIZE, + LBAInternalNode, LBALeafNode, + LogicalChildNode, true>; using internal_const_iterator_t = typename parent_type_t::node_layout_t::const_iterator; using internal_iterator_t = typename parent_type_t::node_layout_t::iterator; + using key_type = laddr_t; template LBALeafNode(T&&... t) : parent_type_t(std::forward(t)...) {} static constexpr extent_types_t TYPE = extent_types_t::LADDR_LEAF; - bool validate_stable_children() final { - LOG_PREFIX(LBALeafNode::validate_stable_children); - if (this->children.empty()) { - return false; - } - - for (auto i : *this) { - auto child = (LogicalCachedExtent*)this->children[i.get_offset()]; - // Children may not be marked as stable yet, - // the specific order is undefined in the transaction prepare record phase. - if (is_valid_child_ptr(child) && child->get_laddr() != i.get_key()) { - SUBERROR(seastore_fixedkv_tree, - "stable child not valid: child {}, key {}", - *child, - i.get_key()); - ceph_abort(); - return false; - } - } - return true; - } - void update( internal_const_iterator_t iter, lba_map_val_t val, - LogicalCachedExtent* nextent) final { - LOG_PREFIX(LBALeafNode::update); - if (nextent) { - SUBTRACE(seastore_fixedkv_tree, "trans.{}, pos {}, {}", - this->pending_for_transaction, - iter.get_offset(), - *nextent); - // child-ptr may already be correct, see LBAManager::update_mappings() - if (!nextent->has_parent_tracker()) { - this->update_child_ptr(iter, nextent); - } - assert(nextent->has_parent_tracker() - && nextent->get_parent_node().get() == this); - } - this->on_modify(); - if (val.pladdr.is_paddr()) { - val.pladdr = maybe_generate_relative(val.pladdr.get_paddr()); - } - return this->journal_update( - iter, - val, - this->maybe_get_delta_buffer()); - } + LogicalChildNode* nextent) final; internal_const_iterator_t insert( internal_const_iterator_t iter, laddr_t addr, lba_map_val_t val, - LogicalCachedExtent* nextent) final { - LOG_PREFIX(LBALeafNode::insert); - SUBTRACE(seastore_fixedkv_tree, "trans.{}, pos {}, key {}, extent {}", - this->pending_for_transaction, - iter.get_offset(), - addr, - (void*)nextent); - this->on_modify(); - this->insert_child_ptr(iter, nextent); - if (val.pladdr.is_paddr()) { - val.pladdr = maybe_generate_relative(val.pladdr.get_paddr()); - } - this->journal_insert( - iter, - addr, - val, - this->maybe_get_delta_buffer()); - return iter; - } + LogicalChildNode* nextent) final; void remove(internal_const_iterator_t iter) final { LOG_PREFIX(LBALeafNode::remove); @@ -252,7 +199,7 @@ struct LBALeafNode iter.get_key()); assert(iter != this->end()); this->on_modify(); - this->remove_child_ptr(iter); + this->remove_child_ptr(iter.get_offset()); return this->journal_remove( iter, this->maybe_get_delta_buffer()); @@ -297,7 +244,7 @@ struct LBALeafNode return TYPE; } - std::ostream &_print_detail(std::ostream &out) const final; + std::ostream &print_detail(std::ostream &out) const final; void maybe_fix_mapping_pos(BtreeLBAMapping &mapping); std::unique_ptr get_mapping(op_context_t c, laddr_t laddr); diff --git a/src/crimson/os/seastore/lba_mapping.h b/src/crimson/os/seastore/lba_mapping.h index 338d4d53f55..3a0cb594037 100644 --- a/src/crimson/os/seastore/lba_mapping.h +++ b/src/crimson/os/seastore/lba_mapping.h @@ -5,6 +5,8 @@ #include "crimson/os/seastore/cached_extent.h" #include "crimson/os/seastore/btree/btree_range_pin.h" +#include "crimson/os/seastore/lba_manager/btree/lba_btree_node.h" +#include "crimson/os/seastore/logical_child_node.h" namespace crimson::os::seastore { @@ -20,11 +22,7 @@ public: template LBAMapping(T&&... t) : BtreeNodeMapping(std::forward(t)...) - { - if (!parent->is_pending()) { - this->child_pos = {parent, pos}; - } - } + {} // An lba pin may be indirect, see comments in lba_manager/btree/btree_lba_manager.h virtual bool is_indirect() const = 0; @@ -34,10 +32,10 @@ public: // The start offset of the pin, must be 0 if the pin is not indirect virtual extent_len_t get_intermediate_offset() const = 0; - virtual get_child_ret_t + virtual get_child_ret_t get_logical_extent(Transaction &t) = 0; - void link_child(ChildableCachedExtent *c) { + void link_child(LogicalChildNode *c) { ceph_assert(child_pos); child_pos->link_child(c); } @@ -57,7 +55,8 @@ public: virtual ~LBAMapping() {} protected: virtual LBAMappingRef _duplicate(op_context_t) const = 0; - std::optional child_pos = std::nullopt; + std::optional> child_pos = std::nullopt; }; std::ostream &operator<<(std::ostream &out, const LBAMapping &rhs); diff --git a/src/crimson/os/seastore/linked_tree_node.h b/src/crimson/os/seastore/linked_tree_node.h index 4d06c894db0..ecda72a11bb 100644 --- a/src/crimson/os/seastore/linked_tree_node.h +++ b/src/crimson/os/seastore/linked_tree_node.h @@ -942,7 +942,7 @@ private: friend class parent_tracker_t; template friend class child_pos_t; -#ifndef UNIT_TESTS_BUILT +#ifdef UNIT_TESTS_BUILT template friend class FixedKVBtree; #endif diff --git a/src/crimson/os/seastore/logical_child_node.cc b/src/crimson/os/seastore/logical_child_node.cc new file mode 100644 index 00000000000..c07f7e44108 --- /dev/null +++ b/src/crimson/os/seastore/logical_child_node.cc @@ -0,0 +1,45 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "crimson/os/seastore/logical_child_node.h" +#include "crimson/os/seastore/lba_mapping.h" + +namespace crimson::os::seastore { + +std::ostream &operator<<(std::ostream &out, const LBAMapping &rhs) +{ + out << "LBAMapping(" << rhs.get_key() + << "~0x" << std::hex << rhs.get_length() << std::dec + << "->" << rhs.get_val(); + if (rhs.is_indirect()) { + out << ",indirect(" << rhs.get_intermediate_base() + << "~0x" << std::hex << rhs.get_intermediate_length() + << "@0x" << rhs.get_intermediate_offset() << std::dec + << ")"; + } + out << ")"; + return out; +} + +std::ostream &operator<<(std::ostream &out, const lba_pin_list_t &rhs) +{ + bool first = true; + out << '['; + for (const auto &i: rhs) { + out << (first ? "" : ",") << *i; + first = false; + } + return out << ']'; +} + +LBAMappingRef LBAMapping::duplicate() const { + auto ret = _duplicate(ctx); + ret->range = range; + ret->value = value; + ret->parent = parent; + ret->len = len; + ret->pos = pos; + return ret; +} + +} // namespace crimson::os::seastore diff --git a/src/crimson/os/seastore/logical_child_node.h b/src/crimson/os/seastore/logical_child_node.h new file mode 100644 index 00000000000..03f2b73f975 --- /dev/null +++ b/src/crimson/os/seastore/logical_child_node.h @@ -0,0 +1,52 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include "crimson/os/seastore/cached_extent.h" +#include "crimson/os/seastore/linked_tree_node.h" +#include "crimson/os/seastore/btree/btree_range_pin.h" +#include "crimson/os/seastore/lba_manager/btree/lba_btree_node.h" + +namespace crimson::os::seastore { + +class LogicalChildNode : public LogicalCachedExtent, + public ChildNode { + using child_node_t = ChildNode< + lba_manager::btree::LBALeafNode, LogicalChildNode, laddr_t>; +public: + template + LogicalChildNode(T&&... t) : LogicalCachedExtent(std::forward(t)...) {} + + virtual ~LogicalChildNode() { + if (this->has_parent_tracker() && + this->is_valid() && + !this->is_pending()) { + child_node_t::destroy(); + } + } + + bool is_btree_root() const { + return false; + } + + laddr_t get_begin() const { + return get_laddr(); + } + + laddr_t get_end() const { + return (get_laddr() + get_length()).checked_to_laddr(); + } +protected: + void on_replace_prior() final { + child_node_t::on_replace_prior(); + } +}; +using LogicalChildNodeRef = TCachedExtentRef; +} // namespace crimson::os::seastore + +#if FMT_VERSION >= 90000 +template <> struct fmt::formatter : fmt::ostream_formatter {}; +#endif diff --git a/src/crimson/os/seastore/object_data_handler.h b/src/crimson/os/seastore/object_data_handler.h index 50be20b3706..3fc02a33836 100644 --- a/src/crimson/os/seastore/object_data_handler.h +++ b/src/crimson/os/seastore/object_data_handler.h @@ -13,6 +13,7 @@ #include "crimson/os/seastore/onode.h" #include "crimson/os/seastore/transaction_manager.h" #include "crimson/os/seastore/transaction.h" +#include "crimson/os/seastore/logical_child_node.h" namespace crimson::os::seastore { @@ -77,7 +78,7 @@ private: mutable std::optional ptr = std::nullopt; }; -struct ObjectDataBlock : crimson::os::seastore::LogicalCachedExtent { +struct ObjectDataBlock : crimson::os::seastore::LogicalChildNode { using Ref = TCachedExtentRef; std::vector delta = {}; @@ -88,11 +89,11 @@ struct ObjectDataBlock : crimson::os::seastore::LogicalCachedExtent { overwrite_buf_t cached_overwrites; explicit ObjectDataBlock(ceph::bufferptr &&ptr) - : LogicalCachedExtent(std::move(ptr)) {} + : LogicalChildNode(std::move(ptr)) {} explicit ObjectDataBlock(const ObjectDataBlock &other, share_buffer_t s) - : LogicalCachedExtent(other, s), modified_region(other.modified_region) {} + : LogicalChildNode(other, s), modified_region(other.modified_region) {} explicit ObjectDataBlock(extent_len_t length) - : LogicalCachedExtent(length) {} + : LogicalChildNode(length) {} CachedExtentRef duplicate_for_write(Transaction&) final { return CachedExtentRef(new ObjectDataBlock(*this, share_buffer_t{})); diff --git a/src/crimson/os/seastore/omap_manager/btree/omap_btree_node.h b/src/crimson/os/seastore/omap_manager/btree/omap_btree_node.h index 7c2392731c0..0f9551b8c16 100644 --- a/src/crimson/os/seastore/omap_manager/btree/omap_btree_node.h +++ b/src/crimson/os/seastore/omap_manager/btree/omap_btree_node.h @@ -28,7 +28,7 @@ enum class mutation_status_t : uint8_t { FAIL = 3 }; -struct OMapNode : LogicalCachedExtent { +struct OMapNode : LogicalChildNode { using base_iertr = OMapManager::base_iertr; using OMapNodeRef = TCachedExtentRef; @@ -48,10 +48,10 @@ struct OMapNode : LogicalCachedExtent { need_merge(n_merge) {} }; - explicit OMapNode(ceph::bufferptr &&ptr) : LogicalCachedExtent(std::move(ptr)) {} - explicit OMapNode(extent_len_t length) : LogicalCachedExtent(length) {} + explicit OMapNode(ceph::bufferptr &&ptr) : LogicalChildNode(std::move(ptr)) {} + explicit OMapNode(extent_len_t length) : LogicalChildNode(length) {} OMapNode(const OMapNode &other) - : LogicalCachedExtent(other) {} + : LogicalChildNode(other) {} using get_value_iertr = base_iertr; using get_value_ret = OMapManager::omap_get_value_ret; diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager.h b/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager.h index f8772929c6a..acc301e6c8e 100644 --- a/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager.h +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager.h @@ -21,8 +21,7 @@ namespace crimson::os::seastore::onode { -using crimson::os::seastore::LogicalCachedExtent; -class NodeExtent : public LogicalCachedExtent { +class NodeExtent : public LogicalChildNode { public: virtual ~NodeExtent() = default; const node_header_t& get_header() const { @@ -41,7 +40,7 @@ class NodeExtent : public LogicalCachedExtent { protected: template - NodeExtent(T&&... t) : LogicalCachedExtent(std::forward(t)...) {} + NodeExtent(T&&... t) : LogicalChildNode(std::forward(t)...) {} NodeExtentMutable do_get_mutable() { return NodeExtentMutable(get_bptr().c_str(), get_length()); diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/seastore.h b/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/seastore.h index 04b959f767d..ab25c312f9b 100644 --- a/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/seastore.h +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/seastore.h @@ -159,7 +159,7 @@ class SeastoreNodeExtentManager final: public TransactionManagerHandle { retire_iertr::future<> retire_extent( Transaction& t, NodeExtentRef _extent) override { - LogicalCachedExtentRef extent = _extent; + LogicalChildNodeRef extent = _extent; auto addr = extent->get_laddr(); auto len = extent->get_length(); SUBDEBUGT(seastore_onode, diff --git a/src/crimson/os/seastore/root_block.cc b/src/crimson/os/seastore/root_block.cc index dec6e12ea4e..c422442f5e0 100644 --- a/src/crimson/os/seastore/root_block.cc +++ b/src/crimson/os/seastore/root_block.cc @@ -4,6 +4,7 @@ #include "crimson/os/seastore/root_block.h" #include "crimson/os/seastore/lba_manager/btree/lba_btree_node.h" #include "crimson/os/seastore/backref/backref_tree_node.h" +#include "crimson/os/seastore/linked_tree_node.h" namespace crimson::os::seastore { @@ -12,20 +13,40 @@ void RootBlock::on_replace_prior() { auto &prior = static_cast(*get_prior_instance()); if (prior.lba_root_node) { RootBlockRef this_ref = this; - link_phy_tree_root_node( - this_ref, - static_cast(prior.lba_root_node) - ); + auto lba_root = static_cast< + lba_manager::btree::LBANode*>(prior.lba_root_node); + if (likely(lba_root->range.depth > 1)) { + TreeRootLinker::link_root( + this_ref, + static_cast(prior.lba_root_node) + ); + } else { + assert(lba_root->range.depth == 1); + TreeRootLinker::link_root( + this_ref, + static_cast(prior.lba_root_node) + ); + } } } if (!backref_root_node) { auto &prior = static_cast(*get_prior_instance()); if (prior.backref_root_node) { RootBlockRef this_ref = this; - link_phy_tree_root_node( - this_ref, - static_cast(prior.backref_root_node) - ); + auto backref_root = static_cast< + backref::BackrefNode*>(prior.backref_root_node); + if (likely(backref_root->range.depth > 1)) { + TreeRootLinker::link_root( + this_ref, + static_cast(prior.backref_root_node) + ); + } else { + assert(backref_root->range.depth == 1); + TreeRootLinker::link_root( + this_ref, + static_cast(prior.backref_root_node) + ); + } } } } diff --git a/src/crimson/os/seastore/root_meta.h b/src/crimson/os/seastore/root_meta.h index edf082f1e38..5d6b21b17dc 100644 --- a/src/crimson/os/seastore/root_meta.h +++ b/src/crimson/os/seastore/root_meta.h @@ -3,22 +3,22 @@ #pragma once -#include "crimson/os/seastore/cached_extent.h" +#include "crimson/os/seastore/logical_child_node.h" namespace crimson::os::seastore { -struct RootMetaBlock : LogicalCachedExtent { +struct RootMetaBlock : LogicalChildNode { using meta_t = std::map; using Ref = TCachedExtentRef; static constexpr size_t SIZE = 4096; static constexpr int MAX_META_LENGTH = 1024; explicit RootMetaBlock(ceph::bufferptr &&ptr) - : LogicalCachedExtent(std::move(ptr)) {} + : LogicalChildNode(std::move(ptr)) {} explicit RootMetaBlock(extent_len_t length) - : LogicalCachedExtent(length) {} + : LogicalChildNode(length) {} RootMetaBlock(const RootMetaBlock &rhs) - : LogicalCachedExtent(rhs) {} + : LogicalChildNode(rhs) {} CachedExtentRef duplicate_for_write(Transaction&) final { return CachedExtentRef(new RootMetaBlock(*this)); diff --git a/src/crimson/os/seastore/transaction_manager.cc b/src/crimson/os/seastore/transaction_manager.cc index 807d88b2cbc..e8f4801b42e 100644 --- a/src/crimson/os/seastore/transaction_manager.cc +++ b/src/crimson/os/seastore/transaction_manager.cc @@ -201,7 +201,7 @@ TransactionManager::close() { #ifdef UNIT_TESTS_BUILT TransactionManager::ref_ret TransactionManager::inc_ref( Transaction &t, - LogicalCachedExtentRef &ref) + LogicalChildNodeRef &ref) { LOG_PREFIX(TransactionManager::inc_ref); TRACET("{}", t, *ref); @@ -232,7 +232,7 @@ TransactionManager::ref_ret TransactionManager::inc_ref( TransactionManager::ref_ret TransactionManager::remove( Transaction &t, - LogicalCachedExtentRef &ref) + LogicalChildNodeRef &ref) { LOG_PREFIX(TransactionManager::remove); DEBUGT("{} ...", t, *ref); @@ -335,7 +335,7 @@ TransactionManager::update_lba_mappings( LOG_PREFIX(TransactionManager::update_lba_mappings); SUBTRACET(seastore_t, "update extent lba mappings", t); return seastar::do_with( - std::list(), + std::list(), std::list(), [this, &t, &pre_allocated_extents](auto &lextents, auto &pextents) { auto chksum_func = [&lextents, &pextents, this](auto &extent) { @@ -365,7 +365,7 @@ TransactionManager::update_lba_mappings( assert(extent->get_last_committed_crc() == CRC_NULL); } #endif - lextents.emplace_back(extent->template cast()); + lextents.emplace_back(extent->template cast()); } else { assert(is_physical_type(extent->get_type())); pextents.emplace_back(extent); @@ -519,7 +519,7 @@ TransactionManager::get_next_dirty_extents( TransactionManager::rewrite_extent_ret TransactionManager::rewrite_logical_extent( Transaction& t, - LogicalCachedExtentRef extent) + LogicalChildNodeRef extent) { LOG_PREFIX(TransactionManager::rewrite_logical_extent); if (extent->has_been_invalidated()) { @@ -536,7 +536,7 @@ TransactionManager::rewrite_logical_extent( extent->get_length(), extent->get_user_hint(), // get target rewrite generation - extent->get_rewrite_generation())->cast(); + extent->get_rewrite_generation())->cast(); nextent->rewrite(t, *extent, 0); DEBUGT("rewriting meta -- {} to {}", t, *extent, *nextent); @@ -590,7 +590,7 @@ TransactionManager::rewrite_logical_extent( extents, [extent, this, FNAME, &t, &off, &left, &refcount](auto &_nextent) { - auto nextent = _nextent->template cast(); + auto nextent = _nextent->template cast(); bool first_extent = (off == 0); ceph_assert(left >= nextent->get_length()); nextent->rewrite(t, *extent, off); @@ -702,7 +702,7 @@ TransactionManager::rewrite_extent_ret TransactionManager::rewrite_extent( auto fut = rewrite_extent_iertr::now(); if (extent->is_logical()) { assert(is_logical_type(extent->get_type())); - fut = rewrite_logical_extent(t, extent->cast()); + fut = rewrite_logical_extent(t, extent->cast()); } else if (is_backref_node(extent->get_type())) { fut = backref_manager->rewrite_extent(t, extent); } else { diff --git a/src/crimson/os/seastore/transaction_manager.h b/src/crimson/os/seastore/transaction_manager.h index e574460894a..fc364b9a2fb 100644 --- a/src/crimson/os/seastore/transaction_manager.h +++ b/src/crimson/os/seastore/transaction_manager.h @@ -321,11 +321,11 @@ public: } /// Obtain mutable copy of extent - LogicalCachedExtentRef get_mutable_extent(Transaction &t, LogicalCachedExtentRef ref) { + LogicalChildNodeRef get_mutable_extent(Transaction &t, LogicalChildNodeRef ref) { LOG_PREFIX(TransactionManager::get_mutable_extent); auto ret = cache->duplicate_for_write( t, - ref)->cast(); + ref)->cast(); if (!ret->has_laddr()) { SUBDEBUGT(seastore_tm, "duplicate from {}", t, *ref); ret->set_laddr(ref->get_laddr()); @@ -343,7 +343,7 @@ public: /// Add refcount for ref ref_ret inc_ref( Transaction &t, - LogicalCachedExtentRef &ref); + LogicalChildNodeRef &ref); /// Add refcount for offset ref_ret inc_ref( @@ -359,7 +359,7 @@ public: */ ref_ret remove( Transaction &t, - LogicalCachedExtentRef &ref); + LogicalChildNodeRef &ref); ref_ret remove( Transaction &t, @@ -433,7 +433,7 @@ public: return lba_manager->alloc_extents( t, laddr_hint, - std::vector( + std::vector( exts.begin(), exts.end()), EXTENT_DEFAULT_REF_COUNT ).si_then([exts=std::move(exts), &t, FNAME](auto &&) mutable { @@ -484,7 +484,7 @@ public: Transaction &t, LBAMappingRef &&pin, std::array remaps) { - static_assert(std::is_base_of_v); + static_assert(std::is_base_of_v); #ifndef NDEBUG std::sort(remaps.begin(), remaps.end(), @@ -513,7 +513,7 @@ public: #endif return seastar::do_with( - std::vector(), + std::vector(), std::move(pin), std::move(remaps), [&t, this](auto &extents, auto &pin, auto &remaps) { @@ -968,7 +968,7 @@ private: return v.get_child_fut( ).si_then([pin=std::move(pin)](auto extent) { #ifndef NDEBUG - auto lextent = extent->template cast(); + auto lextent = extent->template cast(); auto pin_laddr = pin->get_key(); if (pin->is_indirect()) { pin_laddr = pin->get_intermediate_base(); @@ -982,7 +982,7 @@ private: } } - base_iertr::future read_pin_by_type( + base_iertr::future read_pin_by_type( Transaction &t, LBAMappingRef pin, extent_types_t type) @@ -1006,7 +1006,7 @@ private: rewrite_extent_ret rewrite_logical_extent( Transaction& t, - LogicalCachedExtentRef extent); + LogicalChildNodeRef extent); submit_transaction_direct_ret do_submit_transaction( Transaction &t, @@ -1103,7 +1103,7 @@ private: * Get extent mapped at pin. */ using pin_to_extent_by_type_ret = pin_to_extent_iertr::future< - LogicalCachedExtentRef>; + LogicalChildNodeRef>; pin_to_extent_by_type_ret pin_to_extent_by_type( Transaction &t, LBAMappingRef pin, @@ -1130,7 +1130,7 @@ private: direct_key, direct_length, [&pref](CachedExtent &extent) mutable { - auto &lextent = static_cast(extent); + auto &lextent = static_cast(extent); assert(!lextent.has_laddr()); assert(!lextent.has_been_invalidated()); assert(!pref.has_been_invalidated()); @@ -1167,7 +1167,7 @@ private: } return pin_to_extent_by_type_ret( interruptible::ready_future_marker{}, - std::move(ref->template cast())); + std::move(ref->template cast())); }); } diff --git a/src/test/crimson/seastore/test_block.h b/src/test/crimson/seastore/test_block.h index 546f357dea0..0ec2cc1e056 100644 --- a/src/test/crimson/seastore/test_block.h +++ b/src/test/crimson/seastore/test_block.h @@ -6,6 +6,7 @@ #include #include "crimson/os/seastore/transaction_manager.h" +#include "crimson/os/seastore/logical_child_node.h" namespace crimson::os::seastore { @@ -43,7 +44,7 @@ inline std::ostream &operator<<( << ", checksum=0x" << rhs.checksum << std::dec << ")"; } -struct TestBlock : crimson::os::seastore::LogicalCachedExtent { +struct TestBlock : crimson::os::seastore::LogicalChildNode { constexpr static extent_len_t SIZE = 4<<10; using Ref = TCachedExtentRef; @@ -52,11 +53,11 @@ struct TestBlock : crimson::os::seastore::LogicalCachedExtent { interval_set modified_region; explicit TestBlock(ceph::bufferptr &&ptr) - : LogicalCachedExtent(std::move(ptr)) {} + : LogicalChildNode(std::move(ptr)) {} explicit TestBlock(extent_len_t length) - : LogicalCachedExtent(length) {} + : LogicalChildNode(length) {} TestBlock(const TestBlock &other) - : LogicalCachedExtent(other), modified_region(other.modified_region) {} + : LogicalChildNode(other), modified_region(other.modified_region) {} CachedExtentRef duplicate_for_write(Transaction&) final { return CachedExtentRef(new TestBlock(*this)); diff --git a/src/test/crimson/seastore/test_btree_lba_manager.cc b/src/test/crimson/seastore/test_btree_lba_manager.cc index 7874411e0ff..96965d8a435 100644 --- a/src/test/crimson/seastore/test_btree_lba_manager.cc +++ b/src/test/crimson/seastore/test_btree_lba_manager.cc @@ -471,7 +471,7 @@ struct btree_lba_manager_test : btree_test_base { *t.t, [this](auto &t) { return seastar::do_with( - std::list(), + std::list(), std::list(), [this, &t](auto &lextents, auto &pextents) { auto chksum_func = [&lextents, &pextents](auto &extent) { @@ -485,7 +485,7 @@ struct btree_lba_manager_test : btree_test_base { extent->update_in_extent_chksum_field(crc); } assert(extent->calc_crc32c() == extent->get_last_committed_crc()); - lextents.emplace_back(extent->template cast()); + lextents.emplace_back(extent->template cast()); } else { pextents.push_back(extent); } @@ -550,7 +550,7 @@ struct btree_lba_manager_test : btree_test_base { 0, get_paddr()); return seastar::do_with( - std::vector( + std::vector( extents.begin(), extents.end()), [this, &t, hint](auto &extents) { return lba_manager->alloc_extents(t, hint, std::move(extents), EXTENT_DEFAULT_REF_COUNT);