From 4750ca539f10487afac1fbd06ca8cd35a1773f82 Mon Sep 17 00:00:00 2001 From: Samuel Just Date: Tue, 7 Jul 2020 11:41:23 -0700 Subject: [PATCH] crimson/: add a meta field to FixedKVNodeLayout and use in LBANode* Also removes LBANode::depth. Signed-off-by: Samuel Just --- src/crimson/common/fixed_kv_node_layout.h | 40 ++++++++-- .../lba_manager/btree/btree_lba_manager.cc | 6 +- .../lba_manager/btree/btree_range_pin.h | 36 +++++++++ .../lba_manager/btree/lba_btree_node.h | 7 +- .../lba_manager/btree/lba_btree_node_impl.cc | 27 ++++--- .../lba_manager/btree/lba_btree_node_impl.h | 55 ++++++++++--- src/crimson/os/seastore/root_block.h | 2 - src/crimson/os/seastore/seastore_types.h | 3 + src/test/crimson/test_fixed_kv_node_layout.cc | 77 +++++++++++++++++-- 9 files changed, 211 insertions(+), 42 deletions(-) diff --git a/src/crimson/common/fixed_kv_node_layout.h b/src/crimson/common/fixed_kv_node_layout.h index cc40350220832..b06a3cdcb4b2f 100644 --- a/src/crimson/common/fixed_kv_node_layout.h +++ b/src/crimson/common/fixed_kv_node_layout.h @@ -39,6 +39,8 @@ struct maybe_const_t { */ template < size_t CAPACITY, + typename Meta, + typename MetaInt, typename K, typename KINT, typename V, @@ -47,8 +49,8 @@ template < class FixedKVNodeLayout { char *buf = nullptr; - using L = absl::container_internal::Layout; - static constexpr L layout{1, CAPACITY, CAPACITY}; + using L = absl::container_internal::Layout; + static constexpr L layout{1, 1, CAPACITY, CAPACITY}; public: template @@ -409,6 +411,21 @@ public: *layout.template Pointer<0>(buf) = size; } + /** + * get_meta/set_meta + * + * Enables stashing a templated type within the layout. + * Cannot be modified after initial write as it is not represented + * in delta_t + */ + Meta get_meta() const { + MetaInt &metaint = *layout.template Pointer<1>(buf); + return Meta(metaint); + } + void set_meta(const Meta &meta) { + *layout.template Pointer<1>(buf) = MetaInt(meta); + } + constexpr static size_t get_capacity() { return CAPACITY; } @@ -447,6 +464,10 @@ public: right.copy_from_foreign(right.begin(), piviter, end()); right.set_size(end() - piviter); + auto [lmeta, rmeta] = get_meta().split_into(piviter->get_key()); + left.set_meta(lmeta); + right.set_meta(rmeta); + return piviter->get_key(); } @@ -471,6 +492,7 @@ public: right.begin(), right.end()); set_size(left.get_size() + right.get_size()); + set_meta(Meta::merge_from(left.get_meta(), right.get_meta())); } /** @@ -493,7 +515,7 @@ public: if (total % 2 && prefer_left) { pivot_idx++; } - auto replacement_pivot = pivot_idx > left.get_size() ? + auto replacement_pivot = pivot_idx >= left.get_size() ? right.iter_idx(pivot_idx - left.get_size())->get_key() : left.iter_idx(pivot_idx)->get_key(); @@ -535,6 +557,10 @@ public: replacement_right.set_size(total - pivot_idx); } + auto [lmeta, rmeta] = Meta::rebalance( + left.get_meta(), right.get_meta(), replacement_pivot); + replacement_left.set_meta(lmeta); + replacement_right.set_meta(rmeta); return replacement_pivot; } @@ -594,10 +620,10 @@ private: * Get pointer to start of key array */ KINT *get_key_ptr() { - return layout.template Pointer<1>(buf); + return layout.template Pointer<2>(buf); } const KINT *get_key_ptr() const { - return layout.template Pointer<1>(buf); + return layout.template Pointer<2>(buf); } /** @@ -606,10 +632,10 @@ private: * Get pointer to start of val array */ VINT *get_val_ptr() { - return layout.template Pointer<2>(buf); + return layout.template Pointer<3>(buf); } const VINT *get_val_ptr() const { - return layout.template Pointer<2>(buf); + return layout.template Pointer<3>(buf); } /** diff --git a/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.cc b/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.cc index a2ee499f4df41..7e6df859faeb2 100644 --- a/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.cc +++ b/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.cc @@ -27,8 +27,8 @@ BtreeLBAManager::mkfs_ret BtreeLBAManager::mkfs( auto root_leaf = cache.alloc_new_extent( t, LBA_BLOCK_SIZE); - root_leaf->set_depth(1); root_leaf->set_size(0); + root_leaf->set_meta({0, L_ADDR_MAX, 1}); croot->get_lba_root() = root_t{ 1, @@ -191,14 +191,14 @@ BtreeLBAManager::insert_mapping_ret BtreeLBAManager::insert_mapping( croot = mut_croot->cast(); } auto nroot = cache.alloc_new_extent(t, LBA_BLOCK_SIZE); - nroot->set_depth(root->depth + 1); + nroot->set_meta({0, L_ADDR_MAX, croot->root.lba_depth + 1}); nroot->journal_insert( nroot->begin(), L_ADDR_MIN, root->get_paddr(), nullptr); croot->get_lba_root().lba_root_addr = nroot->get_paddr(); - croot->get_lba_root().lba_depth = root->depth + 1; + croot->get_lba_root().lba_depth = root->get_node_meta().depth + 1; return nroot->split_entry( get_context(t), laddr, nroot->begin(), root); diff --git a/src/crimson/os/seastore/lba_manager/btree/btree_range_pin.h b/src/crimson/os/seastore/lba_manager/btree/btree_range_pin.h index 0c9a5fdd65dac..9be1f8048476e 100644 --- a/src/crimson/os/seastore/lba_manager/btree/btree_range_pin.h +++ b/src/crimson/os/seastore/lba_manager/btree/btree_range_pin.h @@ -8,6 +8,42 @@ namespace crimson::os::seastore::lba_manager::btree { +struct lba_node_meta_t { + laddr_t begin = 0; + laddr_t end = 0; + depth_t depth = 0; + + std::pair split_into(laddr_t pivot) const { + return std::make_pair( + lba_node_meta_t{begin, pivot, depth}, + lba_node_meta_t{pivot, end, depth}); + } + + static lba_node_meta_t merge_from(const lba_node_meta_t &lhs, const lba_node_meta_t &rhs) { + assert(lhs.depth == rhs.depth); + return lba_node_meta_t{lhs.begin, rhs.end, lhs.depth}; + } + + static std::pair + rebalance(const lba_node_meta_t &lhs, const lba_node_meta_t &rhs, laddr_t pivot) { + assert(lhs.depth == rhs.depth); + return std::make_pair( + lba_node_meta_t{lhs.begin, pivot, lhs.depth}, + lba_node_meta_t{pivot, rhs.end, lhs.depth}); + } +}; + +inline std::ostream &operator<<( + std::ostream &lhs, + const lba_node_meta_t &rhs) +{ + return lhs << "btree_node_meta_t(" + << "begin=" << rhs.begin + << ", end=" << rhs.end + << ", depth=" << rhs.depth + << ")"; +} + /* BtreeLBAPin * * References leaf node diff --git a/src/crimson/os/seastore/lba_manager/btree/lba_btree_node.h b/src/crimson/os/seastore/lba_manager/btree/lba_btree_node.h index 0d9cc12fdb0b3..9b9b6b0662121 100644 --- a/src/crimson/os/seastore/lba_manager/btree/lba_btree_node.h +++ b/src/crimson/os/seastore/lba_manager/btree/lba_btree_node.h @@ -8,6 +8,7 @@ #include #include "crimson/common/log.h" +#include "crimson/os/seastore/lba_manager/btree/btree_range_pin.h" namespace crimson::os::seastore::lba_manager::btree { @@ -48,12 +49,12 @@ struct LBANode : CachedExtent { using lookup_range_ertr = LBAManager::get_mapping_ertr; using lookup_range_ret = LBAManager::get_mapping_ret; - depth_t depth = 0; LBANode(ceph::bufferptr &&ptr) : CachedExtent(std::move(ptr)) {} - LBANode(const LBANode &rhs) = default; + LBANode(const LBANode &rhs) + : CachedExtent(rhs) {} - void set_depth(depth_t _depth) { depth = _depth; } + virtual lba_node_meta_t get_node_meta() const = 0; /** * lookup_range diff --git a/src/crimson/os/seastore/lba_manager/btree/lba_btree_node_impl.cc b/src/crimson/os/seastore/lba_manager/btree/lba_btree_node_impl.cc index c2cfbe4358e78..7342ecffb0d43 100644 --- a/src/crimson/os/seastore/lba_manager/btree/lba_btree_node_impl.cc +++ b/src/crimson/os/seastore/lba_manager/btree/lba_btree_node_impl.cc @@ -23,7 +23,7 @@ namespace crimson::os::seastore::lba_manager::btree { std::ostream &LBAInternalNode::print_detail(std::ostream &out) const { return out << ", size=" << get_size() - << ", depth=" << depth; + << ", meta=" << get_meta(); } LBAInternalNode::lookup_range_ret LBAInternalNode::lookup_range( @@ -40,7 +40,7 @@ LBAInternalNode::lookup_range_ret LBAInternalNode::lookup_range( [this, c, &result, addr, len](const auto &val) mutable { return get_lba_btree_extent( c, - depth-1, + get_meta().depth - 1, val.get_val(), get_paddr()).safe_then( [c, &result, addr, len](auto extent) mutable { @@ -68,7 +68,7 @@ LBAInternalNode::insert_ret LBAInternalNode::insert( auto insertion_pt = get_containing_child(laddr); return get_lba_btree_extent( c, - depth-1, + get_meta().depth - 1, insertion_pt->get_val(), get_paddr()).safe_then( [this, insertion_pt, c, laddr, val=std::move(val)]( @@ -89,7 +89,7 @@ LBAInternalNode::mutate_mapping_ret LBAInternalNode::mutate_mapping( { return get_lba_btree_extent( c, - depth-1, + get_meta().depth - 1, get_containing_child(laddr)->get_val(), get_paddr() ).safe_then([this, c, laddr](LBANodeRef extent) { @@ -131,7 +131,7 @@ LBAInternalNode::find_hole_ret LBAInternalNode::find_hole( } return get_lba_btree_extent( c, - depth-1, + get_meta().depth - 1, i->get_val(), get_paddr() ).safe_then([c, &i, len](auto extent) mutable { @@ -234,7 +234,7 @@ LBAInternalNode::merge_entry( auto donor_iter = donor_is_left ? iter - 1 : iter + 1; return get_lba_btree_extent( c, - depth - 1, + get_meta().depth - 1, donor_iter->get_val(), get_paddr() ).safe_then([this, c, addr, iter, entry, donor_iter, donor_is_left]( @@ -303,7 +303,7 @@ LBAInternalNode::get_containing_child(laddr_t laddr) std::ostream &LBALeafNode::print_detail(std::ostream &out) const { return out << ", size=" << get_size() - << ", depth=" << depth; + << ", meta=" << get_meta(); } LBALeafNode::lookup_range_ret LBALeafNode::lookup_range( @@ -466,10 +466,13 @@ Cache::get_extent_ertr::future get_lba_btree_extent( c.trans, offset, LBA_BLOCK_SIZE).safe_then([depth](auto ret) { - ret->set_depth(depth); + auto meta = ret->get_meta(); + if (ret->get_size()) { + ceph_assert(meta.begin <= ret->begin()->get_key()); + ceph_assert(meta.end > (ret->end() - 1)->get_key()); + } return LBANodeRef(ret.detach(), /* add_ref = */ false); }); - } else { logger().debug( "get_lba_btree_extent: reading leaf at offset {}, depth {}", @@ -482,7 +485,11 @@ Cache::get_extent_ertr::future get_lba_btree_extent( logger().debug( "get_lba_btree_extent: read leaf at offset {}", offset); - ret->set_depth(depth); + auto meta = ret->get_meta(); + if (ret->get_size()) { + ceph_assert(meta.begin <= ret->begin()->get_key()); + ceph_assert(meta.end > (ret->end() - 1)->get_key()); + } return LBANodeRef(ret.detach(), /* add_ref = */ false); }); } diff --git a/src/crimson/os/seastore/lba_manager/btree/lba_btree_node_impl.h b/src/crimson/os/seastore/lba_manager/btree/lba_btree_node_impl.h index 73cc452dedf35..ba5d9edce16e6 100644 --- a/src/crimson/os/seastore/lba_manager/btree/lba_btree_node_impl.h +++ b/src/crimson/os/seastore/lba_manager/btree/lba_btree_node_impl.h @@ -24,6 +24,29 @@ namespace crimson::os::seastore::lba_manager::btree { constexpr size_t LBA_BLOCK_SIZE = 4096; +/** + * lba_node_meta_le_t + * + * On disk layout for lba_node_meta_t + */ +struct lba_node_meta_le_t { + laddr_le_t begin = init_le64(0); + laddr_le_t end = init_le64(0); + depth_le_t depth = init_les32(0); + + lba_node_meta_le_t() = default; + lba_node_meta_le_t(const lba_node_meta_le_t &) = default; + explicit lba_node_meta_le_t(const lba_node_meta_t &val) + : begin(init_le64(val.begin)), + end(init_le64(val.end)), + depth(init_les32(val.depth)) {} + + operator lba_node_meta_t() const { + return lba_node_meta_t{ begin, end, depth }; + } +}; + + /** * LBAInternalNode * @@ -31,18 +54,22 @@ constexpr size_t LBA_BLOCK_SIZE = 4096; * LBA Tree. * * Layout (4k): - * size : uint32_t[1] (1*4)b - * keys : laddr_t[255] (255*8)b - * values : paddr_t[255] (255*8)b - * = 4084 + * size : uint32_t[1] 4b + * (padding) : 4b + * meta : lba_node_meta_le_t[3] (1*24)b + * keys : laddr_t[255] (254*8)b + * values : paddr_t[255] (254*8)b + * = 4096 * TODO: make the above capacity calculation part of FixedKVNodeLayout + * TODO: the above alignment probably isn't portable without further work */ -constexpr size_t INTERNAL_NODE_CAPACITY = 255; +constexpr size_t INTERNAL_NODE_CAPACITY = 254; struct LBAInternalNode : LBANode, common::FixedKVNodeLayout< INTERNAL_NODE_CAPACITY, + lba_node_meta_t, lba_node_meta_le_t, laddr_t, laddr_le_t, paddr_t, paddr_le_t> { using internal_iterator_t = const_iterator; @@ -53,6 +80,8 @@ struct LBAInternalNode static constexpr extent_types_t type = extent_types_t::LADDR_INTERNAL; + lba_node_meta_t get_node_meta() const final { return get_meta(); } + CachedExtentRef duplicate_for_write() final { assert(delta_buffer.empty()); return CachedExtentRef(new LBAInternalNode(*this)); @@ -239,14 +268,17 @@ struct LBAInternalNode * LBA Tree. * * Layout (4k): - * num_entries: uint32_t 4b - * keys : laddr_t[170] (146*8)b - * values : lba_map_val_t[170] (146*20)b - * = 4090 + * size : uint32_t[1] 4b + * (padding) : 4b + * meta : lba_node_meta_le_t[3] (1*24)b + * keys : laddr_t[170] (145*8)b + * values : lba_map_val_t[170] (145*20)b + * = 4092 * * TODO: update FixedKVNodeLayout to handle the above calculation + * TODO: the above alignment probably isn't portable without further work */ -constexpr size_t LEAF_NODE_CAPACITY = 146; +constexpr size_t LEAF_NODE_CAPACITY = 145; /** * lba_map_val_le_t @@ -276,6 +308,7 @@ struct LBALeafNode : LBANode, common::FixedKVNodeLayout< LEAF_NODE_CAPACITY, + lba_node_meta_t, lba_node_meta_le_t, laddr_t, laddr_le_t, lba_map_val_t, lba_map_val_le_t> { using internal_iterator_t = const_iterator; @@ -286,6 +319,8 @@ struct LBALeafNode static constexpr extent_types_t type = extent_types_t::LADDR_LEAF; + lba_node_meta_t get_node_meta() const final { return get_meta(); } + CachedExtentRef duplicate_for_write() final { assert(delta_buffer.empty()); return CachedExtentRef(new LBALeafNode(*this)); diff --git a/src/crimson/os/seastore/root_block.h b/src/crimson/os/seastore/root_block.h index 9a9a58c5b0129..a5aac321144f1 100644 --- a/src/crimson/os/seastore/root_block.h +++ b/src/crimson/os/seastore/root_block.h @@ -7,8 +7,6 @@ namespace crimson::os::seastore { -using depth_t = uint32_t; - /** * root_t * diff --git a/src/crimson/os/seastore/seastore_types.h b/src/crimson/os/seastore/seastore_types.h index 5abd279d69043..90b753695d617 100644 --- a/src/crimson/os/seastore/seastore_types.h +++ b/src/crimson/os/seastore/seastore_types.h @@ -13,6 +13,9 @@ namespace crimson::os::seastore { +using depth_t = int32_t; +using depth_le_t = ceph_les32; + using checksum_t = uint32_t; // Identifies segment location on disk, see SegmentManager, diff --git a/src/test/crimson/test_fixed_kv_node_layout.cc b/src/test/crimson/test_fixed_kv_node_layout.cc index 5970f0b4ebcff..5f59c4e5e045d 100644 --- a/src/test/crimson/test_fixed_kv_node_layout.cc +++ b/src/test/crimson/test_fixed_kv_node_layout.cc @@ -35,24 +35,62 @@ struct test_val_le_t { operator test_val_t() const { return test_val_t{t1, t2}; } +}; - bool operator==(const test_val_t &rhs) const { +struct test_meta_t { + uint32_t t1 = 0; + uint32_t t2 = 0; + + bool operator==(const test_meta_t &rhs) const { return rhs.t1 == t1 && rhs.t2 == t2; } - bool operator!=(const test_val_t &rhs) const { + bool operator!=(const test_meta_t &rhs) const { return !(*this == rhs); } + + std::pair split_into(uint32_t pivot) const { + return std::make_pair( + test_meta_t{t1, pivot}, + test_meta_t{pivot, t2}); + } + + static test_meta_t merge_from(const test_meta_t &lhs, const test_meta_t &rhs) { + return test_meta_t{lhs.t1, rhs.t2}; + } + + static std::pair + rebalance(const test_meta_t &lhs, const test_meta_t &rhs, uint32_t pivot) { + return std::make_pair( + test_meta_t{lhs.t1, pivot}, + test_meta_t{pivot, rhs.t2}); + } }; -constexpr size_t CAPACITY = 341; +struct test_meta_le_t { + ceph_le32 t1 = init_le32(0); + ceph_le32 t2 = init_le32(0); + + test_meta_le_t() = default; + test_meta_le_t(const test_meta_le_t &) = default; + test_meta_le_t(const test_meta_t &nv) + : t1(init_le32(nv.t1)), t2(init_le32(nv.t2)) {} + + operator test_meta_t() const { + return test_meta_t{t1, t2}; + } +}; + +constexpr size_t CAPACITY = 339; struct TestNode : FixedKVNodeLayout< CAPACITY, + test_meta_t, test_meta_le_t, uint32_t, ceph_le32, test_val_t, test_val_le_t> { char buf[4096]; TestNode() : FixedKVNodeLayout(buf) { memset(buf, 0, sizeof(buf)); + set_meta({0, std::numeric_limits::max()}); } TestNode(const TestNode &rhs) : FixedKVNodeLayout(buf) { @@ -127,6 +165,10 @@ TEST(FixedKVNodeTest, split) { node.split_into(split_left, split_right); ASSERT_EQ(split_left.get_size() + split_right.get_size(), CAPACITY); + ASSERT_EQ(split_left.get_meta().t1, split_left.begin()->get_key()); + ASSERT_EQ(split_left.get_meta().t2, split_right.get_meta().t1); + ASSERT_EQ(split_right.get_meta().t2, std::numeric_limits::max()); + num = 0; for (auto &i : split_left) { ASSERT_EQ(i.get_key(), num); @@ -165,6 +207,8 @@ TEST(FixedKVNodeTest, merge) { ++num; ++iter; } + node.set_meta({0, num}); + node2.set_meta({num, std::numeric_limits::max()}); iter = node2.begin(); while (num < (2 * (CAPACITY / 2))) { node2.journal_insert(iter, num, test_val_t{num, num}, nullptr); @@ -180,6 +224,10 @@ TEST(FixedKVNodeTest, merge) { auto node_merged = TestNode(); node_merged.merge_from(node, node2); + ASSERT_EQ( + node_merged.get_meta(), + (test_meta_t{0, std::numeric_limits::max()})); + ASSERT_EQ(node_merged.get_size(), total); num = 0; for (auto &i : node_merged) { @@ -210,6 +258,8 @@ void run_balance_test(unsigned left, unsigned right, bool prefer_left) ++num; ++iter; } + node.set_meta({0, num}); + node2.set_meta({num, std::numeric_limits::max()}); iter = node2.begin(); while (num < (left + right)) { node2.journal_insert(iter, num, test_val_t{num, num}, nullptr); @@ -224,7 +274,7 @@ void run_balance_test(unsigned left, unsigned right, bool prefer_left) auto node_balanced = TestNode(); auto node_balanced2 = TestNode(); - TestNode::balance_into_new_nodes( + auto pivot = TestNode::balance_into_new_nodes( node, node2, prefer_left, @@ -233,15 +283,28 @@ void run_balance_test(unsigned left, unsigned right, bool prefer_left) ASSERT_EQ(total, node_balanced.get_size() + node_balanced2.get_size()); + unsigned left_size, right_size; if (total % 2) { if (prefer_left) { - ASSERT_EQ(node_balanced.get_size(), node_balanced2.get_size() + 1); + left_size = (total/2) + 1; + right_size = total/2; } else { - ASSERT_EQ(node_balanced.get_size() + 1, node_balanced2.get_size()); + left_size = total/2; + right_size = (total/2) + 1; } } else { - ASSERT_EQ(node_balanced.get_size(), node_balanced2.get_size()); + left_size = right_size = total/2; } + ASSERT_EQ(pivot, left_size); + ASSERT_EQ(left_size, node_balanced.get_size()); + ASSERT_EQ(right_size, node_balanced2.get_size()); + + ASSERT_EQ( + node_balanced.get_meta(), + (test_meta_t{0, left_size})); + ASSERT_EQ( + node_balanced2.get_meta(), + (test_meta_t{left_size, std::numeric_limits::max()})); num = 0; for (auto &i: node_balanced) { -- 2.39.5