From: Chunmei Liu Date: Mon, 16 Aug 2021 20:54:07 +0000 (-0700) Subject: crimson/seastore: replace L_ADDR_MIN by obj hash in Onode X-Git-Tag: v17.1.0~1106^2~3 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=23693c1010302ec13c20f7cad76a130725b58c85;p=ceph.git crimson/seastore: replace L_ADDR_MIN by obj hash in Onode Signed-off-by: Chunmei Liu --- diff --git a/src/crimson/os/seastore/onode.h b/src/crimson/os/seastore/onode.h index c556dcd36d70..04de0caaeaac 100644 --- a/src/crimson/os/seastore/onode.h +++ b/src/crimson/os/seastore/onode.h @@ -58,6 +58,7 @@ public: virtual const onode_layout_t &get_layout() const = 0; virtual onode_layout_t &get_mutable_layout(Transaction &t) = 0; virtual ~Onode() = default; + virtual laddr_t get_hint() const = 0; }; diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.h b/src/crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.h index 47106a8c3466..814471fffbc9 100644 --- a/src/crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.h +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.h @@ -94,6 +94,9 @@ struct FLTreeOnode final : Onode, Value { status = status_t::DELETED; } + laddr_t get_hint() const final { + return Value::get_hint(); + } ~FLTreeOnode() final {} }; diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/node.cc b/src/crimson/os/seastore/onode_manager/staged-fltree/node.cc index b6feac38e283..da324075b172 100644 --- a/src/crimson/os/seastore/onode_manager/staged-fltree/node.cc +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/node.cc @@ -400,7 +400,7 @@ void Node::test_make_destructable( eagain_ifuture<> Node::mkfs(context_t c, RootNodeTracker& root_tracker) { LOG_PREFIX(OTree::Node::mkfs); - return LeafNode::allocate_root(c, root_tracker + return LeafNode::allocate_root(c, L_ADDR_MIN, root_tracker ).si_then([c, FNAME](auto ret) { INFOT("allocated root {}", c.t, ret->get_name()); }); @@ -462,13 +462,13 @@ Super::URef Node::deref_super() return ret; } -eagain_ifuture<> Node::upgrade_root(context_t c) +eagain_ifuture<> Node::upgrade_root(context_t c, laddr_t hint) { LOG_PREFIX(OTree::Node::upgrade_root); assert(impl->field_type() == field_type_t::N0); auto super_to_move = deref_super(); return InternalNode::allocate_root( - c, impl->level(), impl->laddr(), std::move(super_to_move) + c, hint, impl->level(), impl->laddr(), std::move(super_to_move) ).si_then([this, c, FNAME](auto new_root) { as_child(search_position_t::end(), new_root); INFOT("upgraded from {} to {}", @@ -604,7 +604,7 @@ Node::try_merge_adjacent( // so use rebuild_extent() as a workaround to rebuild the node from a // fresh extent, thus no need to generate delta. auto left_addr = left_for_merge->impl->laddr(); - return left_for_merge->rebuild_extent(c + return left_for_merge->rebuild_extent(c, L_ADDR_MIN ).si_then([c, update_index_after_merge, left_addr, merge_stage = merge_stage, @@ -743,7 +743,7 @@ eagain_ifuture> Node::load( }); } -eagain_ifuture Node::rebuild_extent(context_t c) +eagain_ifuture Node::rebuild_extent(context_t c, laddr_t hint) { LOG_PREFIX(OTree::Node::rebuild_extent); DEBUGT("{} ...", c.t, get_name()); @@ -752,7 +752,7 @@ eagain_ifuture Node::rebuild_extent(context_t c) // note: laddr can be changed after rebuild, but we don't fix the parent // mapping as it is part of the merge process. - return impl->rebuild_extent(c); + return impl->rebuild_extent(c, hint); } eagain_ifuture<> Node::retire(context_t c, Ref&& this_ref) @@ -1214,12 +1214,12 @@ eagain_ifuture, Ref>> InternalNode::get_child_peers( } eagain_ifuture> InternalNode::allocate_root( - context_t c, level_t old_root_level, + context_t c, laddr_t hint, level_t old_root_level, laddr_t old_root_addr, Super::URef&& super) { // support tree height up to 256 ceph_assert(old_root_level < MAX_LEVEL); - return InternalNode::allocate(c, field_type_t::N0, true, old_root_level + 1 + return InternalNode::allocate(c, hint, field_type_t::N0, true, old_root_level + 1 ).si_then([c, old_root_addr, super = std::move(super)](auto fresh_node) mutable { auto root = fresh_node.node; @@ -1379,7 +1379,7 @@ eagain_ifuture<> InternalNode::test_clone_root( assert(impl->is_level_tail()); assert(impl->field_type() == field_type_t::N0); Ref this_ref = this; - return InternalNode::allocate(c_other, field_type_t::N0, true, impl->level() + return InternalNode::allocate(c_other, L_ADDR_MIN, field_type_t::N0, true, impl->level() ).si_then([this, c_other, &tracker_other](auto fresh_other) { impl->test_copy_to(fresh_other.mut); auto cloned_root = fresh_other.node; @@ -1489,10 +1489,11 @@ eagain_ifuture> InternalNode::insert_or_split( // proceed to split with insert // assume I'm already ref-counted by caller - return (is_root() ? upgrade_root(c) : eagain_iertr::now() - ).si_then([this, c] { + auto hint = insert_key.get_hint(); + return (is_root() ? upgrade_root(c, hint) : eagain_iertr::now() + ).si_then([this, c, hint] { return InternalNode::allocate( - c, impl->field_type(), impl->is_level_tail(), impl->level()); + c, hint, impl->field_type(), impl->is_level_tail(), impl->level()); }).si_then([this, insert_key, insert_child, insert_pos, insert_stage=insert_stage, insert_size=insert_size, outdated_child, c, FNAME](auto fresh_right) mutable { @@ -1735,9 +1736,9 @@ void InternalNode::validate_child_inconsistent(const Node& child) const } eagain_ifuture InternalNode::allocate( - context_t c, field_type_t field_type, bool is_level_tail, level_t level) + context_t c, laddr_t hint, field_type_t field_type, bool is_level_tail, level_t level) { - return InternalNodeImpl::allocate(c, field_type, is_level_tail, level + return InternalNodeImpl::allocate(c, hint, field_type, is_level_tail, level ).si_then([](auto&& fresh_impl) { auto node = Ref(new InternalNode( fresh_impl.impl.get(), std::move(fresh_impl.impl))); @@ -2013,7 +2014,7 @@ eagain_ifuture<> LeafNode::test_clone_root( assert(impl->is_level_tail()); assert(impl->field_type() == field_type_t::N0); Ref this_ref = this; - return LeafNode::allocate(c_other, field_type_t::N0, true + return LeafNode::allocate(c_other, L_ADDR_MIN, field_type_t::N0, true ).si_then([this, c_other, &tracker_other](auto fresh_other) { impl->test_copy_to(fresh_other.mut); auto cloned_root = fresh_other.node; @@ -2060,9 +2061,10 @@ eagain_ifuture> LeafNode::insert_value( } // split and insert Ref this_ref = this; - return (is_root() ? upgrade_root(c) : eagain_iertr::now() - ).si_then([this, c] { - return LeafNode::allocate(c, impl->field_type(), impl->is_level_tail()); + auto hint = key.get_hint(); + return (is_root() ? upgrade_root(c, hint) : eagain_iertr::now() + ).si_then([this, c, hint] { + return LeafNode::allocate(c, hint, impl->field_type(), impl->is_level_tail()); }).si_then([this_ref = std::move(this_ref), this, c, &key, vconf, FNAME, insert_pos, insert_stage=insert_stage, insert_size=insert_size](auto fresh_right) mutable { auto right_node = fresh_right.node; @@ -2096,10 +2098,10 @@ eagain_ifuture> LeafNode::insert_value( } eagain_ifuture> LeafNode::allocate_root( - context_t c, RootNodeTracker& root_tracker) + context_t c, laddr_t hint, RootNodeTracker& root_tracker) { LOG_PREFIX(OTree::LeafNode::allocate_root); - return LeafNode::allocate(c, field_type_t::N0, true + return LeafNode::allocate(c, hint, field_type_t::N0, true ).si_then([c, &root_tracker, FNAME](auto fresh_node) { auto root = fresh_node.node; return c.nm.get_super(c.t, root_tracker @@ -2221,9 +2223,9 @@ void LeafNode::track_erase( } eagain_ifuture LeafNode::allocate( - context_t c, field_type_t field_type, bool is_level_tail) + context_t c, laddr_t hint, field_type_t field_type, bool is_level_tail) { - return LeafNodeImpl::allocate(c, field_type, is_level_tail + return LeafNodeImpl::allocate(c, hint, field_type, is_level_tail ).si_then([](auto&& fresh_impl) { auto node = Ref(new LeafNode( fresh_impl.impl.get(), std::move(fresh_impl.impl))); diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/node.h b/src/crimson/os/seastore/onode_manager/staged-fltree/node.h index 7597b0cf6937..6392da50d37f 100644 --- a/src/crimson/os/seastore/onode_manager/staged-fltree/node.h +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/node.h @@ -405,7 +405,7 @@ class Node make_root(c, std::move(_super)); } void as_root(Super::URef&& _super); - eagain_ifuture<> upgrade_root(context_t); + eagain_ifuture<> upgrade_root(context_t, laddr_t); Super::URef deref_super(); @@ -428,7 +428,7 @@ class Node eagain_ifuture<> erase_node(context_t, Ref&&); template eagain_ifuture<> fix_parent_index(context_t, Ref&&, bool); - eagain_ifuture rebuild_extent(context_t); + eagain_ifuture rebuild_extent(context_t, laddr_t); eagain_ifuture<> retire(context_t, Ref&&); void make_tail(context_t); @@ -539,7 +539,7 @@ class InternalNode final : public Node { void track_make_tail(const search_position_t&); static eagain_ifuture> allocate_root( - context_t, level_t, laddr_t, Super::URef&&); + context_t, laddr_t, level_t, laddr_t, Super::URef&&); protected: eagain_ifuture> lookup_smallest(context_t) override; @@ -580,7 +580,7 @@ class InternalNode final : public Node { return std::make_pair(Ref(node), mut); } }; - static eagain_ifuture allocate(context_t, field_type_t, bool, level_t); + static eagain_ifuture allocate(context_t, laddr_t, field_type_t, bool, level_t); private: /** @@ -681,7 +681,7 @@ class LeafNode final : public Node { context_t, const key_hobj_t&, value_config_t, const search_position_t&, const MatchHistory&, match_stat_t mstat); - static eagain_ifuture> allocate_root(context_t, RootNodeTracker&); + static eagain_ifuture> allocate_root(context_t, laddr_t, RootNodeTracker&); friend class Node; private: @@ -712,7 +712,7 @@ class LeafNode final : public Node { return std::make_pair(Ref(node), mut); } }; - static eagain_ifuture allocate(context_t, field_type_t, bool); + static eagain_ifuture allocate(context_t, laddr_t, field_type_t, bool); private: /** diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_accessor.h b/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_accessor.h index b6a2f0315893..3f8f78d3900a 100644 --- a/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_accessor.h +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_accessor.h @@ -505,7 +505,7 @@ class NodeExtentAccessorT { std::memcpy(to.get_write(), extent->get_read(), get_length()); } - eagain_ifuture rebuild(context_t c) { + eagain_ifuture rebuild(context_t c, laddr_t hint) { LOG_PREFIX(OTree::Extent::rebuild); assert(!is_retired()); if (state == nextent_state_t::FRESH) { @@ -515,7 +515,7 @@ class NodeExtentAccessorT { } assert(!extent->is_initial_pending()); auto alloc_size = get_length(); - return c.nm.alloc_extent(c.t, alloc_size + return c.nm.alloc_extent(c.t, hint, alloc_size ).handle_error_interruptible( eagain_iertr::pass_further{}, crimson::ct_error::input_output_error::handle( diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager.h b/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager.h index b33c9d539b10..6f7ae9245d6d 100644 --- a/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager.h +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager.h @@ -77,7 +77,7 @@ class NodeExtentManager { using alloc_iertr = base_iertr; virtual alloc_iertr::future alloc_extent( - Transaction&, extent_len_t) = 0; + Transaction&, laddr_t hint, extent_len_t) = 0; using retire_iertr = base_iertr::extend< crimson::ct_error::enoent>; diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/dummy.h b/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/dummy.h index 2d3b3fb636db..53115ba171ea 100644 --- a/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/dummy.h +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/dummy.h @@ -89,7 +89,7 @@ class DummyNodeExtentManager final: public NodeExtentManager { } alloc_iertr::future alloc_extent( - Transaction& t, extent_len_t len) override { + Transaction& t, laddr_t hint, extent_len_t len) override { TRACET("allocating {}B ...", t, len); if constexpr (SYNC) { return alloc_extent_sync(t, len); diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/seastore.h b/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/seastore.h index 262c10bb3fd2..3ff51d0542e3 100644 --- a/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/seastore.h +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/seastore.h @@ -121,7 +121,7 @@ class SeastoreNodeExtentManager final: public TransactionManagerHandle { } alloc_iertr::future alloc_extent( - Transaction& t, extent_len_t len) override { + Transaction& t, laddr_t hint, extent_len_t len) override { TRACET("allocating {}B ...", t, len); if constexpr (INJECT_EAGAIN) { if (trigger_eagain()) { @@ -130,7 +130,7 @@ class SeastoreNodeExtentManager final: public TransactionManagerHandle { return alloc_iertr::make_ready_future(); } } - return tm.alloc_extent(t, addr_min, len + return tm.alloc_extent(t, hint, len ).si_then([len, &t](auto extent) { DEBUGT("allocated {}B at {:#x} -- {}", t, extent->get_length(), extent->get_laddr(), *extent); diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/node_impl.cc b/src/crimson/os/seastore/onode_manager/staged-fltree/node_impl.cc index be1ac9b78230..5db0f83dda61 100644 --- a/src/crimson/os/seastore/onode_manager/staged-fltree/node_impl.cc +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/node_impl.cc @@ -13,16 +13,16 @@ last_split_info_t last_split = {}; // XXX: branchless allocation eagain_ifuture InternalNodeImpl::allocate( - context_t c, field_type_t type, bool is_level_tail, level_t level) + context_t c, laddr_t hint, field_type_t type, bool is_level_tail, level_t level) { if (type == field_type_t::N0) { - return InternalNode0::allocate(c, is_level_tail, level); + return InternalNode0::allocate(c, hint, is_level_tail, level); } else if (type == field_type_t::N1) { - return InternalNode1::allocate(c, is_level_tail, level); + return InternalNode1::allocate(c, hint, is_level_tail, level); } else if (type == field_type_t::N2) { - return InternalNode2::allocate(c, is_level_tail, level); + return InternalNode2::allocate(c, hint, is_level_tail, level); } else if (type == field_type_t::N3) { - return InternalNode3::allocate(c, is_level_tail, level); + return InternalNode3::allocate(c, hint, is_level_tail, level); } else { ceph_abort("impossible path"); } @@ -30,16 +30,16 @@ InternalNodeImpl::allocate( eagain_ifuture LeafNodeImpl::allocate( - context_t c, field_type_t type, bool is_level_tail) + context_t c, laddr_t hint, field_type_t type, bool is_level_tail) { if (type == field_type_t::N0) { - return LeafNode0::allocate(c, is_level_tail, 0); + return LeafNode0::allocate(c, hint, is_level_tail, 0); } else if (type == field_type_t::N1) { - return LeafNode1::allocate(c, is_level_tail, 0); + return LeafNode1::allocate(c, hint, is_level_tail, 0); } else if (type == field_type_t::N2) { - return LeafNode2::allocate(c, is_level_tail, 0); + return LeafNode2::allocate(c, hint, is_level_tail, 0); } else if (type == field_type_t::N3) { - return LeafNode3::allocate(c, is_level_tail, 0); + return LeafNode3::allocate(c, hint, is_level_tail, 0); } else { ceph_abort("impossible path"); } diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/node_impl.h b/src/crimson/os/seastore/onode_manager/staged-fltree/node_impl.h index 8bac60bac533..f8a8aaa1cd4b 100644 --- a/src/crimson/os/seastore/onode_manager/staged-fltree/node_impl.h +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/node_impl.h @@ -90,7 +90,7 @@ class NodeImpl { virtual std::tuple erase(const search_position_t&) = 0; virtual std::tuple evaluate_merge(NodeImpl&) = 0; virtual search_position_t merge(NodeExtentMutable&, NodeImpl&, match_stage_t, extent_len_t) = 0; - virtual eagain_ifuture rebuild_extent(context_t) = 0; + virtual eagain_ifuture rebuild_extent(context_t, laddr_t) = 0; virtual eagain_ifuture<> retire_extent(context_t) = 0; virtual search_position_t make_tail() = 0; @@ -179,7 +179,7 @@ class InternalNodeImpl : public NodeImpl { return {std::move(impl), mut}; } }; - static eagain_ifuture allocate(context_t, field_type_t, bool, level_t); + static eagain_ifuture allocate(context_t, laddr_t, field_type_t, bool, level_t); static InternalNodeImplURef load(NodeExtentRef, field_type_t); @@ -259,7 +259,7 @@ class LeafNodeImpl : public NodeImpl { return {std::move(impl), mut}; } }; - static eagain_ifuture allocate(context_t, field_type_t, bool); + static eagain_ifuture allocate(context_t, laddr_t, field_type_t, bool); static LeafNodeImplURef load(NodeExtentRef, field_type_t); diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/node_layout.h b/src/crimson/os/seastore/onode_manager/staged-fltree/node_layout.h index 4c55d49475da..cc1841ce9fbf 100644 --- a/src/crimson/os/seastore/onode_manager/staged-fltree/node_layout.h +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/node_layout.h @@ -66,7 +66,7 @@ class NodeLayoutT final : public InternalNodeImpl, public LeafNodeImpl { } static eagain_ifuture allocate( - context_t c, bool is_level_tail, level_t level) { + context_t c, laddr_t hint, bool is_level_tail, level_t level) { LOG_PREFIX(OTree::Layout::allocate); extent_len_t extent_size; if constexpr (NODE_TYPE == node_type_t::LEAF) { @@ -74,7 +74,7 @@ class NodeLayoutT final : public InternalNodeImpl, public LeafNodeImpl { } else { extent_size = c.vb.get_internal_node_size(); } - return c.nm.alloc_extent(c.t, extent_size + return c.nm.alloc_extent(c.t, hint, extent_size ).handle_error_interruptible( eagain_iertr::pass_further{}, crimson::ct_error::input_output_error::handle( @@ -306,8 +306,8 @@ class NodeLayoutT final : public InternalNodeImpl, public LeafNodeImpl { } eagain_ifuture - rebuild_extent(context_t c) override { - return extent.rebuild(c).si_then([this] (auto mut) { + rebuild_extent(context_t c, laddr_t hint) override { + return extent.rebuild(c, hint).si_then([this] (auto mut) { // addr may change build_name(); return mut; diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/stages/key_layout.h b/src/crimson/os/seastore/onode_manager/staged-fltree/stages/key_layout.h index e9305416c10d..2bfa9efdeb9a 100644 --- a/src/crimson/os/seastore/onode_manager/staged-fltree/stages/key_layout.h +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/stages/key_layout.h @@ -41,6 +41,16 @@ template<> struct _full_key_type { using type = key_hobj_t; }; template using full_key_t = typename _full_key_type::type; +static laddr_t get_lba_hint(shard_t shard, pool_t pool, crush_hash_t crush) +{ + if (shard == shard_id_t::NO_SHARD) { + return (uint64_t)(pool & 0xFF)<<56 | (uint64_t)(crush)<<24; + } else { + return (uint64_t)(shard & 0X7F)<<56 | (uint64_t)(pool& 0xFF)<<48 | + (uint64_t)(crush)<<16; + } +} + struct node_offset_packed_t { node_offset_t value; } __attribute__((packed)); @@ -515,6 +525,9 @@ class key_hobj_t { crush_hash_t crush() const { return ghobj.hobj.get_hash(); } + laddr_t get_hint() const { + return get_lba_hint(shard(), pool(), crush()); + } std::string_view nspace() const { // TODO(cross-node string dedup) return ghobj.hobj.nspace; @@ -608,6 +621,9 @@ class key_view_t { crush_hash_t crush() const { return crush_packed().crush; } + laddr_t get_hint() const { + return get_lba_hint(shard(), pool(), crush()); + } std::string_view nspace() const { // TODO(cross-node string dedup) return ns_oid_view().nspace.to_string_view(); diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/value.cc b/src/crimson/os/seastore/onode_manager/staged-fltree/value.cc index 78c370b60567..e71ce06453ac 100644 --- a/src/crimson/os/seastore/onode_manager/staged-fltree/value.cc +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/value.cc @@ -82,6 +82,11 @@ Value::do_prepare_mutate_payload(Transaction& t) return p_cursor->prepare_mutate_value_payload(get_context(t)); } +laddr_t Value::get_hint() const +{ + return p_cursor->get_key_view(vb.get_header_magic()).get_hint(); +} + std::unique_ptr build_value_recorder_by_type(ceph::bufferlist& encoded, const value_magic_t& magic) diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/value.h b/src/crimson/os/seastore/onode_manager/staged-fltree/value.h index 2bb069d1d53f..18d459969785 100644 --- a/src/crimson/os/seastore/onode_manager/staged-fltree/value.h +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/value.h @@ -201,6 +201,8 @@ class Value { return read_value_header()->payload_size; } + laddr_t get_hint() const; + bool operator==(const Value& v) const { return p_cursor == v.p_cursor; } bool operator!=(const Value& v) const { return !(*this == v); } @@ -240,7 +242,9 @@ class Value { private: const value_header_t* read_value_header() const; - context_t get_context(Transaction& t) { return {nm, vb, t}; } + context_t get_context(Transaction& t) { + return {nm, vb, t}; + } std::pair do_prepare_mutate_payload(Transaction&);