From 37d721cf69977b57d8814cb1dbea5fc99f566f1e Mon Sep 17 00:00:00 2001 From: Yingxin Cheng Date: Wed, 2 Jun 2021 13:33:50 +0800 Subject: [PATCH] crimson/onode-staged-tree: add and use node sizes in tree_conf_t Signed-off-by: Yingxin Cheng --- .../staged-fltree/fltree_onode_manager.h | 4 +++- .../seastore/onode_manager/staged-fltree/fwd.h | 4 +++- .../onode_manager/staged-fltree/node.cc | 16 ++++++++++------ .../staged-fltree/node_extent_accessor.h | 7 ++++--- .../staged-fltree/node_extent_manager.h | 2 +- .../staged-fltree/node_extent_manager/dummy.h | 13 ++++++------- .../node_extent_manager/seastore.h | 12 +++++------- .../onode_manager/staged-fltree/node_layout.h | 17 ++++++++++------- .../onode_manager/staged-fltree/value.cc | 4 ++++ .../onode_manager/staged-fltree/value.h | 10 ++++++++++ .../crimson/seastore/onode_tree/test_value.h | 12 ++++++++---- 11 files changed, 64 insertions(+), 37 deletions(-) diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.h b/src/crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.h index d075364af19..1444ebbb6d0 100644 --- a/src/crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.h +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.h @@ -14,7 +14,9 @@ struct FLTreeOnode final : Onode, Value { value_magic_t::ONODE, 128, // max_ns_size 320, // max_oid_size - 1200 // max_value_payload_size + 1200, // max_value_payload_size + 4096, // internal_node_size + 4096 // leaf_node_size }; enum class status_t { diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/fwd.h b/src/crimson/os/seastore/onode_manager/staged-fltree/fwd.h index 741c31efe1d..2c08895320d 100644 --- a/src/crimson/os/seastore/onode_manager/staged-fltree/fwd.h +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/fwd.h @@ -59,10 +59,12 @@ constexpr auto INDEX_LAST = INDEX_END - 0x4; constexpr auto INDEX_UPPER_BOUND = INDEX_END - 0x8; inline bool is_valid_index(index_t index) { return index < INDEX_UPPER_BOUND; } -// TODO: decide by NODE_BLOCK_SIZE +// we support up to 64 KiB tree nodes using node_offset_t = uint16_t; constexpr node_offset_t DISK_BLOCK_SIZE = 1u << 12; constexpr node_offset_t NODE_BLOCK_SIZE = DISK_BLOCK_SIZE * 1u; +constexpr auto MAX_NODE_SIZE = + (extent_len_t)std::numeric_limits::max() + 1; using string_size_t = uint16_t; diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/node.cc b/src/crimson/os/seastore/onode_manager/staged-fltree/node.cc index 9fcd950107e..d0f35817f59 100644 --- a/src/crimson/os/seastore/onode_manager/staged-fltree/node.cc +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/node.cc @@ -653,11 +653,7 @@ eagain_future> Node::load( context_t c, laddr_t addr, bool expect_is_level_tail) { LOG_PREFIX(OTree::Node::load); - // NOTE: - // *option1: all types of node have the same length; - // option2: length is defined by node/field types; - // option3: length is totally flexible; - return c.nm.read_extent(c.t, addr, NODE_BLOCK_SIZE + return c.nm.read_extent(c.t, addr ).handle_error( eagain_ertr::pass_further{}, crimson::ct_error::input_output_error::handle( @@ -684,12 +680,20 @@ eagain_future> Node::load( c.t, addr, expect_is_level_tail); ceph_abort("fatal error"); }) - ).safe_then([expect_is_level_tail](auto extent) { + ).safe_then([FNAME, c, expect_is_level_tail](auto extent) { auto [node_type, field_type] = extent->get_types(); if (node_type == node_type_t::LEAF) { + if (extent->get_length() != c.vb.get_leaf_node_size()) { + ERRORT("leaf length mismatch -- {}", c.t, extent); + ceph_abort("fatal error"); + } auto impl = LeafNodeImpl::load(extent, field_type, expect_is_level_tail); return Ref(new LeafNode(impl.get(), std::move(impl))); } else if (node_type == node_type_t::INTERNAL) { + if (extent->get_length() != c.vb.get_internal_node_size()) { + ERRORT("internal length mismatch -- {}", c.t, extent); + ceph_abort("fatal error"); + } auto impl = InternalNodeImpl::load(extent, field_type, expect_is_level_tail); return Ref(new InternalNode(impl.get(), std::move(impl))); } else { diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_accessor.h b/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_accessor.h index ec42c4f569e..ab1b1880f35 100644 --- a/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_accessor.h +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_accessor.h @@ -503,13 +503,14 @@ class NodeExtentAccessorT { return eagain_ertr::make_ready_future(*mut); } assert(!extent->is_initial_pending()); - return c.nm.alloc_extent(c.t, node_stage_t::EXTENT_SIZE + auto alloc_size = extent->get_length(); + return c.nm.alloc_extent(c.t, alloc_size ).handle_error( eagain_ertr::pass_further{}, crimson::ct_error::input_output_error::handle( - [FNAME, c, l_to_discard = extent->get_laddr()] { + [FNAME, c, alloc_size, l_to_discard = extent->get_laddr()] { ERRORT("EIO during allocate -- node_size={}, to_discard={:x}", - c.t, node_stage_t::EXTENT_SIZE, l_to_discard); + c.t, alloc_size, l_to_discard); ceph_abort("fatal error"); }) ).safe_then([this, c, FNAME] (auto fresh_extent) { diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager.h b/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager.h index 4304f0ed51e..ac3b7179cf8 100644 --- a/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager.h +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager.h @@ -68,7 +68,7 @@ class NodeExtentManager { crimson::ct_error::enoent, crimson::ct_error::erange>; virtual read_ertr::future read_extent( - Transaction&, laddr_t, extent_len_t) = 0; + Transaction&, laddr_t) = 0; using alloc_ertr = base_ertr; virtual alloc_ertr::future alloc_extent( diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/dummy.h b/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/dummy.h index c868d4448bb..1cdb117c0a0 100644 --- a/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/dummy.h +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/dummy.h @@ -75,14 +75,14 @@ class DummyNodeExtentManager final: public NodeExtentManager { bool is_read_isolated() const override { return false; } read_ertr::future read_extent( - Transaction& t, laddr_t addr, extent_len_t len) override { - TRACET("reading {}B at {:#x} ...", t, len, addr); + Transaction& t, laddr_t addr) override { + TRACET("reading at {:#x} ...", t, addr); if constexpr (SYNC) { - return read_extent_sync(t, addr, len); + return read_extent_sync(t, addr); } else { using namespace std::chrono_literals; - return seastar::sleep(1us).then([this, &t, addr, len] { - return read_extent_sync(t, addr, len); + return seastar::sleep(1us).then([this, &t, addr] { + return read_extent_sync(t, addr); }); } } @@ -133,13 +133,12 @@ class DummyNodeExtentManager final: public NodeExtentManager { private: read_ertr::future read_extent_sync( - Transaction& t, laddr_t addr, extent_len_t len) { + Transaction& t, laddr_t addr) { auto iter = allocate_map.find(addr); assert(iter != allocate_map.end()); auto extent = iter->second; TRACET("read {}B at {:#x}", t, extent->get_length(), extent->get_laddr()); assert(extent->get_laddr() == addr); - assert(extent->get_length() == len); return read_ertr::make_ready_future(extent); } diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/seastore.h b/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/seastore.h index 3ca8e826325..60c6233088a 100644 --- a/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/seastore.h +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/seastore.h @@ -97,16 +97,16 @@ class SeastoreNodeExtentManager final: public TransactionManagerHandle { bool is_read_isolated() const override { return true; } read_ertr::future read_extent( - Transaction& t, laddr_t addr, extent_len_t len) override { - TRACET("reading {}B at {:#x} ...", t, len, addr); + Transaction& t, laddr_t addr) override { + TRACET("reading at {:#x} ...", t, addr); if constexpr (INJECT_EAGAIN) { if (trigger_eagain()) { - DEBUGT("reading {}B at {:#x}: trigger eagain", t, len, addr); + DEBUGT("reading at {:#x}: trigger eagain", t, addr); return crimson::ct_error::eagain::make(); } } - return tm.read_extent(t, addr, len - ).safe_then([addr, len, &t](auto&& e) { + return tm.read_extent(t, addr + ).safe_then([addr, &t](auto&& e) { TRACET("read {}B at {:#x} -- {}", t, e->get_length(), e->get_laddr(), *e); if (!e->is_valid()) { @@ -114,9 +114,7 @@ class SeastoreNodeExtentManager final: public TransactionManagerHandle { ceph_abort("fatal error"); } assert(e->get_laddr() == addr); - assert(e->get_length() == len); std::ignore = addr; - std::ignore = len; return NodeExtentRef(e); }); } diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/node_layout.h b/src/crimson/os/seastore/onode_manager/staged-fltree/node_layout.h index ebd265d752d..ca54eb34764 100644 --- a/src/crimson/os/seastore/onode_manager/staged-fltree/node_layout.h +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/node_layout.h @@ -69,16 +69,19 @@ class NodeLayoutT final : public InternalNodeImpl, public LeafNodeImpl { static eagain_future allocate( context_t c, bool is_level_tail, level_t level) { LOG_PREFIX(OTree::Layout::allocate); - // NOTE: Currently, all the node types have the same size for simplicity. - // But depending on the requirement, we may need to make node size - // configurable by field_type_t and node_type_t, or totally flexible. - return c.nm.alloc_extent(c.t, node_stage_t::EXTENT_SIZE + extent_len_t extent_size; + if constexpr (NODE_TYPE == node_type_t::LEAF) { + extent_size = c.vb.get_leaf_node_size(); + } else { + extent_size = c.vb.get_internal_node_size(); + } + return c.nm.alloc_extent(c.t, extent_size ).handle_error( eagain_ertr::pass_further{}, crimson::ct_error::input_output_error::handle( - [FNAME, c, is_level_tail, level] { - ERRORT("EIO -- node_size={}, is_level_tail={}, level={}", - c.t, node_stage_t::EXTENT_SIZE, is_level_tail, level); + [FNAME, c, extent_size, is_level_tail, level] { + ERRORT("EIO -- extent_size={}, is_level_tail={}, level={}", + c.t, extent_size, is_level_tail, level); ceph_abort("fatal error"); }) ).safe_then([is_level_tail, level](auto extent) { diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/value.cc b/src/crimson/os/seastore/onode_manager/staged-fltree/value.cc index a61436a5213..3d9538d55aa 100644 --- a/src/crimson/os/seastore/onode_manager/staged-fltree/value.cc +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/value.cc @@ -111,6 +111,10 @@ void validate_tree_config(const tree_conf_t& conf) string_key_view_t::VALID_UPPER_BOUND); ceph_assert(conf.max_oid_size < string_key_view_t::VALID_UPPER_BOUND); + ceph_assert(conf.internal_node_size <= MAX_NODE_SIZE); + ceph_assert(conf.internal_node_size % DISK_BLOCK_SIZE == 0); + ceph_assert(conf.leaf_node_size <= MAX_NODE_SIZE); + ceph_assert(conf.leaf_node_size % DISK_BLOCK_SIZE == 0); } } diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/value.h b/src/crimson/os/seastore/onode_manager/staged-fltree/value.h index 1833d4b2b2d..f91468d0432 100644 --- a/src/crimson/os/seastore/onode_manager/staged-fltree/value.h +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/value.h @@ -160,6 +160,8 @@ struct tree_conf_t { string_size_t max_ns_size; string_size_t max_oid_size; value_size_t max_value_payload_size; + extent_len_t internal_node_size; + extent_len_t leaf_node_size; }; class tree_cursor_t; @@ -258,6 +260,8 @@ struct ValueBuilder { virtual string_size_t get_max_ns_size() const = 0; virtual string_size_t get_max_oid_size() const = 0; virtual value_size_t get_max_value_payload_size() const = 0; + virtual extent_len_t get_internal_node_size() const = 0; + virtual extent_len_t get_leaf_node_size() const = 0; virtual std::unique_ptr build_value_recorder(ceph::bufferlist&) const = 0; }; @@ -285,6 +289,12 @@ struct ValueBuilderImpl final : public ValueBuilder { value_size_t get_max_value_payload_size() const override { return ValueImpl::TREE_CONF.max_value_payload_size; } + extent_len_t get_internal_node_size() const override { + return ValueImpl::TREE_CONF.internal_node_size; + } + extent_len_t get_leaf_node_size() const override { + return ValueImpl::TREE_CONF.leaf_node_size; + } std::unique_ptr build_value_recorder(ceph::bufferlist& encoded) const override { diff --git a/src/test/crimson/seastore/onode_tree/test_value.h b/src/test/crimson/seastore/onode_tree/test_value.h index fec4c54c851..cb509363837 100644 --- a/src/test/crimson/seastore/onode_tree/test_value.h +++ b/src/test/crimson/seastore/onode_tree/test_value.h @@ -39,14 +39,18 @@ inline std::ostream& operator<<(std::ostream& os, const test_item_t& item) { template + value_size_t MAX_VALUE_PAYLOAD_SIZE, + extent_len_t INTERNAL_NODE_SIZE, + extent_len_t LEAF_NODE_SIZE> class TestValue final : public Value { public: static constexpr tree_conf_t TREE_CONF = { MAGIC, MAX_NS_SIZE, MAX_OID_SIZE, - MAX_VALUE_PAYLOAD_SIZE + MAX_VALUE_PAYLOAD_SIZE, + INTERNAL_NODE_SIZE, + LEAF_NODE_SIZE }; using id_t = test_item_t::id_t; @@ -197,8 +201,8 @@ class TestValue final : public Value { }; using UnboundedValue = TestValue< - value_magic_t::TEST_UNBOUND, 4096, 4096, 4096>; + value_magic_t::TEST_UNBOUND, 4096, 4096, 4096, 4096, 4096>; using BoundedValue = TestValue< - value_magic_t::TEST_BOUNDED, 320, 320, 640>; + value_magic_t::TEST_BOUNDED, 320, 320, 640, 4096, 4096>; } -- 2.39.5