From 6f21d915b2e9f392278f1eedf4b62e191300e7f2 Mon Sep 17 00:00:00 2001 From: chunmei-liu Date: Thu, 14 Apr 2022 22:47:30 -0700 Subject: [PATCH] crimson/seastore: enlarge omap_leaf_node size to avoid a double split when the key size is around 2K. Signed-off-by: chunmei-liu --- src/crimson/os/seastore/omap_manager.h | 3 ++- .../omap_manager/btree/btree_omap_manager.cc | 5 +++-- .../btree/omap_btree_node_impl.cc | 22 +++++++++++-------- .../btree/string_kv_node_layout.h | 22 ++++++++++--------- 4 files changed, 30 insertions(+), 22 deletions(-) diff --git a/src/crimson/os/seastore/omap_manager.h b/src/crimson/os/seastore/omap_manager.h index 4107cb2d40125..6211d0bf0aefd 100644 --- a/src/crimson/os/seastore/omap_manager.h +++ b/src/crimson/os/seastore/omap_manager.h @@ -14,7 +14,8 @@ #include "crimson/os/seastore/seastore_types.h" #include "crimson/os/seastore/transaction_manager.h" -#define OMAP_BLOCK_SIZE 4096 +#define OMAP_INNER_BLOCK_SIZE 4096 +#define OMAP_LEAF_BLOCK_SIZE 8192 namespace crimson::os::seastore { diff --git a/src/crimson/os/seastore/omap_manager/btree/btree_omap_manager.cc b/src/crimson/os/seastore/omap_manager/btree/btree_omap_manager.cc index e666879b1e111..fcf069b67abf9 100644 --- a/src/crimson/os/seastore/omap_manager/btree/btree_omap_manager.cc +++ b/src/crimson/os/seastore/omap_manager/btree/btree_omap_manager.cc @@ -26,7 +26,7 @@ BtreeOMapManager::initialize_omap(Transaction &t, laddr_t hint) { logger().debug("{}", __func__); - return tm.alloc_extent(t, hint, OMAP_BLOCK_SIZE) + return tm.alloc_extent(t, hint, OMAP_LEAF_BLOCK_SIZE) .si_then([hint](auto&& root_extent) { root_extent->set_size(0); omap_node_meta_t meta{1}; @@ -51,7 +51,8 @@ BtreeOMapManager::handle_root_split( omap_root_t &omap_root, const OMapNode::mutation_result_t& mresult) { - return oc.tm.alloc_extent(oc.t, omap_root.hint, OMAP_BLOCK_SIZE) + return oc.tm.alloc_extent(oc.t, omap_root.hint, + OMAP_INNER_BLOCK_SIZE) .si_then([&omap_root, mresult](auto&& nroot) -> handle_root_split_ret { 
auto [left, right, pivot] = *(mresult.split_tuple); omap_node_meta_t meta{omap_root.depth + 1}; diff --git a/src/crimson/os/seastore/omap_manager/btree/omap_btree_node_impl.cc b/src/crimson/os/seastore/omap_manager/btree/omap_btree_node_impl.cc index 98f12d100bcd9..dafe3222d1b2d 100644 --- a/src/crimson/os/seastore/omap_manager/btree/omap_btree_node_impl.cc +++ b/src/crimson/os/seastore/omap_manager/btree/omap_btree_node_impl.cc @@ -268,7 +268,8 @@ OMapInnerNode::split_children_ret OMapInnerNode:: make_split_children(omap_context_t oc) { logger().debug("OMapInnerNode: {}", __func__); - return oc.tm.alloc_extents(oc.t, oc.hint, OMAP_BLOCK_SIZE, 2) + return oc.tm.alloc_extents(oc.t, oc.hint, + OMAP_INNER_BLOCK_SIZE, 2) .si_then([this] (auto &&ext_pair) { auto left = ext_pair.front(); auto right = ext_pair.back(); @@ -282,7 +283,8 @@ OMapInnerNode::full_merge_ret OMapInnerNode::make_full_merge(omap_context_t oc, OMapNodeRef right) { logger().debug("OMapInnerNode: {}", __func__); - return oc.tm.alloc_extent(oc.t, oc.hint, OMAP_BLOCK_SIZE) + return oc.tm.alloc_extent(oc.t, oc.hint, + OMAP_INNER_BLOCK_SIZE) .si_then([this, right] (auto &&replacement) { replacement->merge_from(*this, *right->cast()); return full_merge_ret( @@ -296,7 +298,8 @@ OMapInnerNode::make_balanced(omap_context_t oc, OMapNodeRef _right) { logger().debug("OMapInnerNode: {}", __func__); ceph_assert(_right->get_type() == TYPE); - return oc.tm.alloc_extents(oc.t, oc.hint, OMAP_BLOCK_SIZE, 2) + return oc.tm.alloc_extents(oc.t, oc.hint, + OMAP_INNER_BLOCK_SIZE, 2) .si_then([this, _right] (auto &&replacement_pair){ auto replacement_left = replacement_pair.front(); auto replacement_right = replacement_pair.back(); @@ -557,7 +560,7 @@ OMapLeafNode::split_children_ret OMapLeafNode::make_split_children(omap_context_t oc) { logger().debug("OMapLeafNode: {}", __func__); - return oc.tm.alloc_extents(oc.t, oc.hint, OMAP_BLOCK_SIZE, 2) + return oc.tm.alloc_extents(oc.t, oc.hint, OMAP_LEAF_BLOCK_SIZE, 2) 
.si_then([this] (auto &&ext_pair) { auto left = ext_pair.front(); auto right = ext_pair.back(); @@ -572,7 +575,7 @@ OMapLeafNode::make_full_merge(omap_context_t oc, OMapNodeRef right) { ceph_assert(right->get_type() == TYPE); logger().debug("OMapLeafNode: {}", __func__); - return oc.tm.alloc_extent(oc.t, oc.hint, OMAP_BLOCK_SIZE) + return oc.tm.alloc_extent(oc.t, oc.hint, OMAP_LEAF_BLOCK_SIZE) .si_then([this, right] (auto &&replacement) { replacement->merge_from(*this, *right->cast()); return full_merge_ret( @@ -586,7 +589,7 @@ OMapLeafNode::make_balanced(omap_context_t oc, OMapNodeRef _right) { ceph_assert(_right->get_type() == TYPE); logger().debug("OMapLeafNode: {}", __func__); - return oc.tm.alloc_extents(oc.t, oc.hint, OMAP_BLOCK_SIZE, 2) + return oc.tm.alloc_extents(oc.t, oc.hint, OMAP_LEAF_BLOCK_SIZE, 2) .si_then([this, _right] (auto &&replacement_pair) { auto replacement_left = replacement_pair.front(); auto replacement_right = replacement_pair.back(); @@ -607,8 +610,9 @@ omap_load_extent(omap_context_t oc, laddr_t laddr, depth_t depth) { ceph_assert(depth > 0); if (depth > 1) { - return oc.tm.read_extent(oc.t, laddr, OMAP_BLOCK_SIZE - ).handle_error_interruptible( + return oc.tm.read_extent(oc.t, laddr, + OMAP_INNER_BLOCK_SIZE) + .handle_error_interruptible( omap_load_extent_iertr::pass_further{}, crimson::ct_error::assert_all{ "Invalid error in omap_load_extent" } ).si_then( @@ -616,7 +620,7 @@ omap_load_extent(omap_context_t oc, laddr_t laddr, depth_t depth) return seastar::make_ready_future(std::move(e)); }); } else { - return oc.tm.read_extent(oc.t, laddr, OMAP_BLOCK_SIZE + return oc.tm.read_extent(oc.t, laddr, OMAP_LEAF_BLOCK_SIZE ).handle_error_interruptible( omap_load_extent_iertr::pass_further{}, crimson::ct_error::assert_all{ "Invalid error in omap_load_extent" } diff --git a/src/crimson/os/seastore/omap_manager/btree/string_kv_node_layout.h b/src/crimson/os/seastore/omap_manager/btree/string_kv_node_layout.h index 1bd95415a48a1..a2e633fe5b04f 
100644 --- a/src/crimson/os/seastore/omap_manager/btree/string_kv_node_layout.h +++ b/src/crimson/os/seastore/omap_manager/btree/string_kv_node_layout.h @@ -401,7 +401,7 @@ public: return get_node_key().key_off; } auto get_node_val_ptr() const { - auto tail = node->buf + OMAP_BLOCK_SIZE; + auto tail = node->buf + OMAP_INNER_BLOCK_SIZE; if (*this == node->iter_end()) return tail; else { @@ -416,7 +416,7 @@ public: return (*this - 1)->get_node_val_offset(); } auto get_right_ptr_end() const { - return node->buf + OMAP_BLOCK_SIZE - get_right_offset_end(); + return node->buf + OMAP_INNER_BLOCK_SIZE - get_right_offset_end(); } void update_offset(int offset) { @@ -438,8 +438,8 @@ public: static_assert(!is_const); assert(str.size() == get_node_key().key_len); assert(get_node_key().key_off >= str.size()); - assert(get_node_key().key_off < OMAP_BLOCK_SIZE); - assert(str.size() < OMAP_BLOCK_SIZE); + assert(get_node_key().key_off < OMAP_INNER_BLOCK_SIZE); + assert(str.size() < OMAP_INNER_BLOCK_SIZE); ::memcpy(get_node_val_ptr(), str.data(), str.size()); } @@ -656,8 +656,9 @@ public: } uint16_t capacity() const { - return OMAP_BLOCK_SIZE - (reinterpret_cast(layout.template Pointer<2>(buf))- - reinterpret_cast(layout.template Pointer<0>(buf))); + return OMAP_INNER_BLOCK_SIZE + - (reinterpret_cast(layout.template Pointer<2>(buf)) + - reinterpret_cast(layout.template Pointer<0>(buf))); } bool is_overflow(size_t ksize) const { @@ -1007,7 +1008,7 @@ public: return get_node_key().key_off; } auto get_node_val_ptr() const { - auto tail = node->buf + OMAP_BLOCK_SIZE; + auto tail = node->buf + OMAP_LEAF_BLOCK_SIZE; if (*this == node->iter_end()) return tail; else { @@ -1022,7 +1023,7 @@ public: return (*this - 1)->get_node_val_offset(); } auto get_right_ptr_end() const { - return node->buf + OMAP_BLOCK_SIZE - get_right_offset_end(); + return node->buf + OMAP_LEAF_BLOCK_SIZE - get_right_offset_end(); } void update_offset(int offset) { @@ -1262,8 +1263,9 @@ public: } uint32_t capacity() 
const { - return OMAP_BLOCK_SIZE - (reinterpret_cast(layout.template Pointer<2>(buf))- - reinterpret_cast(layout.template Pointer<0>(buf))); + return OMAP_LEAF_BLOCK_SIZE + - (reinterpret_cast(layout.template Pointer<2>(buf)) + - reinterpret_cast(layout.template Pointer<0>(buf))); } bool is_overflow(size_t ksize, size_t vsize) const { -- 2.39.5