From 6fb880767869929b0ca940b502fa84b305f2d87b Mon Sep 17 00:00:00 2001 From: Yingxin Cheng Date: Wed, 2 Jun 2021 12:47:55 +0800 Subject: [PATCH] crimson/onode-staged-tree: implement size upper-bounds to ns/oid Signed-off-by: Yingxin Cheng --- .../staged-fltree/fltree_onode_manager.cc | 5 ---- .../staged-fltree/fltree_onode_manager.h | 4 ++- .../onode_manager/staged-fltree/fwd.h | 2 ++ .../staged-fltree/stages/key_layout.h | 11 -------- .../onode_manager/staged-fltree/tree.h | 24 ++++++++++++++-- .../onode_manager/staged-fltree/tree_utils.h | 7 ++++- .../onode_manager/staged-fltree/value.cc | 16 +++++++++-- .../onode_manager/staged-fltree/value.h | 25 +++++++++++++++-- .../seastore/onode_tree/test_staged_fltree.cc | 28 +++++++++---------- .../crimson/seastore/onode_tree/test_value.h | 12 +++++++- src/tools/crimson/perf_staged_fltree.cc | 2 +- 11 files changed, 94 insertions(+), 42 deletions(-) diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.cc b/src/crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.cc index 95628b2ecf0..d995a83eaa0 100644 --- a/src/crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.cc +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.cc @@ -4,7 +4,6 @@ #include "crimson/os/seastore/logging.h" #include "crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.h" -#include "crimson/os/seastore/onode_manager/staged-fltree/stages/key_layout.h" namespace crimson::os::seastore::onode { @@ -41,10 +40,6 @@ FLTreeOnodeManager::get_or_create_onode( const ghobject_t &hoid) { LOG_PREFIX(FLTreeOnodeManager::get_or_create_onode); - if (hoid.hobj.oid.name.length() + hoid.hobj.nspace.length() - > key_view_t::MAX_NS_OID_LENGTH) { - return crimson::ct_error::value_too_large::make(); - } return tree.insert( trans, hoid, OnodeTree::tree_value_config_t{sizeof(onode_layout_t)} diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.h b/src/crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.h index fcb26b25b5e..a1f73e4a711 100644 --- a/src/crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.h +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.h @@ -11,7 +11,9 @@ namespace crimson::os::seastore::onode { struct FLTreeOnode final : Onode, Value { static constexpr tree_conf_t TREE_CONF = { - value_magic_t::ONODE + value_magic_t::ONODE, + 128, // max_ns_size + 320 // max_oid_size }; enum class status_t { diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/fwd.h b/src/crimson/os/seastore/onode_manager/staged-fltree/fwd.h index 881d9c91be5..741c31efe1d 100644 --- a/src/crimson/os/seastore/onode_manager/staged-fltree/fwd.h +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/fwd.h @@ -64,6 +64,8 @@ using node_offset_t = uint16_t; constexpr node_offset_t DISK_BLOCK_SIZE = 1u << 12; constexpr node_offset_t NODE_BLOCK_SIZE = DISK_BLOCK_SIZE * 1u; +using string_size_t = uint16_t; + enum class MatchKindBS : int8_t { NE = -1, EQ = 0 }; enum class MatchKindCMP : int8_t { LT = -1, EQ = 0, GT }; diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/stages/key_layout.h b/src/crimson/os/seastore/onode_manager/staged-fltree/stages/key_layout.h index f8a9cc6b8ef..4eb03cfef2b 100644 --- a/src/crimson/os/seastore/onode_manager/staged-fltree/stages/key_layout.h +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/stages/key_layout.h @@ -144,8 +144,6 @@ inline MatchKindCMP compare_to(const snap_gen_t& l, const snap_gen_t& r) { */ struct string_key_view_t { enum class Type {MIN, STR, MAX}; - // presumably the maximum string length is 2KiB - using string_size_t = uint16_t; static constexpr auto MARKER_MAX = std::numeric_limits::max(); static constexpr auto MARKER_MIN = std::numeric_limits::max() - 1; static constexpr auto VALID_UPPER_BOUND = std::numeric_limits::max() - 2; @@ -273,7 +271,6 @@ struct string_key_view_t { */ class string_view_masked_t { public: - using string_size_t = string_key_view_t::string_size_t; using Type = string_key_view_t::Type; explicit string_view_masked_t(const string_key_view_t& index) : type{index.type()} { @@ -392,7 +389,6 @@ inline std::ostream& operator<<(std::ostream& os, const string_view_masked_t& ma } struct ns_oid_view_t { - using string_size_t = string_key_view_t::string_size_t; using Type = string_key_view_t::Type; ns_oid_view_t(const char* p_end) : nspace(p_end), oid(nspace.p_next_end()) {} @@ -596,13 +592,6 @@ inline std::ostream& operator<<(std::ostream& os, const key_hobj_t& key) { */ class key_view_t { public: - //FIXME: the length of ns and oid should be defined by osd_max_object_name_len - // and object_max_object_namespace_len in the future - static constexpr int MAX_NS_OID_LENGTH = - (4096 - sizeof(onode_layout_t) * 2) / 4 - - sizeof(shard_pool_t) - sizeof(crush_t) - sizeof(snap_gen_t) - - 8; // size of length field of oid and ns - /** * common interfaces as a full_key_t */ diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/tree.h b/src/crimson/os/seastore/onode_manager/staged-fltree/tree.h index db7920c33f9..7fa41b03df6 100644 --- a/src/crimson/os/seastore/onode_manager/staged-fltree/tree.h +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/tree.h @@ -7,6 +7,7 @@ #include "common/hobject.h" #include "crimson/common/type_helpers.h" +#include "crimson/os/seastore/logging.h" #include "fwd.h" #include "node.h" @@ -79,8 +80,13 @@ class Btree { // XXX: return key_view_t to avoid unecessary ghobject_t constructions ghobject_t get_ghobj() const { assert(!is_end()); - return p_cursor->get_key_view( - p_tree->value_builder.get_header_magic()).to_ghobj(); + auto view = p_cursor->get_key_view( + p_tree->value_builder.get_header_magic()); + assert(view.nspace().size() <= + p_tree->value_builder.get_max_ns_size()); + assert(view.oid().size() <= + p_tree->value_builder.get_max_oid_size()); + return view.to_ghobj(); } ValueImpl value() { @@ -236,8 +242,20 @@ class Btree { struct tree_value_config_t { value_size_t payload_size = 256; }; - eagain_future> + using insert_ertr = eagain_ertr::extend< + crimson::ct_error::value_too_large>; + insert_ertr::future> insert(Transaction& t, const ghobject_t& obj, tree_value_config_t _vconf) { + if (obj.hobj.nspace.size() > value_builder.get_max_ns_size()) { + ERRORT("namespace size {} too large to insert {}", + t, obj.hobj.nspace.size(), key_hobj_t{obj}); + return crimson::ct_error::value_too_large::make(); + } + if (obj.hobj.oid.name.size() > value_builder.get_max_oid_size()) { + ERRORT("oid size {} too large to insert {}", + t, obj.hobj.oid.name.size(), key_hobj_t{obj}); + return crimson::ct_error::value_too_large::make(); + } value_config_t vconf{value_builder.get_header_magic(), _vconf.payload_size}; return seastar::do_with( full_key_t(obj), diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/tree_utils.h b/src/crimson/os/seastore/onode_manager/staged-fltree/tree_utils.h index b67b76d0d9d..dcf6ed1e927 100644 --- a/src/crimson/os/seastore/onode_manager/staged-fltree/tree_utils.h +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/tree_utils.h @@ -323,7 +323,12 @@ class TreeBuilder { #else return eagain_ertr::make_ready_future(cursor); #endif - }); + }).handle_error( + [] (const crimson::ct_error::value_too_large& e) { + ceph_abort("impossible path"); + }, + crimson::ct_error::pass_further_all{} + ); } eagain_future<> insert(Transaction& t) { diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/value.cc b/src/crimson/os/seastore/onode_manager/staged-fltree/value.cc index 547872fdf09..03e3f8792b3 100644 --- a/src/crimson/os/seastore/onode_manager/staged-fltree/value.cc +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/value.cc @@ -5,6 +5,7 @@ #include "node.h" #include "node_delta_recorder.h" +#include "node_layout.h" // value implementations #include "test/crimson/seastore/onode_tree/test_value.h" @@ -88,8 +89,11 @@ build_value_recorder_by_type(ceph::bufferlist& encoded, case value_magic_t::ONODE: ret = std::make_unique(encoded); break; - case value_magic_t::TEST: - ret = std::make_unique(encoded); + case value_magic_t::TEST_UNBOUND: + ret = std::make_unique(encoded); + break; + case value_magic_t::TEST_BOUNDED: + ret = std::make_unique(encoded); break; default: ret = nullptr; @@ -99,4 +103,12 @@ build_value_recorder_by_type(ceph::bufferlist& encoded, return ret; } +void validate_tree_config(const tree_conf_t& conf) +{ + ceph_assert(conf.max_ns_size < + string_key_view_t::VALID_UPPER_BOUND); + ceph_assert(conf.max_oid_size < + string_key_view_t::VALID_UPPER_BOUND); +} + } diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/value.h b/src/crimson/os/seastore/onode_manager/staged-fltree/value.h index bce1424b760..2a0ef8a43e8 100644 --- a/src/crimson/os/seastore/onode_manager/staged-fltree/value.h +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/value.h @@ -17,14 +17,17 @@ namespace crimson::os::seastore::onode { using value_size_t = uint16_t; enum class value_magic_t : uint8_t { ONODE = 0x52, - TEST, + TEST_UNBOUND, + TEST_BOUNDED, }; inline std::ostream& operator<<(std::ostream& os, const value_magic_t& magic) { switch (magic) { case value_magic_t::ONODE: return os << "ONODE"; - case value_magic_t::TEST: - return os << "TEST"; + case value_magic_t::TEST_UNBOUND: + return os << "TEST_UNBOUND"; + case value_magic_t::TEST_BOUNDED: + return os << "TEST_BOUNDED"; default: return os << "UNKNOWN(" << magic << ")"; } @@ -154,6 +157,8 @@ class ValueDeltaRecorder { */ struct tree_conf_t { value_magic_t value_magic; + string_size_t max_ns_size; + string_size_t max_oid_size; }; class tree_cursor_t; @@ -249,6 +254,8 @@ class Value { */ struct ValueBuilder { virtual value_magic_t get_header_magic() const = 0; + virtual string_size_t get_max_ns_size() const = 0; + virtual string_size_t get_max_oid_size() const = 0; virtual std::unique_ptr build_value_recorder(ceph::bufferlist&) const = 0; }; @@ -260,9 +267,19 @@ struct ValueBuilder { */ template struct ValueBuilderImpl final : public ValueBuilder { + ValueBuilderImpl() { + validate_tree_config(ValueImpl::TREE_CONF); + } + value_magic_t get_header_magic() const { return ValueImpl::TREE_CONF.value_magic; } + string_size_t get_max_ns_size() const override { + return ValueImpl::TREE_CONF.max_ns_size; + } + string_size_t get_max_oid_size() const override { + return ValueImpl::TREE_CONF.max_oid_size; + } std::unique_ptr build_value_recorder(ceph::bufferlist& encoded) const override { @@ -280,6 +297,8 @@ struct ValueBuilderImpl final : public ValueBuilder { } }; +void validate_tree_config(const tree_conf_t& conf); + /** * Get the value recorder by type (the magic value) when the ValueBuilder is * unavailable. diff --git a/src/test/crimson/seastore/onode_tree/test_staged_fltree.cc b/src/test/crimson/seastore/onode_tree/test_staged_fltree.cc index cd211db058a..cdc1f812e11 100644 --- a/src/test/crimson/seastore/onode_tree/test_staged_fltree.cc +++ b/src/test/crimson/seastore/onode_tree/test_staged_fltree.cc @@ -26,6 +26,8 @@ namespace { constexpr bool IS_DUMMY_SYNC = false; using DummyManager = DummyNodeExtentManager; + using UnboundedBtree = Btree; + [[maybe_unused]] seastar::logger& logger() { return crimson::get_logger(ceph_subsys_test); } @@ -140,7 +142,7 @@ TEST_F(a_basic_test_t, 2_node_sizes) run_async([this] { auto nm = NodeExtentManager::create_dummy(IS_DUMMY_SYNC); auto t = make_test_transaction(); - ValueBuilderImpl vb; + ValueBuilderImpl vb; context_t c{*nm, vb, *t}; std::array, 16> nodes = { InternalNode0::allocate(c, false, 1u).unsafe_get0().make_pair(), @@ -171,15 +173,13 @@ TEST_F(a_basic_test_t, 2_node_sizes) }); } -using TestBtree = Btree; - struct b_dummy_tree_test_t : public seastar_test_suite_t { NodeExtentManagerURef moved_nm; TransactionRef ref_t; Transaction& t; - ValueBuilderImpl vb; + ValueBuilderImpl vb; context_t c; - TestBtree tree; + UnboundedBtree tree; b_dummy_tree_test_t() : moved_nm{NodeExtentManager::create_dummy(IS_DUMMY_SYNC)}, @@ -209,7 +209,7 @@ TEST_F(b_dummy_tree_test_t, 3_random_insert_erase_leaf_node) ASSERT_TRUE(tree.last(t).unsafe_get0().is_end()); std::map> insert_history; + std::tuple> insert_history; auto f_validate_insert_new = [this, &insert_history] ( const ghobject_t& key, const test_item_t& value) { @@ -496,7 +496,7 @@ class TestTree { // clone auto ref_dummy = NodeExtentManager::create_dummy(IS_DUMMY_SYNC); auto p_dummy = static_cast(ref_dummy.get()); - TestBtree tree_clone(std::move(ref_dummy)); + UnboundedBtree tree_clone(std::move(ref_dummy)); auto ref_t_clone = make_test_transaction(); Transaction& t_clone = *ref_t_clone; tree_clone.test_clone_from(t_clone, t, tree).unsafe_get0(); @@ -577,12 +577,12 @@ class TestTree { NodeExtentManagerURef moved_nm; TransactionRef ref_t; Transaction& t; - ValueBuilderImpl vb; + ValueBuilderImpl vb; context_t c; - TestBtree tree; + UnboundedBtree tree; Values values; std::map> insert_history; + std::tuple> insert_history; }; struct c_dummy_test_t : public seastar_test_suite_t {}; @@ -1239,9 +1239,9 @@ class DummyChildPool { Transaction& t() const { return *ref_t; } std::set> tracked_children; - std::optional p_btree; + std::optional p_btree; DummyManager* p_dummy = nullptr; - ValueBuilderImpl vb; + ValueBuilderImpl vb; TransactionRef ref_t = make_test_transaction(); std::random_device rd; @@ -1526,7 +1526,7 @@ TEST_F(d_seastore_tm_test_t, 6_random_tree_insert_erase) auto moved_nm = (TEST_SEASTORE ? NodeExtentManager::create_seastore(*tm) : NodeExtentManager::create_dummy(IS_DUMMY_SYNC)); auto p_nm = moved_nm.get(); - auto tree = std::make_unique>( + auto tree = std::make_unique>( kvs, std::move(moved_nm)); { auto t = tm->create_transaction(); @@ -1623,7 +1623,7 @@ TEST_F(d_seastore_tm_test_t, 7_tree_insert_erase_eagain) auto moved_nm = NodeExtentManager::create_seastore( *tm, L_ADDR_MIN, EAGAIN_PROBABILITY); auto p_nm = static_cast*>(moved_nm.get()); - auto tree = std::make_unique>( + auto tree = std::make_unique>( kvs, std::move(moved_nm)); unsigned num_ops = 0; unsigned num_ops_eagain = 0; diff --git a/src/test/crimson/seastore/onode_tree/test_value.h b/src/test/crimson/seastore/onode_tree/test_value.h index f5aed8a20d3..6501047e671 100644 --- a/src/test/crimson/seastore/onode_tree/test_value.h +++ b/src/test/crimson/seastore/onode_tree/test_value.h @@ -36,10 +36,15 @@ inline std::ostream& operator<<(std::ostream& os, const test_item_t& item) { return os << "TestItem(#" << item.id << ", " << item.size << "B)"; } +template class TestValue final : public Value { public: static constexpr tree_conf_t TREE_CONF = { - value_magic_t::TEST + MAGIC, + MAX_NS_SIZE, + MAX_OID_SIZE }; using id_t = test_item_t::id_t; @@ -189,4 +194,9 @@ class TestValue final : public Value { } }; +using UnboundedValue = TestValue< + value_magic_t::TEST_UNBOUND, 4096, 4096>; +using BoundedValue = TestValue< + value_magic_t::TEST_BOUNDED, 320, 320>; + } diff --git a/src/tools/crimson/perf_staged_fltree.cc b/src/tools/crimson/perf_staged_fltree.cc index 3c4be140174..fabaea868c3 100644 --- a/src/tools/crimson/perf_staged_fltree.cc +++ b/src/tools/crimson/perf_staged_fltree.cc @@ -30,7 +30,7 @@ class PerfTree : public TMTestState { seastar::future<> run(KVPool& kvs, double erase_ratio) { return tm_setup().then([this, &kvs, erase_ratio] { return seastar::async([this, &kvs, erase_ratio] { - auto tree = std::make_unique>(kvs, + auto tree = std::make_unique>(kvs, (is_dummy ? NodeExtentManager::create_dummy(true) : NodeExtentManager::create_seastore(*tm))); { -- 2.39.5