]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
crimson/onode-staged-tree: implement size upper-bounds to ns/oid
authorYingxin Cheng <yingxin.cheng@intel.com>
Wed, 2 Jun 2021 04:47:55 +0000 (12:47 +0800)
committerYingxin Cheng <yingxin.cheng@intel.com>
Fri, 11 Jun 2021 14:43:58 +0000 (22:43 +0800)
Signed-off-by: Yingxin Cheng <yingxin.cheng@intel.com>
src/crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.cc
src/crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.h
src/crimson/os/seastore/onode_manager/staged-fltree/fwd.h
src/crimson/os/seastore/onode_manager/staged-fltree/stages/key_layout.h
src/crimson/os/seastore/onode_manager/staged-fltree/tree.h
src/crimson/os/seastore/onode_manager/staged-fltree/tree_utils.h
src/crimson/os/seastore/onode_manager/staged-fltree/value.cc
src/crimson/os/seastore/onode_manager/staged-fltree/value.h
src/test/crimson/seastore/onode_tree/test_staged_fltree.cc
src/test/crimson/seastore/onode_tree/test_value.h
src/tools/crimson/perf_staged_fltree.cc

index 95628b2ecf0e49c20865cf348b01478d02f56052..d995a83eaa0fd6e8cf65e569c4cac4fb5885f99e 100644 (file)
@@ -4,7 +4,6 @@
 #include "crimson/os/seastore/logging.h"
 
 #include "crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.h"
-#include "crimson/os/seastore/onode_manager/staged-fltree/stages/key_layout.h"
 
 namespace crimson::os::seastore::onode {
 
@@ -41,10 +40,6 @@ FLTreeOnodeManager::get_or_create_onode(
   const ghobject_t &hoid)
 {
   LOG_PREFIX(FLTreeOnodeManager::get_or_create_onode);
-  if (hoid.hobj.oid.name.length() + hoid.hobj.nspace.length()
-      > key_view_t::MAX_NS_OID_LENGTH) {
-    return crimson::ct_error::value_too_large::make();
-  }
   return tree.insert(
     trans, hoid,
     OnodeTree::tree_value_config_t{sizeof(onode_layout_t)}
index fcb26b25b5ef5d1bc816c0409b19983638cc688b..a1f73e4a711685ebfe82c0b664514d98c2ca4f6e 100644 (file)
@@ -11,7 +11,9 @@ namespace crimson::os::seastore::onode {
 
 struct FLTreeOnode final : Onode, Value {
   static constexpr tree_conf_t TREE_CONF = {
-    value_magic_t::ONODE
+    value_magic_t::ONODE,
+    128,        // max_ns_size
+    320         // max_oid_size
   };
 
   enum class status_t {
index 881d9c91be5e6a828b839818e71ad3d30d154db5..741c31efe1dfcbe750525e06cdc22a085a9aff2d 100644 (file)
@@ -64,6 +64,8 @@ using node_offset_t = uint16_t;
 constexpr node_offset_t DISK_BLOCK_SIZE = 1u << 12;
 constexpr node_offset_t NODE_BLOCK_SIZE = DISK_BLOCK_SIZE * 1u;
 
+using string_size_t = uint16_t;
+
 enum class MatchKindBS : int8_t { NE = -1, EQ = 0 };
 
 enum class MatchKindCMP : int8_t { LT = -1, EQ = 0, GT };
index f8a9cc6b8eff706000cd22b9c013e40a245d97df..4eb03cfef2b993d94a0c3cbf568bfa34704567d2 100644 (file)
@@ -144,8 +144,6 @@ inline MatchKindCMP compare_to(const snap_gen_t& l, const snap_gen_t& r) {
  */
 struct string_key_view_t {
   enum class Type {MIN, STR, MAX};
-  // presumably the maximum string length is 2KiB
-  using string_size_t = uint16_t;
   static constexpr auto MARKER_MAX = std::numeric_limits<string_size_t>::max();
   static constexpr auto MARKER_MIN = std::numeric_limits<string_size_t>::max() - 1;
   static constexpr auto VALID_UPPER_BOUND = std::numeric_limits<string_size_t>::max() - 2;
@@ -273,7 +271,6 @@ struct string_key_view_t {
  */
 class string_view_masked_t {
  public:
-  using string_size_t = string_key_view_t::string_size_t;
   using Type = string_key_view_t::Type;
   explicit string_view_masked_t(const string_key_view_t& index)
       : type{index.type()} {
@@ -392,7 +389,6 @@ inline std::ostream& operator<<(std::ostream& os, const string_view_masked_t& ma
 }
 
 struct ns_oid_view_t {
-  using string_size_t = string_key_view_t::string_size_t;
   using Type = string_key_view_t::Type;
 
   ns_oid_view_t(const char* p_end) : nspace(p_end), oid(nspace.p_next_end()) {}
@@ -596,13 +592,6 @@ inline std::ostream& operator<<(std::ostream& os, const key_hobj_t& key) {
  */
 class key_view_t {
  public:
-  //FIXME: the length of ns and oid should be defined by osd_max_object_name_len
-  //       and object_max_object_namespace_len in the future
-  static constexpr int MAX_NS_OID_LENGTH =
-    (4096 - sizeof(onode_layout_t) * 2) / 4
-    - sizeof(shard_pool_t) - sizeof(crush_t) - sizeof(snap_gen_t)
-    - 8; // size of length field of oid and ns
-
   /**
    * common interfaces as a full_key_t
    */
index db7920c33f996d38639a76efab14f2620252e7f3..7fa41b03df6b6ab4ebb192b48c71813a890f3f42 100644 (file)
@@ -7,6 +7,7 @@
 
 #include "common/hobject.h"
 #include "crimson/common/type_helpers.h"
+#include "crimson/os/seastore/logging.h"
 
 #include "fwd.h"
 #include "node.h"
@@ -79,8 +80,13 @@ class Btree {
     // XXX: return key_view_t to avoid unecessary ghobject_t constructions
     ghobject_t get_ghobj() const {
       assert(!is_end());
-      return p_cursor->get_key_view(
-          p_tree->value_builder.get_header_magic()).to_ghobj();
+      auto view = p_cursor->get_key_view(
+          p_tree->value_builder.get_header_magic());
+      assert(view.nspace().size() <=
+             p_tree->value_builder.get_max_ns_size());
+      assert(view.oid().size() <=
+             p_tree->value_builder.get_max_oid_size());
+      return view.to_ghobj();
     }
 
     ValueImpl value() {
@@ -236,8 +242,20 @@ class Btree {
   struct tree_value_config_t {
     value_size_t payload_size = 256;
   };
-  eagain_future<std::pair<Cursor, bool>>
+  using insert_ertr = eagain_ertr::extend<
+    crimson::ct_error::value_too_large>;
+  insert_ertr::future<std::pair<Cursor, bool>>
   insert(Transaction& t, const ghobject_t& obj, tree_value_config_t _vconf) {
+    if (obj.hobj.nspace.size() > value_builder.get_max_ns_size()) {
+      ERRORT("namespace size {} too large to insert {}",
+             t, obj.hobj.nspace.size(), key_hobj_t{obj});
+      return crimson::ct_error::value_too_large::make();
+    }
+    if (obj.hobj.oid.name.size() > value_builder.get_max_oid_size()) {
+      ERRORT("oid size {} too large to insert {}",
+             t, obj.hobj.oid.name.size(), key_hobj_t{obj});
+      return crimson::ct_error::value_too_large::make();
+    }
     value_config_t vconf{value_builder.get_header_magic(), _vconf.payload_size};
     return seastar::do_with(
       full_key_t<KeyT::HOBJ>(obj),
index b67b76d0d9def6a1efdaa59a5682a9ec9f643c30..dcf6ed1e927ade4320f8799b2f71df632b8e222a 100644 (file)
@@ -323,7 +323,12 @@ class TreeBuilder {
 #else
       return eagain_ertr::make_ready_future<BtreeCursor>(cursor);
 #endif
-    });
+    }).handle_error(
+      [] (const crimson::ct_error::value_too_large& e) {
+        ceph_abort("impossible path");
+      },
+      crimson::ct_error::pass_further_all{}
+    );
   }
 
   eagain_future<> insert(Transaction& t) {
index 547872fdf09d62ca762f5dc447531fced2e56c3b..03e3f8792b3cfe699a376afd445ba5289d6b4748 100644 (file)
@@ -5,6 +5,7 @@
 
 #include "node.h"
 #include "node_delta_recorder.h"
+#include "node_layout.h"
 
 // value implementations
 #include "test/crimson/seastore/onode_tree/test_value.h"
@@ -88,8 +89,11 @@ build_value_recorder_by_type(ceph::bufferlist& encoded,
   case value_magic_t::ONODE:
     ret = std::make_unique<FLTreeOnode::Recorder>(encoded);
     break;
-  case value_magic_t::TEST:
-    ret = std::make_unique<TestValue::Recorder>(encoded);
+  case value_magic_t::TEST_UNBOUND:
+    ret = std::make_unique<UnboundedValue::Recorder>(encoded);
+    break;
+  case value_magic_t::TEST_BOUNDED:
+    ret = std::make_unique<BoundedValue::Recorder>(encoded);
     break;
   default:
     ret = nullptr;
@@ -99,4 +103,12 @@ build_value_recorder_by_type(ceph::bufferlist& encoded,
   return ret;
 }
 
+void validate_tree_config(const tree_conf_t& conf)
+{
+  ceph_assert(conf.max_ns_size <
+              string_key_view_t::VALID_UPPER_BOUND);
+  ceph_assert(conf.max_oid_size <
+              string_key_view_t::VALID_UPPER_BOUND);
+}
+
 }
index bce1424b760a224452f67dee824f4171baf549a4..2a0ef8a43e8701b6e306c4fbbc977730da5321f8 100644 (file)
@@ -17,14 +17,17 @@ namespace crimson::os::seastore::onode {
 using value_size_t = uint16_t;
 enum class value_magic_t : uint8_t {
   ONODE = 0x52,
-  TEST,
+  TEST_UNBOUND,
+  TEST_BOUNDED,
 };
 inline std::ostream& operator<<(std::ostream& os, const value_magic_t& magic) {
   switch (magic) {
   case value_magic_t::ONODE:
     return os << "ONODE";
-  case value_magic_t::TEST:
-    return os << "TEST";
+  case value_magic_t::TEST_UNBOUND:
+    return os << "TEST_UNBOUND";
+  case value_magic_t::TEST_BOUNDED:
+    return os << "TEST_BOUNDED";
   default:
     return os << "UNKNOWN(" << magic << ")";
   }
@@ -154,6 +157,8 @@ class ValueDeltaRecorder {
  */
 struct tree_conf_t {
   value_magic_t value_magic;
+  string_size_t max_ns_size;
+  string_size_t max_oid_size;
 };
 
 class tree_cursor_t;
@@ -249,6 +254,8 @@ class Value {
  */
 struct ValueBuilder {
   virtual value_magic_t get_header_magic() const = 0;
+  virtual string_size_t get_max_ns_size() const = 0;
+  virtual string_size_t get_max_oid_size() const = 0;
   virtual std::unique_ptr<ValueDeltaRecorder>
   build_value_recorder(ceph::bufferlist&) const = 0;
 };
@@ -260,9 +267,19 @@ struct ValueBuilder {
  */
 template <typename ValueImpl>
 struct ValueBuilderImpl final : public ValueBuilder {
+  ValueBuilderImpl() {
+    validate_tree_config(ValueImpl::TREE_CONF);
+  }
+
   value_magic_t get_header_magic() const {
     return ValueImpl::TREE_CONF.value_magic;
   }
+  string_size_t get_max_ns_size() const override {
+    return ValueImpl::TREE_CONF.max_ns_size;
+  }
+  string_size_t get_max_oid_size() const override {
+    return ValueImpl::TREE_CONF.max_oid_size;
+  }
 
   std::unique_ptr<ValueDeltaRecorder>
   build_value_recorder(ceph::bufferlist& encoded) const override {
@@ -280,6 +297,8 @@ struct ValueBuilderImpl final : public ValueBuilder {
   }
 };
 
+void validate_tree_config(const tree_conf_t& conf);
+
 /**
  * Get the value recorder by type (the magic value) when the ValueBuilder is
  * unavailable.
index cd211db058a9ce12613a48bfbb5b1a0542c899f8..cdc1f812e110db35803f43519224189e5fae1709 100644 (file)
@@ -26,6 +26,8 @@ namespace {
   constexpr bool IS_DUMMY_SYNC = false;
   using DummyManager = DummyNodeExtentManager<IS_DUMMY_SYNC>;
 
+  using UnboundedBtree = Btree<UnboundedValue>;
+
   [[maybe_unused]] seastar::logger& logger() {
     return crimson::get_logger(ceph_subsys_test);
   }
@@ -140,7 +142,7 @@ TEST_F(a_basic_test_t, 2_node_sizes)
   run_async([this] {
     auto nm = NodeExtentManager::create_dummy(IS_DUMMY_SYNC);
     auto t = make_test_transaction();
-    ValueBuilderImpl<TestValue> vb;
+    ValueBuilderImpl<UnboundedValue> vb;
     context_t c{*nm, vb, *t};
     std::array<std::pair<NodeImplURef, NodeExtentMutable>, 16> nodes = {
       InternalNode0::allocate(c, false, 1u).unsafe_get0().make_pair(),
@@ -171,15 +173,13 @@ TEST_F(a_basic_test_t, 2_node_sizes)
   });
 }
 
-using TestBtree = Btree<TestValue>;
-
 struct b_dummy_tree_test_t : public seastar_test_suite_t {
   NodeExtentManagerURef moved_nm;
   TransactionRef ref_t;
   Transaction& t;
-  ValueBuilderImpl<TestValue> vb;
+  ValueBuilderImpl<UnboundedValue> vb;
   context_t c;
-  TestBtree tree;
+  UnboundedBtree tree;
 
   b_dummy_tree_test_t()
     : moved_nm{NodeExtentManager::create_dummy(IS_DUMMY_SYNC)},
@@ -209,7 +209,7 @@ TEST_F(b_dummy_tree_test_t, 3_random_insert_erase_leaf_node)
     ASSERT_TRUE(tree.last(t).unsafe_get0().is_end());
 
     std::map<ghobject_t,
-             std::tuple<test_item_t, TestBtree::Cursor>> insert_history;
+             std::tuple<test_item_t, UnboundedBtree::Cursor>> insert_history;
 
     auto f_validate_insert_new = [this, &insert_history] (
         const ghobject_t& key, const test_item_t& value) {
@@ -496,7 +496,7 @@ class TestTree {
       // clone
       auto ref_dummy = NodeExtentManager::create_dummy(IS_DUMMY_SYNC);
       auto p_dummy = static_cast<DummyManager*>(ref_dummy.get());
-      TestBtree tree_clone(std::move(ref_dummy));
+      UnboundedBtree tree_clone(std::move(ref_dummy));
       auto ref_t_clone = make_test_transaction();
       Transaction& t_clone = *ref_t_clone;
       tree_clone.test_clone_from(t_clone, t, tree).unsafe_get0();
@@ -577,12 +577,12 @@ class TestTree {
   NodeExtentManagerURef moved_nm;
   TransactionRef ref_t;
   Transaction& t;
-  ValueBuilderImpl<TestValue> vb;
+  ValueBuilderImpl<UnboundedValue> vb;
   context_t c;
-  TestBtree tree;
+  UnboundedBtree tree;
   Values<test_item_t> values;
   std::map<ghobject_t,
-           std::tuple<test_item_t, TestBtree::Cursor>> insert_history;
+           std::tuple<test_item_t, UnboundedBtree::Cursor>> insert_history;
 };
 
 struct c_dummy_test_t : public seastar_test_suite_t {};
@@ -1239,9 +1239,9 @@ class DummyChildPool {
   Transaction& t() const { return *ref_t; }
 
   std::set<Ref<DummyChild>> tracked_children;
-  std::optional<TestBtree> p_btree;
+  std::optional<UnboundedBtree> p_btree;
   DummyManager* p_dummy = nullptr;
-  ValueBuilderImpl<TestValue> vb;
+  ValueBuilderImpl<UnboundedValue> vb;
   TransactionRef ref_t = make_test_transaction();
 
   std::random_device rd;
@@ -1526,7 +1526,7 @@ TEST_F(d_seastore_tm_test_t, 6_random_tree_insert_erase)
     auto moved_nm = (TEST_SEASTORE ? NodeExtentManager::create_seastore(*tm)
                                    : NodeExtentManager::create_dummy(IS_DUMMY_SYNC));
     auto p_nm = moved_nm.get();
-    auto tree = std::make_unique<TreeBuilder<TRACK_CURSORS, TestValue>>(
+    auto tree = std::make_unique<TreeBuilder<TRACK_CURSORS, BoundedValue>>(
         kvs, std::move(moved_nm));
     {
       auto t = tm->create_transaction();
@@ -1623,7 +1623,7 @@ TEST_F(d_seastore_tm_test_t, 7_tree_insert_erase_eagain)
     auto moved_nm = NodeExtentManager::create_seastore(
         *tm, L_ADDR_MIN, EAGAIN_PROBABILITY);
     auto p_nm = static_cast<SeastoreNodeExtentManager<true>*>(moved_nm.get());
-    auto tree = std::make_unique<TreeBuilder<TRACK_CURSORS, TestValue>>(
+    auto tree = std::make_unique<TreeBuilder<TRACK_CURSORS, BoundedValue>>(
         kvs, std::move(moved_nm));
     unsigned num_ops = 0;
     unsigned num_ops_eagain = 0;
index f5aed8a20d3c5e80867de6da3f5f2b74a135bd76..6501047e67162c38c17255bc5d0408dfcdbff275 100644 (file)
@@ -36,10 +36,15 @@ inline std::ostream& operator<<(std::ostream& os, const test_item_t& item) {
   return os << "TestItem(#" << item.id << ", " << item.size << "B)";
 }
 
+template <value_magic_t MAGIC,
+          string_size_t MAX_NS_SIZE,
+          string_size_t MAX_OID_SIZE>
 class TestValue final : public Value {
  public:
   static constexpr tree_conf_t TREE_CONF = {
-    value_magic_t::TEST
+    MAGIC,
+    MAX_NS_SIZE,
+    MAX_OID_SIZE
   };
 
   using id_t = test_item_t::id_t;
@@ -189,4 +194,9 @@ class TestValue final : public Value {
   }
 };
 
+using UnboundedValue = TestValue<
+  value_magic_t::TEST_UNBOUND, 4096, 4096>;
+using BoundedValue   = TestValue<
+  value_magic_t::TEST_BOUNDED,  320,  320>;
+
 }
index 3c4be1401743028d7fd403b04922966fc69b578d..fabaea868c344d0d07d90689e1bc8b2b30e06f61 100644 (file)
@@ -30,7 +30,7 @@ class PerfTree : public TMTestState {
   seastar::future<> run(KVPool<test_item_t>& kvs, double erase_ratio) {
     return tm_setup().then([this, &kvs, erase_ratio] {
       return seastar::async([this, &kvs, erase_ratio] {
-        auto tree = std::make_unique<TreeBuilder<TRACK, TestValue>>(kvs,
+        auto tree = std::make_unique<TreeBuilder<TRACK, BoundedValue>>(kvs,
             (is_dummy ? NodeExtentManager::create_dummy(true)
                       : NodeExtentManager::create_seastore(*tm)));
         {