git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
crimson/os/seastore: make onode data/metadata laddr space reservation configurable 44235/head
author Xuehan Xu <xxhdx1985126@gmail.com>
Sun, 12 Dec 2021 08:43:30 +0000 (16:43 +0800)
committer Xuehan Xu <xxhdx1985126@gmail.com>
Tue, 14 Dec 2021 06:55:26 +0000 (14:55 +0800)
Signed-off-by: Xuehan Xu <xxhdx1985126@gmail.com>
src/common/options/crimson.yaml.in
src/crimson/os/seastore/object_data_handler.cc
src/crimson/os/seastore/object_data_handler.h
src/crimson/os/seastore/onode.h
src/crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.cc
src/crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.h
src/crimson/os/seastore/seastore.cc
src/crimson/os/seastore/seastore.h
src/test/crimson/seastore/test_object_data_handler.cc

index d8072a94677c97c29539de9152ab8031dd08411a..d110842c759559d9cf460c7d77a1d7f61f43951c 100644 (file)
@@ -65,3 +65,13 @@ options:
   level: dev
   desc: The record fullness threshold to flush a journal batch
   default: 0.95
+- name: seastore_default_max_object_size
+  type: uint
+  level: dev
+  desc: default logical address space reservation for seastore objects' data
+  default: 16777216
+- name: seastore_default_object_metadata_reservation
+  type: uint
+  level: dev
+  desc: default logical address space reservation for seastore objects' metadata
+  default: 16777216
index a3367d0668fc106615d7831fe3c308362d51428c..2b91cf4835b813701f2b3be2ebc35d7ece3716fa 100644 (file)
@@ -15,16 +15,6 @@ namespace {
 }
 
 namespace crimson::os::seastore {
-
-/**
- * MAX_OBJECT_SIZE
- *
- * For now, we allocate a fixed region of laddr space of size MAX_OBJECT_SIZE
- * for any object.  In the future, once we have the ability to remap logical
- * mappings (necessary for clone), we'll add the ability to grow and shrink
- * these regions and remove this assumption.
- */
-static constexpr extent_len_t MAX_OBJECT_SIZE = Onode::DEFAULT_DATA_RESERVATION;
 #define assert_aligned(x) ceph_assert(((x)%ctx.tm.get_block_size()) == 0)
 
 using context_t = ObjectDataHandler::context_t;
@@ -261,9 +251,9 @@ ObjectDataHandler::write_ret ObjectDataHandler::prepare_data_reservation(
   extent_len_t size)
 {
   LOG_PREFIX(ObjectDataHandler::prepare_data_reservation);
-  ceph_assert(size <= MAX_OBJECT_SIZE);
+  ceph_assert(size <= max_object_size);
   if (!object_data.is_null()) {
-    ceph_assert(object_data.get_reserved_data_len() == MAX_OBJECT_SIZE);
+    ceph_assert(object_data.get_reserved_data_len() == max_object_size);
     DEBUGT("reservation present: {}~{}",
            ctx.t,
            object_data.get_reserved_data_base(),
@@ -273,13 +263,13 @@ ObjectDataHandler::write_ret ObjectDataHandler::prepare_data_reservation(
     DEBUGT("reserving: {}~{}",
            ctx.t,
            ctx.onode.get_data_hint(),
-           MAX_OBJECT_SIZE);
+           max_object_size);
     return ctx.tm.reserve_region(
       ctx.t,
       ctx.onode.get_data_hint(),
-      MAX_OBJECT_SIZE
-    ).si_then([&object_data](auto pin) {
-      ceph_assert(pin->get_length() == MAX_OBJECT_SIZE);
+      max_object_size
+    ).si_then([max_object_size=max_object_size, &object_data](auto pin) {
+      ceph_assert(pin->get_length() == max_object_size);
       object_data.update_reserved(
        pin->get_laddr(),
        pin->get_length());
index 677094a25dbd0ef9bff811846196747961ea07d2..c397245125cf89ac51287a6b63eb7009b24921cf 100644 (file)
@@ -50,6 +50,8 @@ class ObjectDataHandler {
 public:
   using base_iertr = TransactionManager::base_iertr;
 
+  ObjectDataHandler(uint32_t mos) : max_object_size(mos) {}
+
   struct context_t {
     TransactionManager &tm;
     Transaction &t;
@@ -104,6 +106,16 @@ private:
     context_t ctx,
     object_data_t &object_data,
     extent_len_t size);
+private:
+  /**
+   * max_object_size
+   *
+   * For now, we allocate a fixed region of laddr space of size max_object_size
+   * for any object.  In the future, once we have the ability to remap logical
+   * mappings (necessary for clone), we'll add the ability to grow and shrink
+   * these regions and remove this assumption.
+   */
+  const uint32_t max_object_size = 0;
 };
 
 }
index db7aec06b47c694481b41b279d35da824ed090c8..c9c31c3a0b512533cbaef07365029026adb22147 100644 (file)
@@ -54,19 +54,23 @@ class Onode : public boost::intrusive_ref_counter<
 {
 protected:
   virtual laddr_t get_hint() const = 0;
+  const uint32_t default_metadata_offset = 0;
+  const uint32_t default_metadata_range = 0;
 public:
-  static constexpr uint32_t DEFAULT_DATA_RESERVATION = 16<<20;
-  static constexpr uint32_t DEFAULT_METADATA_OFFSET =
-    DEFAULT_DATA_RESERVATION;
-  static constexpr uint32_t DEFAULT_METADATA_RANGE = 16<<20;
+  Onode(uint32_t ddr, uint32_t dmr)
+    : default_metadata_offset(ddr),
+      default_metadata_range(dmr)
+  {}
 
   virtual const onode_layout_t &get_layout() const = 0;
   virtual onode_layout_t &get_mutable_layout(Transaction &t) = 0;
   virtual ~Onode() = default;
 
   laddr_t get_metadata_hint() const {
-    return get_hint() + DEFAULT_METADATA_OFFSET +
-      ((uint32_t)std::rand() % DEFAULT_METADATA_RANGE);
+    assert(default_metadata_offset);
+    assert(default_metadata_range);
+    return get_hint() + default_metadata_offset +
+      ((uint32_t)std::rand() % default_metadata_range);
   }
   laddr_t get_data_hint() const {
     return get_hint();
index ad6d1d19cc8b1e04596c537fc2578f9b96d3ffaf..2865cba4b9754189dc662f8a26269a340ccd43d7 100644 (file)
@@ -27,7 +27,10 @@ FLTreeOnodeManager::get_onode_ret FLTreeOnodeManager::get_onode(
       DEBUGT("no entry for {}", trans, hoid);
       return crimson::ct_error::enoent::make();
     }
-    auto val = OnodeRef(new FLTreeOnode(cursor.value()));
+    auto val = OnodeRef(new FLTreeOnode(
+       default_data_reservation,
+       default_metadata_range,
+       cursor.value()));
     return get_onode_iertr::make_ready_future<OnodeRef>(
       val
     );
@@ -43,10 +46,13 @@ FLTreeOnodeManager::get_or_create_onode(
   return tree.insert(
     trans, hoid,
     OnodeTree::tree_value_config_t{sizeof(onode_layout_t)}
-  ).si_then([&trans, &hoid, FNAME](auto p)
+  ).si_then([this, &trans, &hoid, FNAME](auto p)
               -> get_or_create_onode_ret {
     auto [cursor, created] = std::move(p);
-    auto val = OnodeRef(new FLTreeOnode(cursor.value()));
+    auto val = OnodeRef(new FLTreeOnode(
+       default_data_reservation,
+       default_metadata_range,
+       cursor.value()));
     if (created) {
       DEBUGT("created onode for entry for {}", trans, hoid);
       val->get_mutable_layout(trans) = onode_layout_t{};
index 814471fffbc9b0296aee95713f810f8f8c5254e5..0367b823f361bbafe3b860cb849320de501cf6a0 100644 (file)
@@ -37,7 +37,14 @@ struct FLTreeOnode final : Onode, Value {
   FLTreeOnode& operator=(const FLTreeOnode&) = delete;
 
   template <typename... T>
-  FLTreeOnode(T&&... args) : Value(std::forward<T>(args)...) {}
+  FLTreeOnode(uint32_t ddr, uint32_t dmr, T&&... args)
+    : Onode(ddr, dmr),
+      Value(std::forward<T>(args)...) {}
+
+  template <typename... T>
+  FLTreeOnode(T&&... args)
+    : Onode(0, 0),
+      Value(std::forward<T>(args)...) {}
 
   struct Recorder : public ValueDeltaRecorder {
     Recorder(bufferlist &bl) : ValueDeltaRecorder(bl) {}
@@ -102,12 +109,23 @@ struct FLTreeOnode final : Onode, Value {
 
 using OnodeTree = Btree<FLTreeOnode>;
 
+using crimson::common::get_conf;
+
 class FLTreeOnodeManager : public crimson::os::seastore::OnodeManager {
   OnodeTree tree;
 
+  uint32_t default_data_reservation = 0;
+  uint32_t default_metadata_offset = 0;
+  uint32_t default_metadata_range = 0;
 public:
   FLTreeOnodeManager(TransactionManager &tm) :
-    tree(NodeExtentManager::create_seastore(tm)) {}
+    tree(NodeExtentManager::create_seastore(tm)),
+    default_data_reservation(
+      get_conf<uint64_t>("seastore_default_max_object_size")),
+    default_metadata_offset(default_data_reservation),
+    default_metadata_range(
+      get_conf<uint64_t>("seastore_default_object_metadata_reservation"))
+  {}
 
   mkfs_ret mkfs(Transaction &t) {
     return tree.mkfs(t);
index 88c12b19bf7aaf9557c27ee4c89010d5a0e58b31..6c0c198b8a5c14cc3f341e365653f25506da417d 100644 (file)
@@ -68,6 +68,8 @@ public:
   }
 };
 
+using crimson::common::get_conf;
+
 SeaStore::SeaStore(
   const std::string& root,
   MDStoreRef mdstore,
@@ -80,7 +82,9 @@ SeaStore::SeaStore(
     segment_manager(std::move(sm)),
     transaction_manager(std::move(tm)),
     collection_manager(std::move(cm)),
-    onode_manager(std::move(om))
+    onode_manager(std::move(om)),
+    max_object_size(
+      get_conf<uint64_t>("seastore_default_max_object_size"))
 {
   register_metrics();
 }
@@ -459,7 +463,7 @@ SeaStore::read_errorator::future<ceph::bufferlist> SeaStore::read(
        size - offset :
        std::min(size - offset, len);
 
-      return ObjectDataHandler().read(
+      return ObjectDataHandler(max_object_size).read(
         ObjectDataHandler::context_t{
           *transaction_manager,
           t,
@@ -1069,7 +1073,7 @@ SeaStore::tm_ret SeaStore::_write(
   return seastar::do_with(
     std::move(_bl),
     [=, &ctx, &onode](auto &bl) {
-      return ObjectDataHandler().write(
+      return ObjectDataHandler(max_object_size).write(
         ObjectDataHandler::context_t{
           *transaction_manager,
           *ctx.transaction,
@@ -1199,7 +1203,7 @@ SeaStore::tm_ret SeaStore::_truncate(
   LOG_PREFIX(SeaStore::_truncate);
   DEBUGT("onode={} size={}", *ctx.transaction, *onode, size);
   onode->get_mutable_layout(*ctx.transaction).size = size;
-  return ObjectDataHandler().truncate(
+  return ObjectDataHandler(max_object_size).truncate(
     ObjectDataHandler::context_t{
       *transaction_manager,
       *ctx.transaction,
index b205872141d790083494bc0ed9b788540a803b8f..f1d96cbaa6fc72c53b55e5feb1c6ab56713479c6 100644 (file)
@@ -305,6 +305,7 @@ private:
   TransactionManagerRef transaction_manager;
   CollectionManagerRef collection_manager;
   OnodeManagerRef onode_manager;
+  const uint32_t max_object_size = 0;
 
   using tm_iertr = TransactionManager::base_iertr;
   using tm_ret = tm_iertr::future<>;
index e2fac7f68f1a0ce2b3cd95923d69738d3bcb68f9..162dc1c19cd684793c30db7c03f3a75a51163a71 100644 (file)
@@ -11,6 +11,10 @@ using namespace crimson;
 using namespace crimson::os;
 using namespace crimson::os::seastore;
 
+#define MAX_OBJECT_SIZE (16<<20)
+#define DEFAULT_OBJECT_DATA_RESERVATION (16<<20)
+#define DEFAULT_OBJECT_METADATA_RESERVATION (16<<20)
+
 namespace {
   [[maybe_unused]] seastar::logger& logger() {
     return crimson::get_logger(ceph_subsys_test);
@@ -22,6 +26,7 @@ class TestOnode final : public Onode {
   bool dirty = false;
 
 public:
+  TestOnode(uint32_t ddr, uint32_t dmr) : Onode(ddr, dmr) {}
   const onode_layout_t &get_layout() const final {
     return layout;
   }
@@ -58,7 +63,7 @@ struct object_data_handler_test_t:
        offset,
        len));
     with_trans_intr(t, [&](auto &t) {
-      return ObjectDataHandler().write(
+      return ObjectDataHandler(MAX_OBJECT_SIZE).write(
         ObjectDataHandler::context_t{
           *tm,
           t,
@@ -81,7 +86,7 @@ struct object_data_handler_test_t:
        0,
        size - offset);
       with_trans_intr(t, [&](auto &t) {
-        return ObjectDataHandler().truncate(
+        return ObjectDataHandler(MAX_OBJECT_SIZE).truncate(
           ObjectDataHandler::context_t{
             *tm,
             t,
@@ -100,7 +105,7 @@ struct object_data_handler_test_t:
 
   void read(Transaction &t, objaddr_t offset, extent_len_t len) {
     bufferlist bl = with_trans_intr(t, [&](auto &t) {
-      return ObjectDataHandler().read(
+      return ObjectDataHandler(MAX_OBJECT_SIZE).read(
         ObjectDataHandler::context_t{
           *tm,
           t,
@@ -132,7 +137,9 @@ struct object_data_handler_test_t:
   }
 
   seastar::future<> set_up_fut() final {
-    onode = new TestOnode{};
+    onode = new TestOnode(
+      DEFAULT_OBJECT_DATA_RESERVATION,
+      DEFAULT_OBJECT_METADATA_RESERVATION);
     known_contents = buffer::create(4<<20 /* 4MB */);
     memset(known_contents.c_str(), 0, known_contents.length());
     size = 0;