git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
crimson/os/seastore: randomize metadata laddr hints
author: Samuel Just <sjust@redhat.com>
Fri, 10 Dec 2021 06:22:44 +0000 (06:22 +0000)
committer: Xuehan Xu <xxhdx1985126@gmail.com>
Tue, 14 Dec 2021 06:55:26 +0000 (14:55 +0800)
This should prevent omap and xattr extent allocations from clumping near
the onode's hint.  Additionally, only generate them past the default
16MB object_data_handler reservation.

Signed-off-by: Samuel Just <sjust@redhat.com>
src/crimson/os/seastore/object_data_handler.cc
src/crimson/os/seastore/onode.h
src/crimson/os/seastore/seastore.cc
src/test/crimson/seastore/onode_tree/test_fltree_onode_manager.cc

index a359da8154f3bcc0599fe89f9eef8d9c9bfcad7b..a3367d0668fc106615d7831fe3c308362d51428c 100644 (file)
@@ -24,7 +24,7 @@ namespace crimson::os::seastore {
  * mappings (necessary for clone), we'll add the ability to grow and shrink
  * these regions and remove this assumption.
  */
-static constexpr extent_len_t MAX_OBJECT_SIZE = 16<<20;
+static constexpr extent_len_t MAX_OBJECT_SIZE = Onode::DEFAULT_DATA_RESERVATION;
 #define assert_aligned(x) ceph_assert(((x)%ctx.tm.get_block_size()) == 0)
 
 using context_t = ObjectDataHandler::context_t;
@@ -272,11 +272,11 @@ ObjectDataHandler::write_ret ObjectDataHandler::prepare_data_reservation(
   } else {
     DEBUGT("reserving: {}~{}",
            ctx.t,
-           ctx.onode.get_hint(),
+           ctx.onode.get_data_hint(),
            MAX_OBJECT_SIZE);
     return ctx.tm.reserve_region(
       ctx.t,
-      ctx.onode.get_hint(),
+      ctx.onode.get_data_hint(),
       MAX_OBJECT_SIZE
     ).si_then([&object_data](auto pin) {
       ceph_assert(pin->get_length() == MAX_OBJECT_SIZE);
index a4be9ac98847102df915b4da4a95ff6b29e9091f..db7aec06b47c694481b41b279d35da824ed090c8 100644 (file)
@@ -52,12 +52,25 @@ class Onode : public boost::intrusive_ref_counter<
   Onode,
   boost::thread_unsafe_counter>
 {
+protected:
+  virtual laddr_t get_hint() const = 0;
 public:
+  static constexpr uint32_t DEFAULT_DATA_RESERVATION = 16<<20;
+  static constexpr uint32_t DEFAULT_METADATA_OFFSET =
+    DEFAULT_DATA_RESERVATION;
+  static constexpr uint32_t DEFAULT_METADATA_RANGE = 16<<20;
 
   virtual const onode_layout_t &get_layout() const = 0;
   virtual onode_layout_t &get_mutable_layout(Transaction &t) = 0;
   virtual ~Onode() = default;
-  virtual laddr_t get_hint() const = 0;
+
+  laddr_t get_metadata_hint() const {
+    return get_hint() + DEFAULT_METADATA_OFFSET +
+      ((uint32_t)std::rand() % DEFAULT_METADATA_RANGE);
+  }
+  laddr_t get_data_hint() const {
+    return get_hint();
+  }
 };
 
 
index 600ba4b58f77aa8e12b7377d1bffe4922d36ec59..88c12b19bf7aaf9557c27ee4c89010d5a0e58b31 100644 (file)
@@ -509,7 +509,7 @@ SeaStore::get_attr_errorator::future<ceph::bufferlist> SeaStore::get_attr(
       }
       return _omap_get_value(
         t,
-        layout.xattr_root.get(onode.get_hint()),
+        layout.xattr_root.get(onode.get_metadata_hint()),
         name);
     }
   ).handle_error(crimson::ct_error::input_output_error::handle([FNAME] {
@@ -607,7 +607,8 @@ SeaStore::omap_get_values(
     "omap_get_values",
     op_type_t::OMAP_GET_VALUES,
     [this, keys](auto &t, auto &onode) {
-      omap_root_t omap_root = onode.get_layout().omap_root.get(onode.get_hint());
+      omap_root_t omap_root = onode.get_layout().omap_root.get(
+       onode.get_metadata_hint());
       return _omap_get_values(
        t,
        std::move(omap_root),
@@ -685,7 +686,7 @@ SeaStore::_omap_list_ret SeaStore::_omap_list(
   const std::optional<std::string>& start,
   OMapManager::omap_list_config_t config) const
 {
-  auto root = omap_root.get(onode.get_hint());
+  auto root = omap_root.get(onode.get_metadata_hint());
   if (root.is_null()) {
     return seastar::make_ready_future<_omap_list_bare_ret>(
       true, omap_values_t{}
@@ -1089,13 +1090,13 @@ SeaStore::_omap_set_kvs(
 {
   return seastar::do_with(
     BtreeOMapManager(*transaction_manager),
-    omap_root.get(onode->get_hint()),
+    omap_root.get(onode->get_metadata_hint()),
     [&, keys=std::move(kvs)](auto &omap_manager, auto &root) {
       tm_iertr::future<> maybe_create_root =
         !root.is_null() ?
         tm_iertr::now() :
         omap_manager.initialize_omap(
-          t, onode->get_hint()
+          t, onode->get_metadata_hint()
         ).si_then([&root](auto new_root) {
           root = new_root;
         });
@@ -1146,13 +1147,13 @@ SeaStore::tm_ret SeaStore::_omap_rmkeys(
 {
   LOG_PREFIX(SeaStore::_omap_rmkeys);
   DEBUGT("{} {} keys", *ctx.transaction, *onode, keys.size());
-  auto omap_root = onode->get_layout().omap_root.get(onode->get_hint());
+  auto omap_root = onode->get_layout().omap_root.get(onode->get_metadata_hint());
   if (omap_root.is_null()) {
     return seastar::now();
   } else {
     return seastar::do_with(
       BtreeOMapManager(*transaction_manager),
-      onode->get_layout().omap_root.get(onode->get_hint()),
+      onode->get_layout().omap_root.get(onode->get_metadata_hint()),
       std::move(keys),
       [&ctx, &onode](
        auto &omap_manager,
index baf5bb29bb15a0e7e87f115be6537062f629f260..c91e13e511b2700eed3412018867346502090ac1 100644 (file)
@@ -31,15 +31,15 @@ struct onode_item_t {
   void initialize(Transaction& t, Onode& value) const {
     auto& layout = value.get_mutable_layout(t);
     layout.size = size;
-    layout.omap_root.update(omap_root_t(id, cnt_modify, value.get_hint()));
+    layout.omap_root.update(omap_root_t(id, cnt_modify, value.get_metadata_hint()));
     validate(value);
   }
 
   void validate(Onode& value) const {
     auto& layout = value.get_layout();
     ceph_assert(laddr_t(layout.size) == laddr_t{size});
-    ceph_assert(layout.omap_root.get(value.get_hint()).addr == id);
-    ceph_assert(layout.omap_root.get(value.get_hint()).depth == cnt_modify);
+    ceph_assert(layout.omap_root.get(value.get_metadata_hint()).addr == id);
+    ceph_assert(layout.omap_root.get(value.get_metadata_hint()).depth == cnt_modify);
   }
 
   void modify(Transaction& t, Onode& value) {