]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
crimson/os/seastore: implement placement_hint HOT/COLD/REWRITE 43459/head
author: Yingxin Cheng <yingxin.cheng@intel.com>
Fri, 8 Oct 2021 02:47:13 +0000 (10:47 +0800)
committer: Yingxin Cheng <yingxin.cheng@intel.com>
Fri, 8 Oct 2021 03:59:50 +0000 (11:59 +0800)
The current strategy is to inline allocate non-REWRITE extents in order
to reduce the record overhead. Observation shows that transactions
from users are usually very small and don't deserve to be written in ool
records, which require at least a block to store the record metadata.

Signed-off-by: Yingxin Cheng <yingxin.cheng@intel.com>
src/crimson/os/seastore/extent_placement_manager.h
src/crimson/os/seastore/seastore_types.h
src/crimson/os/seastore/transaction_manager.cc
src/crimson/os/seastore/transaction_manager.h

index 4bd917eda088840947a318472fec93de1dda566e..b4b00ef491913b4f21d213a711bfd6688f3bde6c 100644 (file)
@@ -307,12 +307,14 @@ public:
     Transaction& t,
     extent_types_t type,
     segment_off_t length,
-    placement_hint_t hint = placement_hint_t::NONE) {
+    placement_hint_t hint) {
     // only logical extents should fall in this path
     assert(is_logical_type(type));
     assert(hint < placement_hint_t::NUM_HINTS);
     auto dtype = get_allocator_type(hint);
-    bool delay = can_delay_allocation(dtype);
+    // FIXME: set delay for COLD extent when the record overhead is low
+    bool delay = (hint > placement_hint_t::COLD &&
+                  can_delay_allocation(dtype));
     CachedExtentRef extent = cache.alloc_new_extent_by_type(
         t, type, length, delay);
     extent->backend_type = dtype;
@@ -326,12 +328,14 @@ public:
   TCachedExtentRef<T> alloc_new_extent(
     Transaction& t,
     segment_off_t length,
-    placement_hint_t hint = placement_hint_t::NONE) {
+    placement_hint_t hint) {
     // only logical extents should fall in this path
     static_assert(is_logical_type(T::TYPE));
     assert(hint < placement_hint_t::NUM_HINTS);
     auto dtype = get_allocator_type(hint);
-    bool delay = can_delay_allocation(dtype);
+    // FIXME: set delay for COLD extent when the record overhead is low
+    bool delay = (hint > placement_hint_t::COLD &&
+                  can_delay_allocation(dtype));
     TCachedExtentRef<T> extent = cache.alloc_new_extent<T>(
         t, length, delay);
     extent->backend_type = dtype;
@@ -351,9 +355,8 @@ public:
     LOG_PREFIX(ExtentPlacementManager::delayed_alloc_or_ool_write);
     DEBUGT("start", t);
     return seastar::do_with(
-      std::map<ExtentAllocator*, std::list<LogicalCachedExtentRef>>(),
-      std::list<std::pair<paddr_t, LogicalCachedExtentRef>>(),
-      [this, &t](auto& alloc_map, auto& inline_list) mutable {
+        std::map<ExtentAllocator*, std::list<LogicalCachedExtentRef>>(),
+        [this, &t](auto& alloc_map) {
       LOG_PREFIX(ExtentPlacementManager::delayed_alloc_or_ool_write);
       auto& alloc_list = t.get_delayed_alloc_list();
       uint64_t num_ool_extents = 0;
@@ -363,38 +366,18 @@ public:
           t.increment_delayed_invalid_extents();
           continue;
         }
-        if (should_be_inline(extent)) {
-          auto old_addr = extent->get_paddr();
-          cache.mark_delayed_extent_inline(t, extent);
-          inline_list.emplace_back(old_addr, extent);
-        } else {
-         auto& allocator_ptr = get_allocator(
-           extent->backend_type, extent->hint
-         );
-         alloc_map[allocator_ptr.get()].emplace_back(extent);
-         num_ool_extents++;
-       }
+        // For now, just do ool allocation for any delayed extent
+        auto& allocator_ptr = get_allocator(
+          extent->backend_type, extent->hint
+        );
+        alloc_map[allocator_ptr.get()].emplace_back(extent);
+        num_ool_extents++;
       }
-      DEBUGT("{} inline extents, {} ool extents",
-        t,
-        inline_list.size(),
-        num_ool_extents);
+      DEBUGT("{} ool extents", t, num_ool_extents);
       return trans_intr::do_for_each(alloc_map, [&t](auto& p) {
         auto allocator = p.first;
         auto& extents = p.second;
         return allocator->alloc_ool_extents_paddr(t, extents);
-      }).si_then([&inline_list, this, &t] {
-        LOG_PREFIX(ExtentPlacementManager::delayed_alloc_or_ool_write);
-        DEBUGT("processing {} inline extents", t, inline_list.size());
-        return trans_intr::do_for_each(inline_list, [this, &t](auto& p) {
-          auto old_addr = p.first;
-          auto& extent = p.second;
-          return lba_manager.update_mapping(
-            t,
-            extent->get_laddr(),
-            old_addr,
-            extent->get_paddr());
-        });
       });
     });
   }
@@ -408,10 +391,6 @@ private:
     return device_type_t::SEGMENTED;
   }
 
-  bool should_be_inline(LogicalCachedExtentRef& extent) {
-    return (std::rand() % 2) == 0;
-  }
-
   ExtentAllocatorRef& get_allocator(
     device_type_t type,
     placement_hint_t hint) {
index f912f6979cfbce44cec4fa0d36dac2a42fc0fa0b..1f53cc5f180552c8ac902d10da3b1df8ff520053 100644 (file)
@@ -245,8 +245,10 @@ constexpr objaddr_t OBJ_ADDR_MAX = std::numeric_limits<objaddr_t>::max();
 constexpr objaddr_t OBJ_ADDR_NULL = OBJ_ADDR_MAX - 1;
 
 enum class placement_hint_t {
-  NONE,     /// Denotes empty hint
-  NUM_HINTS /// Constant for number of hints
+  HOT = 0,   // Most of the metadata
+  COLD,      // Object data
+  REWRITE,   // Cold metadata and data (probably need further splits)
+  NUM_HINTS  // Constant for number of hints
 };
 
 enum device_type_t {
index d202b934463b586e5303e7c758ef8a4e887ecacc..170742df09ed334eae4eb20f49e338166d5e20a4 100644 (file)
@@ -323,7 +323,8 @@ TransactionManager::rewrite_logical_extent(
   auto nlextent = epm->alloc_new_extent_by_type(
     t,
     lextent->get_type(),
-    lextent->get_length())->cast<LogicalCachedExtent>();
+    lextent->get_length(),
+    placement_hint_t::REWRITE)->cast<LogicalCachedExtent>();
   lextent->get_bptr().copy_out(
     0,
     lextent->get_length(),
index cbdb867c854eef03658ba288599d5bed24974357..80d1459b0e22792c7329b172610c3eb9e8803771 100644 (file)
@@ -274,7 +274,7 @@ public:
    * alloc_extent
    *
    * Allocates a new block of type T with the minimum lba range of size len
-   * greater than hint.
+   * greater than laddr_hint.
    */
   using alloc_extent_iertr = LBAManager::alloc_extent_iertr;
   template <typename T>
@@ -282,22 +282,30 @@ public:
   template <typename T>
   alloc_extent_ret<T> alloc_extent(
     Transaction &t,
-    laddr_t hint,
+    laddr_t laddr_hint,
     extent_len_t len) {
+    placement_hint_t placement_hint;
+    if constexpr (T::TYPE == extent_types_t::OBJECT_DATA_BLOCK ||
+                  T::TYPE == extent_types_t::COLL_BLOCK) {
+      placement_hint = placement_hint_t::COLD;
+    } else {
+      placement_hint = placement_hint_t::HOT;
+    }
     auto ext = epm->alloc_new_extent<T>(
       t,
-      len);
+      len,
+      placement_hint);
     return lba_manager->alloc_extent(
       t,
-      hint,
+      laddr_hint,
       len,
       ext->get_paddr()
-    ).si_then([ext=std::move(ext), len, hint, &t, this](auto &&ref) mutable {
+    ).si_then([ext=std::move(ext), len, laddr_hint, &t, this](auto &&ref) mutable {
       LOG_PREFIX(TransactionManager::alloc_extent);
       ext->set_pin(std::move(ref));
       stats.extents_allocated_total++;
       stats.extents_allocated_bytes += len;
-      DEBUGT("new extent: {}, hint: {}", t, *ext, hint);
+      DEBUGT("new extent: {}, laddr_hint: {}", t, *ext, laddr_hint);
       return alloc_extent_iertr::make_ready_future<TCachedExtentRef<T>>(
        std::move(ext));
     });