git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
crimson/os/seastore: allow EPM to make decisions on the general extent allocation...
author Yingxin Cheng <yingxin.cheng@intel.com>
Thu, 27 Jan 2022 07:00:44 +0000 (15:00 +0800)
committer Yingxin Cheng <yingxin.cheng@intel.com>
Fri, 11 Feb 2022 01:26:22 +0000 (09:26 +0800)
Signed-off-by: Yingxin Cheng <yingxin.cheng@intel.com>
src/crimson/os/seastore/cache.cc
src/crimson/os/seastore/cache.h
src/crimson/os/seastore/extent_placement_manager.cc
src/crimson/os/seastore/extent_placement_manager.h
src/crimson/os/seastore/seastore.cc
src/crimson/os/seastore/transaction.h
src/crimson/os/seastore/transaction_manager.cc
src/crimson/os/seastore/transaction_manager.h

index fd07e98bc67d195ffc0e5d4f191ef50824726257..1270881241530c2329eef5b1e79d2fe7f3dc823a 100644 (file)
@@ -855,37 +855,37 @@ CachedExtentRef Cache::alloc_new_extent_by_type(
   Transaction &t,       ///< [in, out] current transaction
   extent_types_t type,  ///< [in] type tag
   seastore_off_t length, ///< [in] length
-  bool delay           ///< [in] whether to delay paddr alloc
+  placement_hint_t hint
 )
 {
   LOG_PREFIX(Cache::alloc_new_extent_by_type);
-  SUBDEBUGT(seastore_cache, "allocate {} {}B, delay={}",
-            t, type, length, delay);
+  SUBDEBUGT(seastore_cache, "allocate {} {}B, hint={}",
+            t, type, length, hint);
   switch (type) {
   case extent_types_t::ROOT:
     ceph_assert(0 == "ROOT is never directly alloc'd");
     return CachedExtentRef();
   case extent_types_t::LADDR_INTERNAL:
-    return alloc_new_extent<lba_manager::btree::LBAInternalNode>(t, length, delay);
+    return alloc_new_extent<lba_manager::btree::LBAInternalNode>(t, length, hint);
   case extent_types_t::LADDR_LEAF:
-    return alloc_new_extent<lba_manager::btree::LBALeafNode>(t, length, delay);
+    return alloc_new_extent<lba_manager::btree::LBALeafNode>(t, length, hint);
   case extent_types_t::ONODE_BLOCK_STAGED:
-    return alloc_new_extent<onode::SeastoreNodeExtent>(t, length, delay);
+    return alloc_new_extent<onode::SeastoreNodeExtent>(t, length, hint);
   case extent_types_t::OMAP_INNER:
-    return alloc_new_extent<omap_manager::OMapInnerNode>(t, length, delay);
+    return alloc_new_extent<omap_manager::OMapInnerNode>(t, length, hint);
   case extent_types_t::OMAP_LEAF:
-    return alloc_new_extent<omap_manager::OMapLeafNode>(t, length, delay);
+    return alloc_new_extent<omap_manager::OMapLeafNode>(t, length, hint);
   case extent_types_t::COLL_BLOCK:
-    return alloc_new_extent<collection_manager::CollectionNode>(t, length, delay);
+    return alloc_new_extent<collection_manager::CollectionNode>(t, length, hint);
   case extent_types_t::OBJECT_DATA_BLOCK:
-    return alloc_new_extent<ObjectDataBlock>(t, length, delay);
+    return alloc_new_extent<ObjectDataBlock>(t, length, hint);
   case extent_types_t::RETIRED_PLACEHOLDER:
     ceph_assert(0 == "impossible");
     return CachedExtentRef();
   case extent_types_t::TEST_BLOCK:
-    return alloc_new_extent<TestBlock>(t, length, delay);
+    return alloc_new_extent<TestBlock>(t, length, hint);
   case extent_types_t::TEST_BLOCK_PHYSICAL:
-    return alloc_new_extent<TestBlockPhysical>(t, length, delay);
+    return alloc_new_extent<TestBlockPhysical>(t, length, hint);
   case extent_types_t::NONE: {
     ceph_assert(0 == "NONE is an invalid extent type");
     return CachedExtentRef();
index f289794ff90fc8477b482cefab81e9fa25fff440..524b72707ec46a3372f6da49fc31e91b10ca48b2 100644 (file)
@@ -9,15 +9,16 @@
 
 #include "include/buffer.h"
 
-#include "crimson/os/seastore/logging.h"
-#include "crimson/os/seastore/seastore_types.h"
-#include "crimson/os/seastore/transaction.h"
-#include "crimson/os/seastore/segment_manager.h"
 #include "crimson/common/errorator.h"
 #include "crimson/os/seastore/cached_extent.h"
+#include "crimson/os/seastore/extent_placement_manager.h"
+#include "crimson/os/seastore/logging.h"
+#include "crimson/os/seastore/random_block_manager.h"
 #include "crimson/os/seastore/root_block.h"
+#include "crimson/os/seastore/seastore_types.h"
 #include "crimson/os/seastore/segment_cleaner.h"
-#include "crimson/os/seastore/random_block_manager.h"
+#include "crimson/os/seastore/segment_manager.h"
+#include "crimson/os/seastore/transaction.h"
 
 namespace crimson::os::seastore {
 
@@ -102,6 +103,10 @@ public:
   Cache(ExtentReader &reader);
   ~Cache();
 
+  void set_epm(ExtentPlacementManager& epm) {
+    p_epm = &epm;
+  }
+
   /// Creates empty transaction by source
   TransactionRef create_transaction(
       Transaction::src_t src,
@@ -489,17 +494,20 @@ public:
    */
   template <typename T>
   TCachedExtentRef<T> alloc_new_extent(
-    Transaction &t,       ///< [in, out] current transaction
-    seastore_off_t length, ///< [in] length
-    bool delayed = false  ///< [in] whether the paddr allocation of extent is delayed
+    Transaction &t,         ///< [in, out] current transaction
+    seastore_off_t length,  ///< [in] length
+    placement_hint_t hint = placement_hint_t::HOT
   ) {
     LOG_PREFIX(Cache::alloc_new_extent);
-    SUBDEBUGT(seastore_cache, "allocate {} {}B, delay={}",
-              t, T::TYPE, length, delayed);
-    auto ret = CachedExtent::make_cached_extent_ref<T>(
-      alloc_cache_buf(length));
-    t.add_fresh_extent(ret, delayed);
+    SUBTRACET(seastore_cache, "allocate {} {}B, hint={}",
+              t, T::TYPE, length, hint);
+    auto result = p_epm->alloc_new_extent(t, T::TYPE, length, hint);
+    auto ret = CachedExtent::make_cached_extent_ref<T>(std::move(result.bp));
+    ret->set_paddr(result.paddr);
+    t.add_fresh_extent(ret);
     ret->state = CachedExtent::extent_state_t::INITIAL_WRITE_PENDING;
+    SUBDEBUGT(seastore_cache, "allocated {} {}B extent at {}, hint={} -- {}",
+              t, T::TYPE, length, result.paddr, hint, *ret);
     return ret;
   }
 
@@ -512,7 +520,7 @@ public:
     Transaction &t,       ///< [in, out] current transaction
     extent_types_t type,  ///< [in] type tag
     seastore_off_t length, ///< [in] length
-    bool delayed = false  ///< [in] whether delay addr allocation
+    placement_hint_t hint = placement_hint_t::HOT
     );
 
   /**
@@ -732,6 +740,7 @@ public:
 
 private:
   ExtentReader &reader;                   ///< ref to extent reader
+  ExtentPlacementManager* p_epm = nullptr;
   RootBlockRef root;               ///< ref to current root
   ExtentIndex extents;             ///< set of live extents
 
index 498bfdf570a09d235a0523e613487c9ad6de3903..71d86e00c8bb293091440b8c85d41548f2299c13 100644 (file)
@@ -1,9 +1,11 @@
 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-
 // vim: ts=8 sw=2 smarttab expandtab
 
-#include "crimson/os/seastore/journal.h"
 #include "crimson/os/seastore/extent_placement_manager.h"
 
+#include "crimson/os/seastore/lba_manager.h"
+#include "crimson/os/seastore/segment_cleaner.h"
+
 namespace {
   seastar::logger& logger() {
     return crimson::get_logger(ceph_subsys_seastore_tm);
@@ -18,13 +20,11 @@ SegmentedAllocator::SegmentedAllocator(
   SegmentProvider& sp,
   SegmentManager& sm,
   LBAManager& lba_manager,
-  Journal& journal,
-  Cache& cache)
+  Journal& journal)
   : segment_provider(sp),
     segment_manager(sm),
     lba_manager(lba_manager),
-    journal(journal),
-    cache(cache)
+    journal(journal)
 {
   std::generate_n(
     std::back_inserter(writers),
@@ -35,8 +35,7 @@ SegmentedAllocator::SegmentedAllocator(
        segment_provider,
        segment_manager,
        lba_manager,
-       journal,
-        cache};
+       journal};
       });
 }
 
index 764844113a37de5732bf9ecb063ba30e3abadaa5..9d56e3e0263536471048f00a8ae81196f1c85797 100644 (file)
@@ -6,10 +6,11 @@
 #include "seastar/core/gate.hh"
 
 #include "crimson/common/condition_variable.h"
-#include "crimson/os/seastore/logging.h"
-#include "crimson/os/seastore/cache.h"
 #include "crimson/os/seastore/cached_extent.h"
-#include "crimson/os/seastore/lba_manager.h"
+#include "crimson/os/seastore/journal.h"
+#include "crimson/os/seastore/logging.h"
+#include "crimson/os/seastore/segment_manager.h"
+#include "crimson/os/seastore/transaction.h"
 
 namespace crimson::os::seastore {
 
@@ -162,6 +163,9 @@ struct open_segment_wrapper_t : public boost::intrusive_ref_counter<
 using open_segment_wrapper_ref =
   boost::intrusive_ptr<open_segment_wrapper_t>;
 
+class LBAManager;
+class SegmentProvider;
+
 /**
  * SegmentedAllocator
  *
@@ -182,13 +186,11 @@ class SegmentedAllocator : public ExtentAllocator {
       SegmentProvider& sp,
       SegmentManager& sm,
       LBAManager& lba_manager,
-      Journal& journal,
-      Cache& cache)
+      Journal& journal)
       : segment_provider(sp),
         segment_manager(sm),
         lba_manager(lba_manager),
-        journal(journal),
-        cache(cache)
+        journal(journal)
     {}
     Writer(Writer &&) = default;
 
@@ -203,8 +205,9 @@ class SegmentedAllocator : public ExtentAllocator {
       });
     }
   private:
-    using update_lba_mapping_iertr = LBAManager::update_le_mapping_iertr;
-    using finish_record_iertr = update_lba_mapping_iertr;
+    using finish_record_ertr = crimson::errorator<
+      crimson::ct_error::input_output_error>;
+    using finish_record_iertr = trans_iertr<finish_record_ertr>;
     using finish_record_ret = finish_record_iertr::future<>;
     finish_record_ret finish_write(
       Transaction& t,
@@ -237,15 +240,13 @@ class SegmentedAllocator : public ExtentAllocator {
     crimson::condition_variable segment_rotation_guard;
     seastar::gate writer_guard;
     bool rolling_segment = false;
-    Cache& cache;
   };
 public:
   SegmentedAllocator(
     SegmentProvider& sp,
     SegmentManager& sm,
     LBAManager& lba_manager,
-    Journal& journal,
-    Cache& cache);
+    Journal& journal);
 
   Writer &get_writer(placement_hint_t hint) {
     return writers[std::rand() % writers.size()];
@@ -282,58 +283,50 @@ private:
   std::vector<Writer> writers;
   LBAManager& lba_manager;
   Journal& journal;
-  Cache& cache;
 };
 
 class ExtentPlacementManager {
 public:
   ExtentPlacementManager(
-    Cache& cache,
     LBAManager& lba_manager
-  ) : cache(cache), lba_manager(lba_manager) {}
+  ) : lba_manager(lba_manager) {}
 
-  /**
-   * alloc_new_extent_by_type
-   *
-   * Create a new extent, CachedExtent::poffset may not be set
-   * if a delayed allocation is needed.
-   */
-  CachedExtentRef alloc_new_extent_by_type(
+  struct alloc_result_t {
+    paddr_t paddr;
+    bufferptr bp;
+  };
+  alloc_result_t alloc_new_extent(
     Transaction& t,
     extent_types_t type,
     seastore_off_t length,
-    placement_hint_t hint) {
-    // only logical extents should fall in this path
-    assert(is_logical_type(type));
+    placement_hint_t hint
+  ) {
     assert(hint < placement_hint_t::NUM_HINTS);
-    auto dtype = get_allocator_type(hint);
-    // FIXME: set delay for COLD extent when the record overhead is low
-    bool delay = (hint > placement_hint_t::COLD &&
-                  can_delay_allocation(dtype));
-    CachedExtentRef extent = cache.alloc_new_extent_by_type(
-        t, type, length, delay);
-    extent->hint = hint;
-    return extent;
-  }
 
-  template<
-    typename T,
-    std::enable_if_t<std::is_base_of_v<LogicalCachedExtent, T>, int> = 0>
-  TCachedExtentRef<T> alloc_new_extent(
-    Transaction& t,
-    seastore_off_t length,
-    placement_hint_t hint) {
-    // only logical extents should fall in this path
-    static_assert(is_logical_type(T::TYPE));
-    assert(hint < placement_hint_t::NUM_HINTS);
-    auto dtype = get_allocator_type(hint);
+    // XXX: bp might be extended to point to different memory (e.g. PMem)
+    // according to the allocator.
+    auto bp = ceph::bufferptr(
+      buffer::create_page_aligned(length));
+    bp.zero();
+
+    if (!is_logical_type(type)) {
+      // TODO: implement out-of-line strategy for physical extent.
+      return {make_record_relative_paddr(0),
+              std::move(bp)};
+    }
+
     // FIXME: set delay for COLD extent when the record overhead is low
+    // NOTE: delay means to delay the decision about whether to write the
+    // extent as inline or out-of-line extents.
     bool delay = (hint > placement_hint_t::COLD &&
-                  can_delay_allocation(dtype));
-    TCachedExtentRef<T> extent = cache.alloc_new_extent<T>(
-        t, length, delay);
-    extent->hint = hint;
-    return extent;
+                  can_delay_allocation(get_allocator_type(hint)));
+    if (delay) {
+      return {make_delayed_temp_paddr(0),
+              std::move(bp)};
+    } else {
+      return {make_record_relative_paddr(0),
+              std::move(bp)};
+    }
   }
 
   /**
@@ -395,7 +388,6 @@ private:
     return devices[std::rand() % devices.size()];
   }
 
-  Cache& cache;
   LBAManager& lba_manager;
   std::map<device_type_t, std::vector<ExtentAllocatorRef>> allocators;
 };
index 2702e4973097036487249a0a9345fd2c3472acdf..bc3e5ca7674fd254bb85a4d7b742f9525555895e 100644 (file)
@@ -1418,7 +1418,8 @@ seastar::future<std::unique_ptr<SeaStore>> make_seastore(
     auto cache = std::make_unique<Cache>(scanner_ref);
     auto lba_manager = lba_manager::create_lba_manager(*sm, *cache);
 
-    auto epm = std::make_unique<ExtentPlacementManager>(*cache, *lba_manager);
+    auto epm = std::make_unique<ExtentPlacementManager>(*lba_manager);
+    cache->set_epm(*epm);
 
     journal->set_segment_provider(&*segment_cleaner);
 
index f0d35fc9e19b55c9be2c20ccc3c57b746773ee35..4090744412841e3c6bc723205be6af68b6e0ce05 100644 (file)
@@ -116,15 +116,16 @@ public:
   }
 
   void add_fresh_extent(
-    CachedExtentRef ref,
-    bool delayed = false) {
+    CachedExtentRef ref) {
     ceph_assert(!is_weak());
-    if (delayed) {
+    if (ref->get_paddr().is_delayed()) {
+      assert(ref->get_paddr() == make_delayed_temp_paddr(0));
       assert(ref->is_logical());
       ref->set_paddr(make_delayed_temp_paddr(delayed_temp_offset));
       delayed_temp_offset += ref->get_length();
       delayed_alloc_list.emplace_back(ref->cast<LogicalCachedExtent>());
     } else {
+      assert(ref->get_paddr() == make_record_relative_paddr(0));
       ref->set_paddr(make_record_relative_paddr(offset));
       offset += ref->get_length();
       inline_block_list.push_back(ref);
index 25252b34afd8fc9f8033b5cacfcecf47cbc5bd2b..db37abdd096349cc17b424df0efc6fd2ab6d5f24 100644 (file)
@@ -381,7 +381,7 @@ TransactionManager::rewrite_logical_extent(
 
   auto lextent = extent->cast<LogicalCachedExtent>();
   cache->retire_extent(t, extent);
-  auto nlextent = epm->alloc_new_extent_by_type(
+  auto nlextent = cache->alloc_new_extent_by_type(
     t,
     lextent->get_type(),
     lextent->get_length(),
index 34f32e39217f0caf59e3ddb71af5ca0ed75a001e..a5bb254badb8a3a9aecb5d22d683b2b19e09958a 100644 (file)
@@ -304,7 +304,7 @@ public:
     LOG_PREFIX(TransactionManager::alloc_extent);
     SUBTRACET(seastore_tm, "{} len={}, placement_hint={}, laddr_hint={}",
               t, T::TYPE, len, placement_hint, laddr_hint);
-    auto ext = epm->alloc_new_extent<T>(
+    auto ext = cache->alloc_new_extent<T>(
       t,
       len,
       placement_hint);
@@ -551,8 +551,7 @@ public:
        *segment_cleaner,
        *sm,
        *lba_manager,
-       *journal,
-       *cache));
+       *journal));
   }
 
   ~TransactionManager();