git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
crimson/os/seastore: implement generational GC
author Yingxin Cheng <yingxin.cheng@intel.com>
Fri, 27 May 2022 09:13:06 +0000 (17:13 +0800)
committer Yingxin Cheng <yingxin.cheng@intel.com>
Wed, 29 Jun 2022 06:39:38 +0000 (14:39 +0800)
Place extents into the dedicated RecordSubmitter by their data-category
and reclaimed-count. Segments of different data-category or
reclaimed-count should have different locality in the access patterns,
which is the foundation to form a desired bimodal distribution of
segment utilizations, so that GC can be more efficient.

Signed-off-by: Yingxin Cheng <yingxin.cheng@intel.com>
20 files changed:
src/crimson/os/seastore/async_cleaner.cc
src/crimson/os/seastore/async_cleaner.h
src/crimson/os/seastore/btree/fixed_kv_btree.h
src/crimson/os/seastore/btree/fixed_kv_node.h
src/crimson/os/seastore/cache.cc
src/crimson/os/seastore/cache.h
src/crimson/os/seastore/cached_extent.h
src/crimson/os/seastore/extent_placement_manager.cc
src/crimson/os/seastore/extent_placement_manager.h
src/crimson/os/seastore/journal/segment_allocator.cc
src/crimson/os/seastore/journal/segment_allocator.h
src/crimson/os/seastore/journal/segmented_journal.cc
src/crimson/os/seastore/seastore_types.cc
src/crimson/os/seastore/seastore_types.h
src/crimson/os/seastore/transaction_manager.cc
src/crimson/os/seastore/transaction_manager.h
src/test/crimson/seastore/test_btree_lba_manager.cc
src/test/crimson/seastore/test_cbjournal.cc
src/test/crimson/seastore/test_seastore_cache.cc
src/test/crimson/seastore/test_seastore_journal.cc

index 2e069a81a307cf5cd891fd7d45469a4e4770c131..e39cee32e840baaec7a94a268ade529b165bfb29 100644 (file)
@@ -13,13 +13,18 @@ SET_SUBSYS(seastore_cleaner);
 namespace crimson::os::seastore {
 
 void segment_info_t::set_open(
-    segment_seq_t _seq, segment_type_t _type)
+    segment_seq_t _seq, segment_type_t _type,
+    data_category_t _category, reclaim_gen_t _generation)
 {
   ceph_assert(_seq != NULL_SEG_SEQ);
   ceph_assert(_type != segment_type_t::NULL_SEG);
+  ceph_assert(_category != data_category_t::NUM);
+  ceph_assert(_generation < RECLAIM_GENERATIONS);
   state = Segment::segment_state_t::OPEN;
   seq = _seq;
   type = _type;
+  category = _category;
+  generation = _generation;
   written_to = 0;
 }
 
@@ -28,6 +33,8 @@ void segment_info_t::set_empty()
   state = Segment::segment_state_t::EMPTY;
   seq = NULL_SEG_SEQ;
   type = segment_type_t::NULL_SEG;
+  category = data_category_t::NUM;
+  generation = NULL_GENERATION;
   last_modified = {};
   last_rewritten = {};
   written_to = 0;
@@ -40,13 +47,19 @@ void segment_info_t::set_closed()
 }
 
 void segment_info_t::init_closed(
-    segment_seq_t _seq, segment_type_t _type, std::size_t seg_size)
+    segment_seq_t _seq, segment_type_t _type,
+    data_category_t _category, reclaim_gen_t _generation,
+    std::size_t seg_size)
 {
   ceph_assert(_seq != NULL_SEG_SEQ);
   ceph_assert(_type != segment_type_t::NULL_SEG);
+  ceph_assert(_category != data_category_t::NUM);
+  ceph_assert(_generation < RECLAIM_GENERATIONS);
   state = Segment::segment_state_t::CLOSED;
   seq = _seq;
   type = _type;
+  category = _category;
+  generation = _generation;
   written_to = seg_size;
 }
 
@@ -59,6 +72,8 @@ std::ostream& operator<<(std::ostream &out, const segment_info_t &info)
   } else { // open or closed
     out << ", seq=" << segment_seq_printer_t{info.seq}
         << ", type=" << info.type
+        << ", category=" << info.category
+        << ", generation=" << reclaim_gen_printer_t{info.generation}
         << ", last_modified=" << info.last_modified.time_since_epoch()
         << ", last_rewritten=" << info.last_rewritten.time_since_epoch()
         << ", written_to=" << info.written_to;
@@ -124,15 +139,19 @@ void segments_info_t::add_segment_manager(
 }
 
 void segments_info_t::init_closed(
-    segment_id_t segment, segment_seq_t seq, segment_type_t type)
+    segment_id_t segment, segment_seq_t seq, segment_type_t type,
+    data_category_t category, reclaim_gen_t generation)
 {
   LOG_PREFIX(segments_info_t::init_closed);
   auto& segment_info = segments[segment];
-  INFO("initiating {} {} {}, {}, num_segments(empty={}, opened={}, closed={})",
+  INFO("initiating {} {} {} {} {}, {}, "
+       "num_segments(empty={}, opened={}, closed={})",
        segment, segment_seq_printer_t{seq}, type,
+       category, reclaim_gen_printer_t{generation},
        segment_info, num_empty, num_open, num_closed);
   ceph_assert(segment_info.is_empty());
-  segment_info.init_closed(seq, type, get_segment_size());
+  segment_info.init_closed(
+      seq, type, category, generation, get_segment_size());
   ceph_assert(num_empty > 0);
   --num_empty;
   ++num_closed;
@@ -147,15 +166,18 @@ void segments_info_t::init_closed(
 }
 
 void segments_info_t::mark_open(
-    segment_id_t segment, segment_seq_t seq, segment_type_t type)
+    segment_id_t segment, segment_seq_t seq, segment_type_t type,
+    data_category_t category, reclaim_gen_t generation)
 {
   LOG_PREFIX(segments_info_t::mark_open);
   auto& segment_info = segments[segment];
-  INFO("opening {} {} {}, {}, num_segments(empty={}, opened={}, closed={})",
+  INFO("opening {} {} {} {} {}, {}, "
+       "num_segments(empty={}, opened={}, closed={})",
        segment, segment_seq_printer_t{seq}, type,
+       category, reclaim_gen_printer_t{generation},
        segment_info, num_empty, num_open, num_closed);
   ceph_assert(segment_info.is_empty());
-  segment_info.set_open(seq, type);
+  segment_info.set_open(seq, type, category, generation);
   ceph_assert(num_empty > 0);
   --num_empty;
   ++num_open;
@@ -531,7 +553,9 @@ void AsyncCleaner::register_metrics()
 
 segment_id_t AsyncCleaner::allocate_segment(
     segment_seq_t seq,
-    segment_type_t type)
+    segment_type_t type,
+    data_category_t category,
+    reclaim_gen_t generation)
 {
   LOG_PREFIX(AsyncCleaner::allocate_segment);
   assert(seq != NULL_SEG_SEQ);
@@ -542,7 +566,7 @@ segment_id_t AsyncCleaner::allocate_segment(
     auto& segment_info = it->second;
     if (segment_info.is_empty()) {
       auto old_usage = calc_utilization(seg_id);
-      segments.mark_open(seg_id, seq, type);
+      segments.mark_open(seg_id, seq, type, category, generation);
       auto new_usage = calc_utilization(seg_id);
       adjust_segment_util(old_usage, new_usage);
       INFO("opened, should_block_on_gc {}, projected_avail_ratio {}, "
@@ -682,7 +706,7 @@ AsyncCleaner::rewrite_dirty_ret AsyncCleaner::rewrite_dirty(
          dirty_list,
          [this, FNAME, &t](auto &e) {
          DEBUGT("cleaning {}", t, *e);
-         return ecb->rewrite_extent(t, e);
+         return ecb->rewrite_extent(t, e, DIRTY_GENERATION);
        });
       });
   });
@@ -867,11 +891,12 @@ AsyncCleaner::gc_reclaim_space_ret AsyncCleaner::gc_reclaim_space()
     INFO("reclaim {} {} start", seg_id, segment_info);
     ceph_assert(segment_info.is_closed());
     reclaim_state = reclaim_state_t::create(
-        seg_id, segments.get_segment_size());
+        seg_id, segment_info.generation, segments.get_segment_size());
   }
   reclaim_state->advance(config.reclaim_bytes_per_cycle);
 
-  DEBUG("reclaiming {}~{}",
+  DEBUG("reclaiming {} {}~{}",
+        reclaim_gen_printer_t{reclaim_state->generation},
         reclaim_state->start_pos,
         reclaim_state->end_pos);
   double pavail_ratio = get_projected_available_ratio();
@@ -965,7 +990,7 @@ AsyncCleaner::gc_reclaim_space_ret AsyncCleaner::gc_reclaim_space()
                    extents,
                    [this, &t, &reclaimed](auto &ext) {
                    reclaimed += ext->get_length();
-                   return ecb->rewrite_extent(t, ext);
+                   return ecb->rewrite_extent(t, ext, reclaim_state->target_generation);
                  });
                });
              }).si_then([this, &t, &seq] {
@@ -1074,7 +1099,9 @@ AsyncCleaner::mount_ret AsyncCleaner::mount()
            init_mark_segment_closed(
              segment_id,
              header.segment_seq,
-             header.type);
+             header.type,
+             header.category,
+             header.generation);
            return seastar::now();
          }).handle_error(
            crimson::ct_error::enodata::handle(
@@ -1179,7 +1206,9 @@ AsyncCleaner::scan_extents_ret AsyncCleaner::scan_nonfull_segment(
     init_mark_segment_closed(
       segment_id,
       header.segment_seq,
-      header.type);
+      header.type,
+      header.category,
+      header.generation);
     return seastar::now();
   });
 }
index 0f2ded6bc4351a6cf05d6e8c92cf8dbc8e3b39e5..228b8b3515de9fbab8023f56e78acd97d55ea17a 100644 (file)
@@ -37,6 +37,10 @@ struct segment_info_t {
 
   segment_type_t type = segment_type_t::NULL_SEG;
 
+  data_category_t category = data_category_t::NUM;
+
+  reclaim_gen_t generation = NULL_GENERATION;
+
   time_point last_modified;
   time_point last_rewritten;
 
@@ -59,9 +63,12 @@ struct segment_info_t {
     return state == Segment::segment_state_t::OPEN;
   }
 
-  void init_closed(segment_seq_t, segment_type_t, std::size_t);
+  void init_closed(segment_seq_t, segment_type_t,
+                   data_category_t, reclaim_gen_t,
+                   std::size_t);
 
-  void set_open(segment_seq_t, segment_type_t);
+  void set_open(segment_seq_t, segment_type_t,
+                data_category_t, reclaim_gen_t);
 
   void set_empty();
 
@@ -190,9 +197,11 @@ public:
   void add_segment_manager(SegmentManager &segment_manager);
 
   // initiate non-empty segments, the others are by default empty
-  void init_closed(segment_id_t, segment_seq_t, segment_type_t);
+  void init_closed(segment_id_t, segment_seq_t, segment_type_t,
+                   data_category_t, reclaim_gen_t);
 
-  void mark_open(segment_id_t, segment_seq_t, segment_type_t);
+  void mark_open(segment_id_t, segment_seq_t, segment_type_t,
+                 data_category_t, reclaim_gen_t);
 
   void mark_empty(segment_id_t);
 
@@ -241,7 +250,7 @@ public:
   virtual const segment_info_t& get_seg_info(segment_id_t id) const = 0;
 
   virtual segment_id_t allocate_segment(
-      segment_seq_t seq, segment_type_t type) = 0;
+      segment_seq_t, segment_type_t, data_category_t, reclaim_gen_t) = 0;
 
   virtual journal_seq_t get_dirty_extents_replay_from() const = 0;
 
@@ -597,7 +606,8 @@ public:
     using rewrite_extent_ret = rewrite_extent_iertr::future<>;
     virtual rewrite_extent_ret rewrite_extent(
       Transaction &t,
-      CachedExtentRef extent) = 0;
+      CachedExtentRef extent,
+      reclaim_gen_t target_generation) = 0;
 
     /**
      * get_extent_if_live
@@ -739,7 +749,7 @@ public:
   }
 
   segment_id_t allocate_segment(
-      segment_seq_t seq, segment_type_t type) final;
+      segment_seq_t, segment_type_t, data_category_t, reclaim_gen_t) final;
 
   void close_segment(segment_id_t segment) final;
 
@@ -935,14 +945,21 @@ private:
   }
 
   struct reclaim_state_t {
+    reclaim_gen_t generation;
+    reclaim_gen_t target_generation;
     std::size_t segment_size;
     paddr_t start_pos;
     paddr_t end_pos;
 
     static reclaim_state_t create(
         segment_id_t segment_id,
+        reclaim_gen_t generation,
         std::size_t segment_size) {
-      return {segment_size,
+      ceph_assert(generation < RECLAIM_GENERATIONS);
+      return {generation,
+              (reclaim_gen_t)(generation == RECLAIM_GENERATIONS - 1 ?
+                              generation : generation + 1),
+              segment_size,
               P_ADDR_NULL,
               paddr_t::make_seg_paddr(segment_id, 0)};
     }
@@ -1280,10 +1297,12 @@ private:
   void init_mark_segment_closed(
       segment_id_t segment,
       segment_seq_t seq,
-      segment_type_t s_type) {
+      segment_type_t s_type,
+      data_category_t category,
+      reclaim_gen_t generation) {
     ceph_assert(!init_complete);
     auto old_usage = calc_utilization(segment);
-    segments.init_closed(segment, seq, s_type);
+    segments.init_closed(segment, seq, s_type, category, generation);
     auto new_usage = calc_utilization(segment);
     adjust_segment_util(old_usage, new_usage);
     if (s_type == segment_type_t::OOL) {
index a448fb0c2c9c67a441cc09ff522d312eac330a4d..209c49d79fd0c8436449438c3d43458cd48f9333 100644 (file)
@@ -313,7 +313,9 @@ public:
   static mkfs_ret mkfs(op_context_t<node_key_t> c) {
     auto root_leaf = c.cache.template alloc_new_extent<leaf_node_t>(
       c.trans,
-      node_size);
+      node_size,
+      placement_hint_t::HOT,
+      0);
     root_leaf->set_size(0);
     fixed_kv_node_meta_t<node_key_t> meta{min_max_t<node_key_t>::min, min_max_t<node_key_t>::max, 1};
     root_leaf->set_meta(meta);
@@ -814,7 +816,9 @@ public:
         std::remove_reference_t<decltype(fixed_kv_extent)>
         >(
         c.trans,
-        fixed_kv_extent.get_length());
+        fixed_kv_extent.get_length(),
+        fixed_kv_extent.get_user_hint(),
+        fixed_kv_extent.get_reclaim_generation());
       fixed_kv_extent.get_bptr().copy_out(
         0,
         fixed_kv_extent.get_length(),
@@ -1400,7 +1404,7 @@ private:
 
     if (split_from == iter.get_depth()) {
       auto nroot = c.cache.template alloc_new_extent<internal_node_t>(
-        c.trans, node_size);
+        c.trans, node_size, placement_hint_t::HOT, 0);
       fixed_kv_node_meta_t<node_key_t> meta{
         min_max_t<node_key_t>::min, min_max_t<node_key_t>::max, iter.get_depth() + 1};
       nroot->set_meta(meta);
index c48d3c0e0ff88eeceb67512cfaadaa985f68de72..5673de2512f785a38c27f194cc5bc1c4790c3b09 100644 (file)
@@ -154,9 +154,9 @@ struct FixedKVInternalNode
   std::tuple<Ref, Ref, NODE_KEY>
   make_split_children(op_context_t<NODE_KEY> c) {
     auto left = c.cache.template alloc_new_extent<node_type_t>(
-      c.trans, node_size);
+      c.trans, node_size, placement_hint_t::HOT, 0);
     auto right = c.cache.template alloc_new_extent<node_type_t>(
-      c.trans, node_size);
+      c.trans, node_size, placement_hint_t::HOT, 0);
     auto pivot = this->split_into(*left, *right);
     left->pin.set_range(left->get_meta());
     right->pin.set_range(right->get_meta());
@@ -170,7 +170,7 @@ struct FixedKVInternalNode
     op_context_t<NODE_KEY> c,
     Ref &right) {
     auto replacement = c.cache.template alloc_new_extent<node_type_t>(
-      c.trans, node_size);
+      c.trans, node_size, placement_hint_t::HOT, 0);
     replacement->merge_from(*this, *right->template cast<node_type_t>());
     replacement->pin.set_range(replacement->get_meta());
     return replacement;
@@ -184,9 +184,9 @@ struct FixedKVInternalNode
     ceph_assert(_right->get_type() == this->get_type());
     auto &right = *_right->template cast<node_type_t>();
     auto replacement_left = c.cache.template alloc_new_extent<node_type_t>(
-      c.trans, node_size);
+      c.trans, node_size, placement_hint_t::HOT, 0);
     auto replacement_right = c.cache.template alloc_new_extent<node_type_t>(
-      c.trans, node_size);
+      c.trans, node_size, placement_hint_t::HOT, 0);
 
     auto pivot = this->balance_into_new_nodes(
       *this,
@@ -355,9 +355,9 @@ struct FixedKVLeafNode
   std::tuple<Ref, Ref, NODE_KEY>
   make_split_children(op_context_t<NODE_KEY> c) {
     auto left = c.cache.template alloc_new_extent<node_type_t>(
-      c.trans, node_size);
+      c.trans, node_size, placement_hint_t::HOT, 0);
     auto right = c.cache.template alloc_new_extent<node_type_t>(
-      c.trans, node_size);
+      c.trans, node_size, placement_hint_t::HOT, 0);
     auto pivot = this->split_into(*left, *right);
     left->pin.set_range(left->get_meta());
     right->pin.set_range(right->get_meta());
@@ -371,7 +371,7 @@ struct FixedKVLeafNode
     op_context_t<NODE_KEY> c,
     Ref &right) {
     auto replacement = c.cache.template alloc_new_extent<node_type_t>(
-      c.trans, node_size);
+      c.trans, node_size, placement_hint_t::HOT, 0);
     replacement->merge_from(*this, *right->template cast<node_type_t>());
     replacement->pin.set_range(replacement->get_meta());
     return replacement;
@@ -385,9 +385,9 @@ struct FixedKVLeafNode
     ceph_assert(_right->get_type() == this->get_type());
     auto &right = *_right->template cast<node_type_t>();
     auto replacement_left = c.cache.template alloc_new_extent<node_type_t>(
-      c.trans, node_size);
+      c.trans, node_size, placement_hint_t::HOT, 0);
     auto replacement_right = c.cache.template alloc_new_extent<node_type_t>(
-      c.trans, node_size);
+      c.trans, node_size, placement_hint_t::HOT, 0);
 
     auto pivot = this->balance_into_new_nodes(
       *this,
index dc7fdffa43ef1e9944a7345d2926ce284dcb7184..fe676990f155c6d284ac8ae67a18a3fbf9db27e5 100644 (file)
@@ -85,8 +85,10 @@ Cache::retire_extent_ret Cache::retire_extent_addr(
     // add a new placeholder to Cache
     ext = CachedExtent::make_cached_extent_ref<
       RetiredExtentPlaceholder>(length);
-    ext->set_paddr(addr);
-    ext->state = CachedExtent::extent_state_t::CLEAN;
+    ext->init(CachedExtent::extent_state_t::CLEAN,
+              addr,
+              placement_hint_t::NUM_HINTS,
+              NULL_GENERATION);
     DEBUGT("retire {}~{} as placeholder, add extent -- {}",
            t, addr, length, *ext);
     add_extent(ext);
@@ -924,40 +926,41 @@ void Cache::on_transaction_destruct(Transaction& t)
 }
 
 CachedExtentRef Cache::alloc_new_extent_by_type(
-  Transaction &t,       ///< [in, out] current transaction
-  extent_types_t type,  ///< [in] type tag
+  Transaction &t,        ///< [in, out] current transaction
+  extent_types_t type,   ///< [in] type tag
   seastore_off_t length, ///< [in] length
-  placement_hint_t hint
+  placement_hint_t hint, ///< [in] user hint
+  reclaim_gen_t gen      ///< [in] reclaim generation
 )
 {
   LOG_PREFIX(Cache::alloc_new_extent_by_type);
-  SUBDEBUGT(seastore_cache, "allocate {} {}B, hint={}",
-            t, type, length, hint);
+  SUBDEBUGT(seastore_cache, "allocate {} {}B, hint={}, gen={}",
+            t, type, length, hint, reclaim_gen_printer_t{gen});
   switch (type) {
   case extent_types_t::ROOT:
     ceph_assert(0 == "ROOT is never directly alloc'd");
     return CachedExtentRef();
   case extent_types_t::LADDR_INTERNAL:
-    return alloc_new_extent<lba_manager::btree::LBAInternalNode>(t, length, hint);
+    return alloc_new_extent<lba_manager::btree::LBAInternalNode>(t, length, hint, gen);
   case extent_types_t::LADDR_LEAF:
-    return alloc_new_extent<lba_manager::btree::LBALeafNode>(t, length, hint);
+    return alloc_new_extent<lba_manager::btree::LBALeafNode>(t, length, hint, gen);
   case extent_types_t::ONODE_BLOCK_STAGED:
-    return alloc_new_extent<onode::SeastoreNodeExtent>(t, length, hint);
+    return alloc_new_extent<onode::SeastoreNodeExtent>(t, length, hint, gen);
   case extent_types_t::OMAP_INNER:
-    return alloc_new_extent<omap_manager::OMapInnerNode>(t, length, hint);
+    return alloc_new_extent<omap_manager::OMapInnerNode>(t, length, hint, gen);
   case extent_types_t::OMAP_LEAF:
-    return alloc_new_extent<omap_manager::OMapLeafNode>(t, length, hint);
+    return alloc_new_extent<omap_manager::OMapLeafNode>(t, length, hint, gen);
   case extent_types_t::COLL_BLOCK:
-    return alloc_new_extent<collection_manager::CollectionNode>(t, length, hint);
+    return alloc_new_extent<collection_manager::CollectionNode>(t, length, hint, gen);
   case extent_types_t::OBJECT_DATA_BLOCK:
-    return alloc_new_extent<ObjectDataBlock>(t, length, hint);
+    return alloc_new_extent<ObjectDataBlock>(t, length, hint, gen);
   case extent_types_t::RETIRED_PLACEHOLDER:
     ceph_assert(0 == "impossible");
     return CachedExtentRef();
   case extent_types_t::TEST_BLOCK:
-    return alloc_new_extent<TestBlock>(t, length, hint);
+    return alloc_new_extent<TestBlock>(t, length, hint, gen);
   case extent_types_t::TEST_BLOCK_PHYSICAL:
-    return alloc_new_extent<TestBlockPhysical>(t, length, hint);
+    return alloc_new_extent<TestBlockPhysical>(t, length, hint, gen);
   case extent_types_t::NONE: {
     ceph_assert(0 == "NONE is an invalid extent type");
     return CachedExtentRef();
@@ -986,6 +989,7 @@ CachedExtentRef Cache::duplicate_for_write(
 
   ret->version++;
   ret->state = CachedExtent::extent_state_t::MUTATION_PENDING;
+  ret->set_reclaim_generation(DIRTY_GENERATION);
   DEBUGT("{} -> {}", t, *i, *ret);
   return ret;
 }
index 3560406c4ebb348c7a2050beb8af7ede806c20f9..872fc4dfdefad5fcf682fd3ab967a7f40814bee2 100644 (file)
@@ -303,8 +303,10 @@ public:
     if (!cached) {
       auto ret = CachedExtent::make_cached_extent_ref<T>(
         alloc_cache_buf(length));
-      ret->set_paddr(offset);
-      ret->state = CachedExtent::extent_state_t::CLEAN_PENDING;
+      ret->init(CachedExtent::extent_state_t::CLEAN_PENDING,
+                offset,
+                placement_hint_t::NUM_HINTS,
+                NULL_GENERATION);
       SUBDEBUG(seastore_cache,
           "{} {}~{} is absent, add extent and reading ... -- {}",
           T::TYPE, offset, length, *ret);
@@ -319,8 +321,10 @@ public:
     if (cached->get_type() == extent_types_t::RETIRED_PLACEHOLDER) {
       auto ret = CachedExtent::make_cached_extent_ref<T>(
         alloc_cache_buf(length));
-      ret->set_paddr(offset);
-      ret->state = CachedExtent::extent_state_t::CLEAN_PENDING;
+      ret->init(CachedExtent::extent_state_t::CLEAN_PENDING,
+                offset,
+                placement_hint_t::NUM_HINTS,
+                NULL_GENERATION);
       SUBDEBUG(seastore_cache,
           "{} {}~{} is absent(placeholder), reading ... -- {}",
           T::TYPE, offset, length, *ret);
@@ -681,19 +685,23 @@ public:
   TCachedExtentRef<T> alloc_new_extent(
     Transaction &t,         ///< [in, out] current transaction
     seastore_off_t length,  ///< [in] length
-    placement_hint_t hint = placement_hint_t::HOT
+    placement_hint_t hint,  ///< [in] user hint
+    reclaim_gen_t gen       ///< [in] reclaim generation
   ) {
     LOG_PREFIX(Cache::alloc_new_extent);
-    SUBTRACET(seastore_cache, "allocate {} {}B, hint={}",
-              t, T::TYPE, length, hint);
-    auto result = epm.alloc_new_extent(t, T::TYPE, length, hint);
+    SUBTRACET(seastore_cache, "allocate {} {}B, hint={}, gen={}",
+              t, T::TYPE, length, hint, reclaim_gen_printer_t{gen});
+    auto result = epm.alloc_new_extent(t, T::TYPE, length, hint, gen);
     auto ret = CachedExtent::make_cached_extent_ref<T>(std::move(result.bp));
-    ret->set_paddr(result.paddr);
-    ret->hint = hint;
-    ret->state = CachedExtent::extent_state_t::INITIAL_WRITE_PENDING;
+    ret->init(CachedExtent::extent_state_t::INITIAL_WRITE_PENDING,
+              result.paddr,
+              hint,
+              result.gen);
     t.add_fresh_extent(ret);
-    SUBDEBUGT(seastore_cache, "allocated {} {}B extent at {}, hint={} -- {}",
-              t, T::TYPE, length, result.paddr, hint, *ret);
+    SUBDEBUGT(seastore_cache,
+              "allocated {} {}B extent at {}, hint={}, gen={} -- {}",
+              t, T::TYPE, length, result.paddr,
+              hint, reclaim_gen_printer_t{result.gen}, *ret);
     return ret;
   }
 
@@ -703,10 +711,11 @@ public:
    * Allocates a fresh extent.  addr will be relative until commit.
    */
   CachedExtentRef alloc_new_extent_by_type(
-    Transaction &t,       ///< [in, out] current transaction
-    extent_types_t type,  ///< [in] type tag
+    Transaction &t,        ///< [in, out] current transaction
+    extent_types_t type,   ///< [in] type tag
     seastore_off_t length, ///< [in] length
-    placement_hint_t hint = placement_hint_t::HOT
+    placement_hint_t hint, ///< [in] user hint
+    reclaim_gen_t gen      ///< [in] reclaim generation
     );
 
   /**
index ec4c9ba0d9cb9c45311cc421c62487c766aa857d..867cf82727cb245558cf46dec637c56bcb2183ea 100644 (file)
@@ -105,7 +105,17 @@ class CachedExtent : public boost::intrusive_ref_counter<
 
   // time of the last rewrite
   seastar::lowres_system_clock::time_point last_rewritten;
+
 public:
+  void init(extent_state_t _state,
+            paddr_t paddr,
+            placement_hint_t hint,
+            reclaim_gen_t gen) {
+    state = _state;
+    set_paddr(paddr);
+    user_hint = hint;
+    reclaim_generation = gen;
+  }
 
   void set_last_modified(seastar::lowres_system_clock::duration d) {
     last_modified = seastar::lowres_system_clock::time_point(d);
@@ -209,7 +219,9 @@ public:
        << ", length=" << get_length()
        << ", state=" << state
        << ", last_committed_crc=" << last_committed_crc
-       << ", refcount=" << use_count();
+       << ", refcount=" << use_count()
+       << ", user_hint=" << user_hint
+       << ", reclaim_gen=" << reclaim_generation;
     if (state != extent_state_t::INVALID &&
         state != extent_state_t::CLEAN_PENDING) {
       print_detail(out);
@@ -374,8 +386,24 @@ public:
 
   virtual ~CachedExtent();
 
-  /// hint for allocators
-  placement_hint_t hint = placement_hint_t::NUM_HINTS;
+  placement_hint_t get_user_hint() const {
+    return user_hint;
+  }
+
+  reclaim_gen_t get_reclaim_generation() const {
+    return reclaim_generation;
+  }
+
+  void invalidate_hints() {
+    user_hint = placement_hint_t::NUM_HINTS;
+    reclaim_generation = NULL_GENERATION;
+  }
+
+  void set_reclaim_generation(reclaim_gen_t gen) {
+    assert(gen < RECLAIM_GENERATIONS);
+    user_hint = placement_hint_t::REWRITE;
+    reclaim_generation = gen;
+  }
 
   bool is_inline() const {
     return poffset.is_relative();
@@ -454,6 +482,11 @@ private:
 
   read_set_item_t<Transaction>::list transactions;
 
+  placement_hint_t user_hint;
+
+  /// > 0 and not null means the extent is under reclaiming
+  reclaim_gen_t reclaim_generation;
+
 protected:
   CachedExtent(CachedExtent &&other) = delete;
   CachedExtent(ceph::bufferptr &&ptr) : ptr(std::move(ptr)) {}
index c16fe645923838fa2013b07e9ab5317b8c3f7fca..e1fb033b68b34e7ba8338a9cf0bbab13030d7d06 100644 (file)
@@ -10,10 +10,11 @@ SET_SUBSYS(seastore_journal);
 namespace crimson::os::seastore {
 
 SegmentedOolWriter::SegmentedOolWriter(
-  std::string name,
+  data_category_t category,
+  reclaim_gen_t gen,
   SegmentProvider& sp,
   SegmentSeqAllocator &ssa)
-  : segment_allocator(name, segment_type_t::OOL, sp, ssa),
+  : segment_allocator(segment_type_t::OOL, category, gen, sp, ssa),
     record_submitter(crimson::common::get_conf<uint64_t>(
                        "seastore_journal_iodepth_limit"),
                      crimson::common::get_conf<uint64_t>(
@@ -55,7 +56,7 @@ SegmentedOolWriter::write_record(
       TRACET("{} ool extent written at {} -- {}",
              t, segment_allocator.get_name(),
              extent_addr, *extent);
-      extent->hint = placement_hint_t::NUM_HINTS; // invalidate hint
+      extent->invalidate_hints();
       t.mark_delayed_extent_ool(extent, extent_addr);
       extent_addr = extent_addr.as_seg_paddr().add_offset(
           extent->get_length());
index d5b66ea42b6ce82be260fefa8374658ebba5d7b4..d03c1eb5e4219d2c82d8b3aa2884a4a3210ee765 100644 (file)
@@ -48,7 +48,8 @@ class SegmentProvider;
  */
 class SegmentedOolWriter : public ExtentOolWriter {
 public:
-  SegmentedOolWriter(std::string name,
+  SegmentedOolWriter(data_category_t category,
+                     reclaim_gen_t gen,
                      SegmentProvider &sp,
                      SegmentSeqAllocator &ssa);
 
@@ -85,26 +86,29 @@ private:
 
 class ExtentPlacementManager {
 public:
-  ExtentPlacementManager() {
+  ExtentPlacementManager(bool prefer_ool)
+    : prefer_ool{prefer_ool} {
     devices_by_id.resize(DEVICE_ID_GLOBAL_MAX, nullptr);
   }
 
   void init_ool_writers(SegmentProvider &sp, SegmentSeqAllocator &ssa) {
-    // Currently only one SegmentProvider is supported, so hardcode the
-    // writers_by_hint for now.
-    writer_seed = 0;
+    // Currently only one SegmentProvider is supported
     writer_refs.clear();
-    writers_by_hint.resize((std::size_t)placement_hint_t::NUM_HINTS, {});
-
-    // ool writer is not supported for placement_hint_t::HOT
-    writer_refs.emplace_back(
-        std::make_unique<SegmentedOolWriter>("COLD", sp, ssa));
-    writers_by_hint[(std::size_t)placement_hint_t::COLD
-                   ].emplace_back(writer_refs.back().get());
-    writer_refs.emplace_back(
-        std::make_unique<SegmentedOolWriter>("REWRITE", sp, ssa));
-    writers_by_hint[(std::size_t)placement_hint_t::REWRITE
-                   ].emplace_back(writer_refs.back().get());
+
+    ceph_assert(RECLAIM_GENERATIONS > 0);
+    data_writers_by_gen.resize(RECLAIM_GENERATIONS, {});
+    for (reclaim_gen_t gen = 0; gen < RECLAIM_GENERATIONS; ++gen) {
+      writer_refs.emplace_back(std::make_unique<SegmentedOolWriter>(
+            data_category_t::DATA, gen, sp, ssa));
+      data_writers_by_gen[gen] = writer_refs.back().get();
+    }
+
+    md_writers_by_gen.resize(RECLAIM_GENERATIONS - 1, {});
+    for (reclaim_gen_t gen = 1; gen < RECLAIM_GENERATIONS; ++gen) {
+      writer_refs.emplace_back(std::make_unique<SegmentedOolWriter>(
+            data_category_t::METADATA, gen, sp, ssa));
+      md_writers_by_gen[gen - 1] = writer_refs.back().get();
+    }
   }
 
   void add_device(Device* device, bool is_primary) {
@@ -132,8 +136,10 @@ public:
   open_ertr::future<> open() {
     LOG_PREFIX(ExtentPlacementManager::open);
     SUBINFO(seastore_journal, "started");
-    return crimson::do_for_each(writers_by_hint, [](auto& writers) {
-      return crimson::do_for_each(writers, [](auto& writer) {
+    return crimson::do_for_each(data_writers_by_gen, [](auto &writer) {
+      return writer->open();
+    }).safe_then([this] {
+      return crimson::do_for_each(md_writers_by_gen, [](auto &writer) {
         return writer->open();
       });
     });
@@ -142,14 +148,18 @@ public:
   struct alloc_result_t {
     paddr_t paddr;
     bufferptr bp;
+    reclaim_gen_t gen;
   };
   alloc_result_t alloc_new_extent(
     Transaction& t,
     extent_types_t type,
     seastore_off_t length,
-    placement_hint_t hint
+    placement_hint_t hint,
+    reclaim_gen_t gen
   ) {
     assert(hint < placement_hint_t::NUM_HINTS);
+    assert(gen < RECLAIM_GENERATIONS);
+    assert(gen == 0 || hint == placement_hint_t::REWRITE);
 
     // XXX: bp might be extended to point to differnt memory (e.g. PMem)
     // according to the allocator.
@@ -160,19 +170,35 @@ public:
     if (!is_logical_type(type)) {
       // TODO: implement out-of-line strategy for physical extent.
       return {make_record_relative_paddr(0),
-              std::move(bp)};
+              std::move(bp),
+              0};
     }
 
-    // FIXME: set delay for COLD extent and improve GC
-    // NOTE: delay means to delay the decision about whether to write the
-    // extent as inline or out-of-line extents.
-    bool delay = (hint > placement_hint_t::COLD);
-    if (delay) {
+    if (hint == placement_hint_t::COLD) {
+      assert(gen == 0);
       return {make_delayed_temp_paddr(0),
-              std::move(bp)};
+              std::move(bp),
+              COLD_GENERATION};
+    }
+
+    if (get_extent_category(type) == data_category_t::METADATA &&
+        gen == 0) {
+      // gen 0 METADATA writer is the journal writer
+      if (prefer_ool) {
+        return {make_delayed_temp_paddr(0),
+                std::move(bp),
+                1};
+      } else {
+        return {make_record_relative_paddr(0),
+                std::move(bp),
+                0};
+      }
     } else {
-      return {make_record_relative_paddr(0),
-              std::move(bp)};
+      assert(get_extent_category(type) == data_category_t::DATA ||
+             gen > 0);
+      return {make_delayed_temp_paddr(0),
+              std::move(bp),
+              gen};
     }
   }
 
@@ -193,7 +219,10 @@ public:
         [this, &t, &delayed_extents](auto& alloc_map) {
       for (auto& extent : delayed_extents) {
         // For now, just do ool allocation for any delayed extent
-        auto writer_ptr = get_writer(extent->hint);
+        auto writer_ptr = get_writer(
+            extent->get_user_hint(),
+            get_extent_category(extent->get_type()),
+            extent->get_reclaim_generation());
         alloc_map[writer_ptr].emplace_back(extent);
       }
       return trans_intr::do_for_each(alloc_map, [&t](auto& p) {
@@ -208,8 +237,10 @@ public:
   close_ertr::future<> close() {
     LOG_PREFIX(ExtentPlacementManager::close);
     SUBINFO(seastore_journal, "started");
-    return crimson::do_for_each(writers_by_hint, [](auto& writers) {
-      return crimson::do_for_each(writers, [](auto& writer) {
+    return crimson::do_for_each(data_writers_by_gen, [](auto &writer) {
+      return writer->close();
+    }).safe_then([this] {
+      return crimson::do_for_each(md_writers_by_gen, [](auto &writer) {
         return writer->close();
       });
     }).safe_then([this] {
@@ -230,18 +261,27 @@ public:
   }
 
 private:
-  ExtentOolWriter* get_writer(placement_hint_t hint) {
+  ExtentOolWriter* get_writer(placement_hint_t hint,
+                              data_category_t category,
+                              reclaim_gen_t gen) {
     assert(hint < placement_hint_t::NUM_HINTS);
-    auto hint_index = static_cast<std::size_t>(hint);
-    assert(hint_index < writers_by_hint.size());
-    auto& writers = writers_by_hint[hint_index];
-    assert(writers.size() > 0);
-    return writers[writer_seed++ % writers.size()];
+    assert(gen < RECLAIM_GENERATIONS);
+    if (category == data_category_t::DATA) {
+      return data_writers_by_gen[gen];
+    } else {
+      assert(category == data_category_t::METADATA);
+      // gen 0 METADATA writer is the journal writer
+      assert(gen > 0);
+      return md_writers_by_gen[gen - 1];
+    }
   }
 
-  std::size_t writer_seed = 0;
+  bool prefer_ool;
   std::vector<ExtentOolWriterRef> writer_refs;
-  std::vector<std::vector<ExtentOolWriter*>> writers_by_hint;
+  std::vector<ExtentOolWriter*> data_writers_by_gen;
+  // gen 0 METADATA writer is the journal writer
+  std::vector<ExtentOolWriter*> md_writers_by_gen;
+
   std::vector<Device*> devices_by_id;
   Device* primary_device = nullptr;
 };
index 2716228531d4388e6853253b4b797bf39d297d88..b1ae37d4d54543076ff4e4c2b927fbc264c690c1 100644 (file)
@@ -13,13 +13,15 @@ SET_SUBSYS(seastore_journal);
 namespace crimson::os::seastore::journal {
 
 SegmentAllocator::SegmentAllocator(
-  std::string name,
   segment_type_t type,
+  data_category_t category,
+  reclaim_gen_t gen,
   SegmentProvider &sp,
   SegmentSeqAllocator &ssa)
-  : name{name},
-    print_name{fmt::format("D?_{}", name)},
+  : print_name{fmt::format("{}_G{}", category, gen)},
     type{type},
+    category{category},
+    gen{gen},
     segment_provider{sp},
     sm_group{*sp.get_segment_manager_group()},
     segment_seq_allocator(ssa)
@@ -40,7 +42,8 @@ SegmentAllocator::do_open()
     new_segment_seq,
     reinterpret_cast<const unsigned char *>(meta.seastore_id.bytes()),
     sizeof(meta.seastore_id.uuid));
-  auto new_segment_id = segment_provider.allocate_segment(new_segment_seq, type);
+  auto new_segment_id = segment_provider.allocate_segment(
+      new_segment_seq, type, category, gen);
   ceph_assert(new_segment_id != NULL_SEG_ID);
   return sm_group.open(new_segment_id
   ).handle_error(
@@ -66,7 +69,9 @@ SegmentAllocator::do_open()
       new_journal_tail,
       new_alloc_replay_from,
       current_segment_nonce,
-      type};
+      type,
+      category,
+      gen};
     INFO("{} writing header to new segment ... -- {}",
          print_name, header);
 
@@ -124,7 +129,8 @@ SegmentAllocator::open()
   for (auto& device_id : device_ids) {
     oss << "_" << device_id_printer_t{device_id};
   }
-  oss << "_" << name;
+  oss << "_"
+      << fmt::format("{}_G{}", category, gen);
   print_name = oss.str();
 
   INFO("{}", print_name);
index b5a583e998cb561cd8e2d8c7f2003cf0c897528d..5651ae9c0979baabd62feac3ffe5393a1de18a43 100644 (file)
@@ -30,8 +30,9 @@ class SegmentAllocator {
       crimson::ct_error::input_output_error>;
 
  public:
-  SegmentAllocator(std::string name,
-                   segment_type_t type,
+  SegmentAllocator(segment_type_t type,
+                   data_category_t category,
+                   reclaim_gen_t gen,
                    SegmentProvider &sp,
                    SegmentSeqAllocator &ssa);
 
@@ -111,11 +112,12 @@ class SegmentAllocator {
   using close_segment_ertr = base_ertr;
   close_segment_ertr::future<> close_segment();
 
-  const std::string name;
   // device id is not available during construction,
   // so generate the print_name later.
   std::string print_name;
   const segment_type_t type; // JOURNAL or OOL
+  const data_category_t category;
+  const reclaim_gen_t gen;
   SegmentProvider &segment_provider;
   SegmentManagerGroup &sm_group;
   SegmentRef current_segment;
index f9c979df05dfdfbd5c1c1a90d302330c9c5975f9..dcb6120439f33c459e1a2793e31ffe3b82b74205 100644 (file)
@@ -31,8 +31,9 @@ SegmentedJournal::SegmentedJournal(
   : segment_provider(segment_provider),
     segment_seq_allocator(
       new SegmentSeqAllocator(segment_type_t::JOURNAL)),
-    journal_segment_allocator("JOURNAL",
-                              segment_type_t::JOURNAL,
+    journal_segment_allocator(segment_type_t::JOURNAL,
+                              data_category_t::METADATA,
+                              0, // generation
                               segment_provider,
                               *segment_seq_allocator),
     record_submitter(crimson::common::get_conf<uint64_t>(
index 52e8a0b6e96c6c8b02f690a2588b084b2ad80553..8a483b0edc867b232fd4fc014d4d8cccc93dff18 100644 (file)
@@ -173,6 +173,29 @@ std::ostream &operator<<(std::ostream &out, extent_types_t t)
   }
 }
 
+std::ostream &operator<<(std::ostream &out, reclaim_gen_printer_t gen)
+{
+  if (gen.gen == NULL_GENERATION) {
+    return out << "NULL_GEN";
+  } else if (gen.gen >= RECLAIM_GENERATIONS) {
+    return out << "INVALID_GEN(" << (unsigned)gen.gen << ")";
+  } else {
+    return out << "GEN(" << (unsigned)gen.gen << ")";
+  }
+}
+
+std::ostream &operator<<(std::ostream &out, data_category_t c)
+{
+  switch (c) {
+    case data_category_t::METADATA:
+      return out << "MD";
+    case data_category_t::DATA:
+      return out << "DATA";
+    default:
+      return out << "INVALID_CATEGORY!";
+  }
+}
+
 std::ostream &operator<<(std::ostream &out, const laddr_list_t &rhs)
 {
   bool first = false;
@@ -224,6 +247,8 @@ std::ostream &operator<<(std::ostream &out, const segment_header_t &header)
             << ", journal_tail=" << header.journal_tail
             << ", segment_nonce=" << header.segment_nonce
             << ", type=" << header.type
+            << ", category=" << header.category
+            << ", generaton=" << (unsigned)header.generation
             << ")";
 }
 
index f7f2e97de64bdcf8a5acacb9db8fc3abbb360436..146df569c96a2924adaa4d251f8d4c36bc8b47f1 100644 (file)
@@ -757,9 +757,9 @@ constexpr objaddr_t OBJ_ADDR_MAX = std::numeric_limits<objaddr_t>::max();
 constexpr objaddr_t OBJ_ADDR_NULL = OBJ_ADDR_MAX;
 
 enum class placement_hint_t {
-  HOT = 0,   // Most of the metadata
-  COLD,      // Object data
-  REWRITE,   // Cold metadata and data (probably need further splits)
+  HOT = 0,   // The default user hint that expects mutations or retirement
+  COLD,      // Expect no mutations and no retirement in the near future
+  REWRITE,   // Hint for the internal rewrites
   NUM_HINTS  // Constant for number of hints
 };
 
@@ -973,6 +973,37 @@ constexpr bool is_backref_node(extent_types_t type)
 
 std::ostream &operator<<(std::ostream &out, extent_types_t t);
 
+using reclaim_gen_t = uint8_t;
+
+constexpr reclaim_gen_t DIRTY_GENERATION = 1;
+constexpr reclaim_gen_t COLD_GENERATION = 1;
+constexpr reclaim_gen_t RECLAIM_GENERATIONS = 3;
+constexpr reclaim_gen_t NULL_GENERATION =
+  std::numeric_limits<reclaim_gen_t>::max();
+
+struct reclaim_gen_printer_t {
+  reclaim_gen_t gen;
+};
+
+std::ostream &operator<<(std::ostream &out, reclaim_gen_printer_t gen);
+
+enum class data_category_t : uint8_t {
+  METADATA = 0,
+  DATA,
+  NUM
+};
+
+std::ostream &operator<<(std::ostream &out, data_category_t c);
+
+constexpr data_category_t get_extent_category(extent_types_t type) {
+  if (type == extent_types_t::OBJECT_DATA_BLOCK ||
+      type == extent_types_t::COLL_BLOCK) {
+    return data_category_t::DATA;
+  } else {
+    return data_category_t::METADATA;
+  }
+}
+
 enum class record_commit_type_t : uint8_t {
   NONE,
   MODIFY,
@@ -1419,6 +1450,9 @@ struct segment_header_t {
 
   segment_type_t type;
 
+  data_category_t category;
+  reclaim_gen_t generation;
+
   segment_type_t get_type() const {
     return type;
   }
@@ -1431,6 +1465,8 @@ struct segment_header_t {
     denc(v.alloc_replay_from, p);
     denc(v.segment_nonce, p);
     denc(v.type, p);
+    denc(v.category, p);
+    denc(v.generation, p);
     DENC_FINISH(p);
   }
 };
index 0a7d316235a931b397bf3bddf2e91992348974ae..881794e681d16219e9b4c34e2e698f2fcce9d82e 100644 (file)
@@ -28,16 +28,14 @@ TransactionManager::TransactionManager(
   CacheRef _cache,
   LBAManagerRef _lba_manager,
   ExtentPlacementManagerRef &&epm,
-  BackrefManagerRef&& backref_manager,
-  tm_make_config_t config)
+  BackrefManagerRef&& backref_manager)
   : async_cleaner(std::move(_async_cleaner)),
     cache(std::move(_cache)),
     lba_manager(std::move(_lba_manager)),
     journal(std::move(_journal)),
     epm(std::move(epm)),
     backref_manager(std::move(backref_manager)),
-    sm_group(*async_cleaner->get_segment_manager_group()),
-    config(config)
+    sm_group(*async_cleaner->get_segment_manager_group())
 {
   async_cleaner->set_extent_callback(this);
   journal->set_write_pipeline(&write_pipeline);
@@ -473,7 +471,8 @@ TransactionManager::rewrite_logical_extent(
     t,
     lextent->get_type(),
     lextent->get_length(),
-    placement_hint_t::REWRITE)->cast<LogicalCachedExtent>();
+    lextent->get_user_hint(),
+    lextent->get_reclaim_generation())->cast<LogicalCachedExtent>();
   lextent->get_bptr().copy_out(
     0,
     lextent->get_length(),
@@ -497,7 +496,8 @@ TransactionManager::rewrite_logical_extent(
 
 TransactionManager::rewrite_extent_ret TransactionManager::rewrite_extent(
   Transaction &t,
-  CachedExtentRef extent)
+  CachedExtentRef extent,
+  reclaim_gen_t target_generation)
 {
   LOG_PREFIX(TransactionManager::rewrite_extent);
 
@@ -511,6 +511,13 @@ TransactionManager::rewrite_extent_ret TransactionManager::rewrite_extent(
     ceph_assert(!extent->is_pending_io());
   }
 
+  assert(extent->is_valid() && !extent->is_initial_pending());
+  if (extent->is_dirty()) {
+    extent->set_reclaim_generation(DIRTY_GENERATION);
+  } else {
+    extent->set_reclaim_generation(target_generation);
+  }
+
   t.get_rewrite_version_stats().increment(extent->get_version());
 
   if (is_backref_node(extent->get_type())) {
@@ -640,7 +647,7 @@ TransactionManager::~TransactionManager() {}
 TransactionManagerRef make_transaction_manager(tm_make_config_t config)
 {
   LOG_PREFIX(make_transaction_manager);
-  auto epm = std::make_unique<ExtentPlacementManager>();
+  auto epm = std::make_unique<ExtentPlacementManager>(config.epm_prefer_ool);
   auto cache = std::make_unique<Cache>(*epm);
   auto lba_manager = lba_manager::create_lba_manager(*cache);
   auto sms = std::make_unique<SegmentManagerGroup>();
@@ -681,8 +688,7 @@ TransactionManagerRef make_transaction_manager(tm_make_config_t config)
     std::move(cache),
     std::move(lba_manager),
     std::move(epm),
-    std::move(backref_manager),
-    config);
+    std::move(backref_manager));
 }
 
 }
index 8661297cf4bbbcdf890d14bbf2a75de147b45d82..81f7bb1450f304b92a9b3ef1294ff465f43d99e9 100644 (file)
@@ -35,15 +35,16 @@ namespace crimson::os::seastore {
 class Journal;
 
 struct tm_make_config_t {
-  bool is_test = true;
-  journal_type_t j_type = journal_type_t::SEGMENT_JOURNAL;
-  placement_hint_t default_placement_hint = placement_hint_t::HOT;
+  bool is_test;
+  journal_type_t j_type;
+  bool epm_prefer_ool;
+  reclaim_gen_t default_generation;
 
   static tm_make_config_t get_default() {
     return tm_make_config_t {
       false,
       journal_type_t::SEGMENT_JOURNAL,
-      placement_hint_t::HOT
+      false
     };
   }
   static tm_make_config_t get_test_segmented_journal() {
@@ -52,7 +53,7 @@ struct tm_make_config_t {
     return tm_make_config_t {
       true,
       journal_type_t::SEGMENT_JOURNAL,
-      placement_hint_t::HOT
+      false
     };
   }
   static tm_make_config_t get_test_cb_journal() {
@@ -61,7 +62,7 @@ struct tm_make_config_t {
     return tm_make_config_t {
       true,
       journal_type_t::CIRCULARBOUNDED_JOURNAL,
-      placement_hint_t::REWRITE
+      true
     };
   }
 
@@ -71,9 +72,9 @@ private:
   tm_make_config_t(
     bool is_test,
     journal_type_t j_type,
-    placement_hint_t default_placement_hint)
+    bool epm_prefer_ool)
     : is_test(is_test), j_type(j_type),
-      default_placement_hint(default_placement_hint)
+      epm_prefer_ool(epm_prefer_ool)
   {}
 };
 
@@ -114,8 +115,7 @@ public:
     CacheRef cache,
     LBAManagerRef lba_manager,
     ExtentPlacementManagerRef &&epm,
-    BackrefManagerRef&& backref_manager,
-    tm_make_config_t config = tm_make_config_t::get_default());
+    BackrefManagerRef&& backref_manager);
 
   /// Writes initial metadata to disk
   using mkfs_ertr = base_ertr;
@@ -338,14 +338,8 @@ public:
   alloc_extent_ret<T> alloc_extent(
     Transaction &t,
     laddr_t laddr_hint,
-    extent_len_t len) {
-    placement_hint_t placement_hint;
-    if constexpr (T::TYPE == extent_types_t::OBJECT_DATA_BLOCK ||
-                  T::TYPE == extent_types_t::COLL_BLOCK) {
-      placement_hint = placement_hint_t::COLD;
-    } else {
-      placement_hint = config.default_placement_hint;
-    }
+    extent_len_t len,
+    placement_hint_t placement_hint = placement_hint_t::HOT) {
     LOG_PREFIX(TransactionManager::alloc_extent);
     SUBTRACET(seastore_tm, "{} len={}, placement_hint={}, laddr_hint={}",
               t, T::TYPE, len, placement_hint, laddr_hint);
@@ -353,7 +347,8 @@ public:
     auto ext = cache->alloc_new_extent<T>(
       t,
       len,
-      placement_hint);
+      placement_hint,
+      0);
     return lba_manager->alloc_extent(
       t,
       laddr_hint,
@@ -447,7 +442,8 @@ public:
   using AsyncCleaner::ExtentCallbackInterface::rewrite_extent_ret;
   rewrite_extent_ret rewrite_extent(
     Transaction &t,
-    CachedExtentRef extent) final;
+    CachedExtentRef extent,
+    reclaim_gen_t target_generation) final;
 
   using AsyncCleaner::ExtentCallbackInterface::get_extent_if_live_ret;
   get_extent_if_live_ret get_extent_if_live(
@@ -608,10 +604,10 @@ private:
 
   WritePipeline write_pipeline;
 
-  tm_make_config_t config;
   rewrite_extent_ret rewrite_logical_extent(
     Transaction& t,
     LogicalCachedExtentRef extent);
+
 public:
   // Testing interfaces
   auto get_async_cleaner() {
index 49821be3b4ae9c2f29914d02c857627dbd4c7063..8b412d3debaa7b0f33f62545b388ea9b7dfe26fa 100644 (file)
@@ -60,7 +60,9 @@ struct btree_test_base :
 
   segment_id_t allocate_segment(
     segment_seq_t seq,
-    segment_type_t type
+    segment_type_t type,
+    data_category_t,
+    reclaim_gen_t
   ) final {
     auto ret = next;
     next = segment_id_t{
@@ -111,7 +113,7 @@ struct btree_test_base :
     }).safe_then([this] {
       sms.reset(new SegmentManagerGroup());
       journal = journal::make_segmented(*this);
-      epm.reset(new ExtentPlacementManager());
+      epm.reset(new ExtentPlacementManager(false));
       cache.reset(new Cache(*epm));
 
       block_size = segment_manager->get_block_size();
@@ -368,7 +370,11 @@ struct btree_lba_manager_test : btree_test_base {
       test_lba_mappings
     };
     if (create_fake_extent) {
-      cache->alloc_new_extent<TestBlockPhysical>(*t.t, TestBlockPhysical::SIZE);
+      cache->alloc_new_extent<TestBlockPhysical>(
+          *t.t,
+          TestBlockPhysical::SIZE,
+          placement_hint_t::HOT,
+          0);
     };
     return t;
   }
index 129fb7ae058d7daf64b87d940c543cde2b3a2118..b4dc8b4aed7df4730704daf1b096551619957bb8 100644 (file)
@@ -135,7 +135,7 @@ struct cbjournal_test_t : public seastar_test_suite_t
 
   cbjournal_test_t() :
       segment_manager(segment_manager::create_test_ephemeral()),
-      epm(new ExtentPlacementManager()),
+      epm(new ExtentPlacementManager(true)),
       cache(*epm)
   {
     device = new nvme_device::TestMemory(CBTEST_DEFAULT_TEST_SIZE);
index e778c5c62efb3a215fad8a060178cb89ea46615b..d89b7168675059e13c0ddbf50e49738331d8f8a3 100644 (file)
@@ -88,7 +88,7 @@ struct cache_test_t : public seastar_test_suite_t {
       return segment_manager->mkfs(
         segment_manager::get_ephemeral_device_config(0, 1));
     }).safe_then([this] {
-      epm.reset(new ExtentPlacementManager());
+      epm.reset(new ExtentPlacementManager(false));
       cache.reset(new Cache(*epm));
       current = paddr_t::make_seg_paddr(segment_id_t(segment_manager->get_device_id(), 0), 0);
       epm->add_device(segment_manager.get(), true);
@@ -131,7 +131,9 @@ TEST_F(cache_test_t, test_addr_fixup)
       auto t = get_transaction();
       auto extent = cache->alloc_new_extent<TestBlockPhysical>(
        *t,
-       TestBlockPhysical::SIZE);
+       TestBlockPhysical::SIZE,
+       placement_hint_t::HOT,
+       0);
       extent->set_contents('c');
       csum = extent->get_crc32c();
       submit_transaction(std::move(t)).get0();
@@ -160,7 +162,9 @@ TEST_F(cache_test_t, test_dirty_extent)
       auto t = get_transaction();
       auto extent = cache->alloc_new_extent<TestBlockPhysical>(
        *t,
-       TestBlockPhysical::SIZE);
+       TestBlockPhysical::SIZE,
+       placement_hint_t::HOT,
+       0);
       extent->set_contents('c');
       csum = extent->get_crc32c();
       auto reladdr = extent->get_paddr();
index a67b0aa70d43df8de4eb6d31f3ff6fc0c330d558..32bfc965d53482b9d7051cfcfa96ab2114ec51e6 100644 (file)
@@ -109,7 +109,9 @@ struct journal_test_t : seastar_test_suite_t, SegmentProvider {
 
   segment_id_t allocate_segment(
     segment_seq_t seq,
-    segment_type_t type
+    segment_type_t type,
+    data_category_t,
+    reclaim_gen_t
   ) final {
     auto ret = next;
     next = segment_id_t{