git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
crimson/os/seastore/EPM/BackgroundProcess: add cold_cleaner
author: Zhang Song <zhangsong325@gmail.com>
Tue, 10 Jan 2023 08:58:16 +0000 (16:58 +0800)
committer: Zhang Song <zhangsong325@gmail.com>
Wed, 15 Mar 2023 01:21:08 +0000 (09:21 +0800)
Signed-off-by: Zhang Song <zhangsong325@gmail.com>
src/crimson/os/seastore/extent_placement_manager.cc
src/crimson/os/seastore/extent_placement_manager.h
src/crimson/os/seastore/seastore_types.h

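diff --git a/src/crimson/os/seastore/extent_placement_manager.cc b/src/crimson/os/seastore/extent_placement_manager.cc
index 97a0d02a7ead42a0d3063a7e56902024e254ca6a..027f20d995fcbf911c2674d8ee511e3b856a357f 100644
--- a/src/crimson/os/seastore/extent_placement_manager.cc
+++ b/src/crimson/os/seastore/extent_placement_manager.cc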
@@ -178,16 +178,24 @@ SegmentedOolWriter::alloc_write_ool_extents(
 }
 
 void ExtentPlacementManager::init(
-    JournalTrimmerImplRef &&trimmer, AsyncCleanerRef &&cleaner)
+    JournalTrimmerImplRef &&trimmer,
+    AsyncCleanerRef &&cleaner,
+    AsyncCleanerRef &&cold_cleaner)
 {
   writer_refs.clear();
+  auto cold_segment_cleaner = dynamic_cast<SegmentCleaner*>(cold_cleaner.get());
+  dynamic_max_rewrite_generation = MIN_COLD_GENERATION - 1;
+  if (cold_segment_cleaner) {
+    dynamic_max_rewrite_generation = MAX_REWRITE_GENERATION;
+  }
 
   if (trimmer->get_journal_type() == journal_type_t::SEGMENTED) {
     auto segment_cleaner = dynamic_cast<SegmentCleaner*>(cleaner.get());
     ceph_assert(segment_cleaner != nullptr);
-    auto num_writers = generation_to_writer(REWRITE_GENERATIONS);
+    auto num_writers = generation_to_writer(dynamic_max_rewrite_generation + 1);
+
     data_writers_by_gen.resize(num_writers, {});
-    for (rewrite_gen_t gen = OOL_GENERATION; gen < REWRITE_GENERATIONS; ++gen) {
+    for (rewrite_gen_t gen = OOL_GENERATION; gen < MIN_COLD_GENERATION; ++gen) {
       writer_refs.emplace_back(std::make_unique<SegmentedOolWriter>(
            data_category_t::DATA, gen, *segment_cleaner,
             *ool_segment_seq_allocator));
@@ -195,7 +203,7 @@ void ExtentPlacementManager::init(
     }
 
     md_writers_by_gen.resize(num_writers, {});
-    for (rewrite_gen_t gen = OOL_GENERATION; gen < REWRITE_GENERATIONS; ++gen) {
+    for (rewrite_gen_t gen = OOL_GENERATION; gen < MIN_COLD_GENERATION; ++gen) {
       writer_refs.emplace_back(std::make_unique<SegmentedOolWriter>(
            data_category_t::METADATA, gen, *segment_cleaner,
             *ool_segment_seq_allocator));
@@ -210,7 +218,7 @@ void ExtentPlacementManager::init(
     assert(trimmer->get_journal_type() == journal_type_t::RANDOM_BLOCK);
     auto rb_cleaner = dynamic_cast<RBMCleaner*>(cleaner.get());
     ceph_assert(rb_cleaner != nullptr);
-    auto num_writers = generation_to_writer(REWRITE_GENERATIONS);
+    auto num_writers = generation_to_writer(dynamic_max_rewrite_generation + 1);
     data_writers_by_gen.resize(num_writers, {});
     md_writers_by_gen.resize(num_writers, {});
     writer_refs.emplace_back(std::make_unique<RandomBlockOolWriter>(
@@ -223,7 +231,34 @@ void ExtentPlacementManager::init(
     }
   }
 
-  background_process.init(std::move(trimmer), std::move(cleaner));
+  if (cold_segment_cleaner) {
+    for (rewrite_gen_t gen = MIN_COLD_GENERATION; gen < REWRITE_GENERATIONS; ++gen) {
+      writer_refs.emplace_back(std::make_unique<SegmentedOolWriter>(
+            data_category_t::DATA, gen, *cold_segment_cleaner,
+            *ool_segment_seq_allocator));
+      data_writers_by_gen[generation_to_writer(gen)] = writer_refs.back().get();
+    }
+    for (rewrite_gen_t gen = MIN_COLD_GENERATION; gen < REWRITE_GENERATIONS; ++gen) {
+      writer_refs.emplace_back(std::make_unique<SegmentedOolWriter>(
+            data_category_t::METADATA, gen, *cold_segment_cleaner,
+            *ool_segment_seq_allocator));
+      md_writers_by_gen[generation_to_writer(gen)] = writer_refs.back().get();
+    }
+    for (auto *device : cold_segment_cleaner->get_segment_manager_group()
+                                            ->get_segment_managers()) {
+      add_device(device);
+    }
+  }
+
+  background_process.init(std::move(trimmer),
+                          std::move(cleaner),
+                          std::move(cold_cleaner));
+  if (cold_segment_cleaner) {
+    ceph_assert(get_main_backend_type() == backend_type_t::SEGMENTED);
+    ceph_assert(background_process.has_cold_tier());
+  } else {
+    ceph_assert(!background_process.has_cold_tier());
+  }
 }
 
 void ExtentPlacementManager::set_primary_device(Device *device)
@@ -269,11 +304,17 @@ ExtentPlacementManager::dispatch_delayed_extents(Transaction &t)
   }
 
   for (auto &extent : res.delayed_extents) {
-    res.usage.cleaner_usage.main_usage += extent->get_length();
     if (dispatch_delayed_extent(extent)) {
       res.usage.inline_usage += extent->get_length();
+      res.usage.cleaner_usage.main_usage += extent->get_length();
       t.mark_delayed_extent_inline(extent);
     } else {
+      if (extent->get_rewrite_generation() < MIN_COLD_GENERATION) {
+        res.usage.cleaner_usage.main_usage += extent->get_length();
+      } else {
+        assert(background_process.has_cold_tier());
+        res.usage.cleaner_usage.cold_ool_usage += extent->get_length();
+      }
       t.mark_delayed_extent_ool(extent);
       auto writer_ptr = get_writer(
           extent->get_user_hint(),
@@ -350,6 +391,11 @@ void ExtentPlacementManager::BackgroundProcess::log_state(const char *caller) co
         caller,
         JournalTrimmerImpl::stat_printer_t{*trimmer, true},
         AsyncCleaner::stat_printer_t{*main_cleaner, true});
+  if (has_cold_tier()) {
+    DEBUG("caller {}, cold_cleaner: {}",
+          caller,
+          AsyncCleaner::stat_printer_t{*cold_cleaner, true});
+  }
 }
 
 void ExtentPlacementManager::BackgroundProcess::start_background()
@@ -358,6 +404,10 @@ void ExtentPlacementManager::BackgroundProcess::start_background()
   INFO("{}, {}",
        JournalTrimmerImpl::stat_printer_t{*trimmer, true},
        AsyncCleaner::stat_printer_t{*main_cleaner, true});
+  if (has_cold_tier()) {
+    INFO("cold_cleaner: {}",
+         AsyncCleaner::stat_printer_t{*cold_cleaner, true});
+  }
   ceph_assert(trimmer->check_is_ready());
   ceph_assert(state == state_t::SCAN_SPACE);
   assert(!is_running());
@@ -388,6 +438,10 @@ ExtentPlacementManager::BackgroundProcess::stop_background()
     INFO("done, {}, {}",
          JournalTrimmerImpl::stat_printer_t{*trimmer, true},
          AsyncCleaner::stat_printer_t{*main_cleaner, true});
+    if (has_cold_tier()) {
+      INFO("done, cold_cleaner: {}",
+           AsyncCleaner::stat_printer_t{*cold_cleaner, true});
+    }
     // run_until_halt() can be called at HALT
   });
 }
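diff --git a/src/crimson/os/seastore/extent_placement_manager.h b/src/crimson/os/seastore/extent_placement_manager.h
index 15eb592f57a9f95b5655f58575a45a0a67b350f8..ab5ffdf7495fcfc4e7729c1976bec99482e0828c 100644
--- a/src/crimson/os/seastore/extent_placement_manager.h
+++ b/src/crimson/os/seastore/extent_placement_manager.h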
@@ -132,7 +132,8 @@ struct cleaner_usage_t {
   // The size of all extents write to the main devices, including inline extents
   // and out-of-line extents.
   std::size_t main_usage = 0;
-  // TODO: add cold_ool_usage
+  // The size of extents write to the cold devices
+  std::size_t cold_ool_usage = 0;
 };
 
 struct reserve_cleaner_result_t {
@@ -157,6 +158,7 @@ struct io_usage_t {
     return out << "io_usage_t("
                << "inline_usage=" << usage.inline_usage
                << ", main_cleaner_usage=" << usage.cleaner_usage.main_usage
+               << ", cold_cleaner_usage=" << usage.cleaner_usage.cold_ool_usage
                << ")";
   }
 };
@@ -180,7 +182,7 @@ public:
     devices_by_id.resize(DEVICE_ID_MAX, nullptr);
   }
 
-  void init(JournalTrimmerImplRef &&, AsyncCleanerRef &&);
+  void init(JournalTrimmerImplRef &&, AsyncCleanerRef &&, AsyncCleanerRef &&);
 
   SegmentSeqAllocator &get_ool_segment_seq_allocator() const {
     return *ool_segment_seq_allocator;
@@ -275,7 +277,7 @@ public:
       return alloc_paddr(INLINE_GENERATION, data_category_t::METADATA, length);
     }
 
-    if (get_backend_type() == backend_type_t::SEGMENTED &&
+    if (get_main_backend_type() == backend_type_t::SEGMENTED &&
        is_lba_backref_node(type)) {
       // with SEGMENTED, lba-backref extents must be INLINE
       return alloc_paddr(INLINE_GENERATION, data_category_t::METADATA, length);
@@ -288,22 +290,22 @@ public:
 
     if (get_extent_category(type) == data_category_t::METADATA &&
         gen == INIT_GENERATION) {
-      if (get_backend_type() == backend_type_t::SEGMENTED) {
+      if (get_main_backend_type() == backend_type_t::SEGMENTED) {
        // with SEGMENTED, default not to ool metadata extents to reduce 
        // padding overhead.
        // TODO: improve padding so we can default to the ool path.
        return alloc_paddr(INLINE_GENERATION, get_extent_category(type), length);
       } else {
         // with RBM, all extents must be OOL
-       assert(get_backend_type() ==
+       assert(get_main_backend_type() ==
               backend_type_t::RANDOM_BLOCK);
        return alloc_paddr(OOL_GENERATION, get_extent_category(type), length);
       }
     } else {
       assert(get_extent_category(type) == data_category_t::DATA ||
              gen >= MIN_REWRITE_GENERATION);
-      if (gen > MAX_REWRITE_GENERATION) {
-        gen = MAX_REWRITE_GENERATION;
+      if (gen > dynamic_max_rewrite_generation) {
+        gen = dynamic_max_rewrite_generation;
       } else if (gen == INIT_GENERATION) {
         gen = OOL_GENERATION;
       }
@@ -391,9 +393,9 @@ public:
     background_process.release_projected_usage(usage);
   }
 
-  backend_type_t get_backend_type() const {
+  backend_type_t get_main_backend_type() const {
     if (!background_process.is_no_background()) {
-      return background_process.get_backend_type();
+      return background_process.get_main_backend_type();
     } 
     // for test
     assert(primary_device);
@@ -442,6 +444,7 @@ private:
     assert(hint < placement_hint_t::NUM_HINTS);
     assert(is_rewrite_generation(gen));
     assert(gen != INLINE_GENERATION);
+    assert(gen <= dynamic_max_rewrite_generation);
     if (category == data_category_t::DATA) {
       return data_writers_by_gen[generation_to_writer(gen)];
     } else {
@@ -462,24 +465,40 @@ private:
     BackgroundProcess() = default;
 
     void init(JournalTrimmerImplRef &&_trimmer,
-              AsyncCleanerRef &&_cleaner) {
+              AsyncCleanerRef &&_cleaner,
+              AsyncCleanerRef &&_cold_cleaner) {
       trimmer = std::move(_trimmer);
       trimmer->set_background_callback(this);
       main_cleaner = std::move(_cleaner);
       main_cleaner->set_background_callback(this);
+      if (_cold_cleaner) {
+        cold_cleaner = std::move(_cold_cleaner);
+        cold_cleaner->set_background_callback(this);
+      }
     }
 
     journal_type_t get_journal_type() const {
       return trimmer->get_journal_type();
     }
 
+    bool has_cold_tier() const {
+      return cold_cleaner.get() != nullptr;
+    }
+
     void set_extent_callback(ExtentCallbackInterface *cb) {
       trimmer->set_extent_callback(cb);
       main_cleaner->set_extent_callback(cb);
+      if (has_cold_tier()) {
+        cold_cleaner->set_extent_callback(cb);
+      }
     }
 
     store_statfs_t get_stat() const {
-      return main_cleaner->get_stat();
+      auto stat = main_cleaner->get_stat();
+      if (has_cold_tier()) {
+        stat.add(cold_cleaner->get_stat());
+      }
+      return stat;
     }
 
     using mount_ret = ExtentPlacementManager::mount_ret;
@@ -489,13 +508,18 @@ private:
       trimmer->reset();
       stats = {};
       register_metrics();
-      return main_cleaner->mount();
+      return main_cleaner->mount(
+      ).safe_then([this] {
+        return has_cold_tier() ? cold_cleaner->mount() : mount_ertr::now();
+      });
     }
 
     void start_scan_space() {
       ceph_assert(state == state_t::MOUNT);
       state = state_t::SCAN_SPACE;
       ceph_assert(main_cleaner->check_usage_is_empty());
+      ceph_assert(!has_cold_tier() ||
+                  cold_cleaner->check_usage_is_empty());
     }
 
     void start_background();
@@ -534,14 +558,15 @@ private:
     }
 
     seastar::future<> stop_background();
-    backend_type_t get_backend_type() const {
+    backend_type_t get_main_backend_type() const {
       return get_journal_type();
     }
 
     // Testing interfaces
 
     bool check_usage() {
-      return main_cleaner->check_usage();
+      return main_cleaner->check_usage() &&
+        (!has_cold_tier() || cold_cleaner->check_usage());
     }
 
     seastar::future<> run_until_halt();
@@ -635,6 +660,11 @@ private:
     JournalTrimmerImplRef trimmer;
     AsyncCleanerRef main_cleaner;
 
+    /*
+     * cold tier (optional, see has_cold_tier())
+     */
+    AsyncCleanerRef cold_cleaner;
+
     std::optional<seastar::future<>> process_join;
     std::optional<seastar::promise<>> blocking_background;
     std::optional<seastar::promise<>> blocking_io;
@@ -651,6 +681,7 @@ private:
   Device* primary_device = nullptr;
   std::size_t num_devices = 0;
 
+  rewrite_gen_t dynamic_max_rewrite_generation;
   BackgroundProcess background_process;
   // TODO: drop once paddr->journal_seq_t is introduced
   SegmentSeqAllocatorRef ool_segment_seq_allocator;
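diff --git a/src/crimson/os/seastore/seastore_types.h b/src/crimson/os/seastore/seastore_types.h
index 8d1bace0eeadd93c1e283c166bb56c9de74e7c14..be7656e6cfc0d0cdfccfc2887a52c9f471955d2c 100644
--- a/src/crimson/os/seastore/seastore_types.h
+++ b/src/crimson/os/seastore/seastore_types.h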
@@ -1149,14 +1149,9 @@ constexpr rewrite_gen_t OOL_GENERATION = 2;
 
 // All the rewritten extents start with MIN_REWRITE_GENERATION
 constexpr rewrite_gen_t MIN_REWRITE_GENERATION = 3;
-constexpr rewrite_gen_t MAX_REWRITE_GENERATION = 4;
-
-/**
- * TODO:
- * For tiering, might introduce 5 and 6 for the cold tier, and 1 ~ 4 for the
- * hot tier.
- */
-
+// without cold tier, the largest generation is less than MIN_COLD_GENERATION
+constexpr rewrite_gen_t MIN_COLD_GENERATION = 5;
+constexpr rewrite_gen_t MAX_REWRITE_GENERATION = 7;
 constexpr rewrite_gen_t REWRITE_GENERATIONS = MAX_REWRITE_GENERATION + 1;
 constexpr rewrite_gen_t NULL_GENERATION =
   std::numeric_limits<rewrite_gen_t>::max();