]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
crimson/os/seastore/EPM/BackgroundProcess: introduce more eviction policy
authorZhang Song <zhangsong325@gmail.com>
Wed, 11 Jan 2023 10:17:41 +0000 (18:17 +0800)
committerMatan Breizman <mbreizma@redhat.com>
Tue, 23 May 2023 08:09:20 +0000 (08:09 +0000)
Signed-off-by: Zhang Song <zhangsong325@gmail.com>
(cherry picked from commit 45c53a26c662b2c23efdecaaa186b6ac8811c2ea)

src/common/options/crimson.yaml.in
src/crimson/os/seastore/async_cleaner.cc
src/crimson/os/seastore/async_cleaner.h
src/crimson/os/seastore/extent_placement_manager.cc
src/crimson/os/seastore/extent_placement_manager.h

index f6f771f385ea276055badafc444e7a7dada26590..1007998fade9745f5f558104e9049476e12633cc 100644 (file)
@@ -102,3 +102,18 @@ options:
   level: dev
   desc: Total size to use for CircularBoundedJournal if created, it is valid only if seastore_main_device_type is RANDOM_BLOCK
   default: 5_G
+- name: seastore_multiple_tiers_stop_evict_ratio
+  type: float
+  level: advanced
+  desc: When the used ratio of the main tier is less than this value, stop evicting cold data to the cold tier.
+  default: 0.5
+- name: seastore_multiple_tiers_default_evict_ratio
+  type: float
+  level: advanced
+  desc: Begin evicting cold data to the cold tier when the used ratio of the main tier reaches this value.
+  default: 0.6
+- name: seastore_multiple_tiers_fast_evict_ratio
+  type: float
+  level: advanced
+  desc: Begin fast eviction when the used ratio of the main tier reaches this value.
+  default: 0.7
index 1cf89ca1e13ccad0c2b51ef195fb48ef412fa61b..4bac744e4a0672bf27a6279a2a0b34aad4014eed 100644 (file)
@@ -1149,6 +1149,7 @@ SegmentCleaner::clean_space_ret SegmentCleaner::clean_space()
 {
   LOG_PREFIX(SegmentCleaner::clean_space);
   assert(background_callback->is_ready());
+  ceph_assert(can_clean_space());
   if (!reclaim_state) {
     segment_id_t seg_id = get_next_reclaim_segment();
     auto &segment_info = segments[seg_id];
index 1a7a30be610fb3c35f9d2eed89d952a2abba28d8..fb8e03bb4bcf8ae1e2c75005ebfd351b5ab7a290 100644 (file)
@@ -1144,6 +1144,8 @@ public:
 
   virtual bool should_block_io_on_clean() const = 0;
 
+  virtual bool can_clean_space() const = 0;
+
   virtual bool should_clean_space() const = 0;
 
   using clean_space_ertr = base_ertr;
@@ -1317,6 +1319,11 @@ public:
     return aratio < config.available_ratio_hard_limit;
   }
 
+  bool can_clean_space() const final {  // true when get_segments_reclaimable() is non-zero
+    assert(background_callback->is_ready());
+    return get_segments_reclaimable() > 0;
+  }
+
   bool should_clean_space() const final {
     assert(background_callback->is_ready());
     if (get_segments_reclaimable() == 0) {
@@ -1652,6 +1659,10 @@ public:
     return false;
   }
 
+  bool can_clean_space() const final {  // unconditionally false for this cleaner
+    return false;
+  }
+
   bool should_clean_space() const final {
     return false;
   }
index 4ad3074c1d2916325d20517c2982c2f8a8d11001..b7aabefc644148c9fc0e4e7d0e3d458f55067683 100644 (file)
@@ -680,7 +680,7 @@ ExtentPlacementManager::BackgroundProcess::do_background_cycle()
     });
   } else {
     bool should_clean_main =
-      main_cleaner->should_clean_space() ||
+      main_cleaner_should_run() ||
       // make sure cleaner will start
       // when the trimmer should run but
       // failed to reserve space.
index bb9749f04eec3c43bdbcacd7b87d362fd68937b8..f94b52bf90bf4ab2bf37e56ec5ede10240332224 100644 (file)
@@ -261,10 +261,12 @@ public:
       if (gen == INLINE_GENERATION) {
        addr = make_record_relative_paddr(0);
       } else if (category == data_category_t::DATA) {
+        gen = background_process.adjust_generation(gen);
        assert(data_writers_by_gen[generation_to_writer(gen)]);
        addr = data_writers_by_gen[
          generation_to_writer(gen)]->alloc_paddr(length);
       } else {
+        gen = background_process.adjust_generation(gen);
        assert(category == data_category_t::METADATA);
        assert(md_writers_by_gen[generation_to_writer(gen)]);
        addr = md_writers_by_gen[
@@ -484,6 +486,14 @@ private:
         for (auto id : cold_cleaner->get_device_ids()) {
           cleaners_by_device_id[id] = cold_cleaner.get();
         }
+
+        eviction_state.init(
+          crimson::common::get_conf<double>(
+            "seastore_multiple_tiers_stop_evict_ratio"),
+          crimson::common::get_conf<double>(
+            "seastore_multiple_tiers_default_evict_ratio"),
+          crimson::common::get_conf<double>(
+            "seastore_multiple_tiers_fast_evict_ratio"));
       }
     }
 
@@ -585,6 +595,14 @@ private:
       }
     }
 
+    rewrite_gen_t adjust_generation(rewrite_gen_t gen) {  // identity without a cold tier
+      if (has_cold_tier()) {
+        return eviction_state.adjust_generation_with_eviction(gen);  // remap per eviction mode
+      } else {
+        return gen;
+      }
+    }
+
     seastar::future<> reserve_projected_usage(io_usage_t usage);
 
     void release_projected_usage(const io_usage_t &usage) {
@@ -673,13 +691,30 @@ private:
       }
     }
 
-    bool background_should_run() const {
+    // background_should_run() should be atomic with do_background_cycle()
+    // to make sure the condition is consistent.
+    bool background_should_run() {
       assert(is_ready());
-      return main_cleaner->should_clean_space()
-        || (has_cold_tier() && cold_cleaner->should_clean_space())
+      maybe_update_eviction_mode();  // refresh first: main_cleaner_should_run() reads the mode
+      return main_cleaner_should_run()
+        || cold_cleaner_should_run()
         || trimmer->should_trim();
     }
 
+    bool main_cleaner_should_run() const {  // regular trigger, or FAST eviction with a cold tier
+      assert(is_ready());
+      return main_cleaner->should_clean_space() ||
+        (has_cold_tier() &&
+         main_cleaner->can_clean_space() &&  // only when the main cleaner can clean
+         eviction_state.is_fast_mode());
+    }
+
+    bool cold_cleaner_should_run() const {  // always false without a cold tier
+      assert(is_ready());
+      return has_cold_tier() &&
+        cold_cleaner->should_clean_space();
+    }
+
     bool should_block_io() const {
       assert(is_ready());
       return trimmer->should_block_io_on_trim() ||
@@ -688,6 +723,123 @@ private:
               cold_cleaner->should_block_io_on_clean());
     }
 
+    void maybe_update_eviction_mode() {  // no-op without a cold tier
+      if (has_cold_tier()) {
+        auto main_alive_ratio = main_cleaner->get_stat().get_used_raw_ratio();  // main cleaner's used raw ratio
+        eviction_state.maybe_update_eviction_mode(main_alive_ratio);
+      }
+    }
+
+    struct eviction_state_t {
+      enum class eviction_mode_t {
+        STOP,     // a generation equal to MIN_COLD_GENERATION is mapped
+                  // back to MIN_COLD_GENERATION - 1, which means
+                  // no extents will be evicted.
+        DEFAULT,  // generation incremented with each rewrite. Extents will
+                  // be evicted when generation reaches MIN_COLD_GENERATION.
+        FAST,     // map all generations located in
+                  // [MIN_REWRITE_GENERATION, MIN_COLD_GENERATION) to
+                  // MIN_COLD_GENERATION.
+      };
+
+      eviction_mode_t eviction_mode;  // set by init() and maybe_update_eviction_mode()
+      double stop_evict_ratio;  // alive ratio <= this forces STOP
+      double default_evict_ratio;  // boundary between bands B and C described below
+      double fast_evict_ratio;  // alive ratio > this forces FAST
+
+      void init(double stop_ratio,
+                double default_ratio,
+                double fast_ratio) {
+        ceph_assert(0 <= stop_ratio);  // together: 0 <= stop < default < fast <= 1
+        ceph_assert(stop_ratio < default_ratio);
+        ceph_assert(default_ratio < fast_ratio);
+        ceph_assert(fast_ratio <= 1);
+        eviction_mode = eviction_mode_t::STOP;  // always start in STOP
+        stop_evict_ratio = stop_ratio;
+        default_evict_ratio = default_ratio;
+        fast_evict_ratio = fast_ratio;
+      }
+
+      bool is_stop_mode() const {
+        return eviction_mode == eviction_mode_t::STOP;
+      }
+
+      bool is_default_mode() const {
+        return eviction_mode == eviction_mode_t::DEFAULT;
+      }
+
+      bool is_fast_mode() const {
+        return eviction_mode == eviction_mode_t::FAST;
+      }
+
+      rewrite_gen_t adjust_generation_with_eviction(rewrite_gen_t gen) {  // remap gen per current mode
+        rewrite_gen_t ret = gen;
+        switch(eviction_mode) {
+        case eviction_mode_t::STOP:
+          if (gen == MIN_COLD_GENERATION) {  // only the exact boundary generation is pulled back
+            ret = MIN_COLD_GENERATION - 1;
+          }
+          break;
+        case eviction_mode_t::DEFAULT:  // generation is passed through unchanged
+          break;
+        case eviction_mode_t::FAST:
+          if (gen >= MIN_REWRITE_GENERATION && gen < MIN_COLD_GENERATION) {
+            ret = MIN_COLD_GENERATION;
+          }
+          break;
+        default:
+          ceph_abort("impossible");
+        }
+        return ret;
+      }
+
+      // We change the state of eviction_mode according to the alive ratio
+      // of the main cleaner.
+      //
+      // Use A, B, C, D to represent the state of alive ratio:
+      //   A: alive ratio <= stop_evict_ratio
+      //   B: stop_evict_ratio < alive ratio <= default_evict_ratio
+      //   C: default_evict_ratio < alive ratio <= fast_evict_ratio
+      //   D: alive ratio >  fast_evict_ratio
+      //
+      // and use X, Y, Z to shorten the state of eviction_mode_t:
+      //   X: STOP
+      //   Y: DEFAULT
+      //   Z: FAST
+      //
+      // Then we can use a form like (A && X) to describe the current state
+      // of the main cleaner, which indicates the alive ratio is less than or
+      // equal to stop_evict_ratio and current eviction mode is STOP.
+      //
+      // all valid state transitions are shown as follows:
+      //   (A && X) => (B && X) => (C && Y) => (D && Z) =>
+      //   (C && Z) => (B && Y) => (A && X)
+      //                      `--> (C && Y) => ...
+      //
+      // when the system restarts, the init state is (_ && X), the
+      // transitions should be:
+      // (_ && X) -> (A && X) => normal transition
+      //          -> (B && X) => normal transition
+      //          -> (C && X) => (C && Y) => normal transition
+      //          -> (D && X) => (D && Z) => normal transition
+      void maybe_update_eviction_mode(double main_alive_ratio) {
+        if (main_alive_ratio <= stop_evict_ratio) {
+          eviction_mode = eviction_mode_t::STOP;
+        } else if (main_alive_ratio <= default_evict_ratio) {
+          if (eviction_mode > eviction_mode_t::DEFAULT) {  // band B: only FAST drops to DEFAULT
+            eviction_mode = eviction_mode_t::DEFAULT;
+          }
+        } else if (main_alive_ratio <= fast_evict_ratio) {
+          if (eviction_mode < eviction_mode_t::DEFAULT) {  // band C: only STOP rises to DEFAULT
+            eviction_mode = eviction_mode_t::DEFAULT;
+          }
+        } else {
+          assert(main_alive_ratio > fast_evict_ratio);
+          eviction_mode = eviction_mode_t::FAST;
+        }
+      }
+    };
+
     seastar::future<> do_background_cycle();
 
     void register_metrics();
@@ -716,6 +868,7 @@ private:
     std::optional<seastar::promise<>> blocking_io;
     bool is_running_until_halt = false;
     state_t state = state_t::STOP;
+    eviction_state_t eviction_state;
   };
 
   std::vector<ExtentOolWriterRef> writer_refs;