From 45c53a26c662b2c23efdecaaa186b6ac8811c2ea Mon Sep 17 00:00:00 2001 From: Zhang Song Date: Wed, 11 Jan 2023 18:17:41 +0800 Subject: [PATCH] crimson/os/seastore/EPM/BackgroundProcess: introduce more eviction policy Signed-off-by: Zhang Song --- src/common/options/crimson.yaml.in | 15 ++ src/crimson/os/seastore/async_cleaner.cc | 1 + src/crimson/os/seastore/async_cleaner.h | 11 ++ .../os/seastore/extent_placement_manager.cc | 2 +- .../os/seastore/extent_placement_manager.h | 159 +++++++++++++++++- 5 files changed, 184 insertions(+), 4 deletions(-) diff --git a/src/common/options/crimson.yaml.in b/src/common/options/crimson.yaml.in index f6f771f385ea2..1007998fade97 100644 --- a/src/common/options/crimson.yaml.in +++ b/src/common/options/crimson.yaml.in @@ -102,3 +102,18 @@ options: level: dev desc: Total size to use for CircularBoundedJournal if created, it is valid only if seastore_main_device_type is RANDOM_BLOCK default: 5_G +- name: seastore_multiple_tiers_stop_evict_ratio + type: float + level: advanced + desc: Stop evicting cold data to the cold tier when the used ratio of the main tier is less than this value. + default: 0.5 +- name: seastore_multiple_tiers_default_evict_ratio + type: float + level: advanced + desc: Begin evicting cold data to the cold tier when the used ratio of the main tier reaches this value. + default: 0.6 +- name: seastore_multiple_tiers_fast_evict_ratio + type: float + level: advanced + desc: Begin fast eviction when the used ratio of the main tier reaches this value. 
+ default: 0.7 diff --git a/src/crimson/os/seastore/async_cleaner.cc b/src/crimson/os/seastore/async_cleaner.cc index 3dac3d9f9fd93..508f0dda8bbf9 100644 --- a/src/crimson/os/seastore/async_cleaner.cc +++ b/src/crimson/os/seastore/async_cleaner.cc @@ -1149,6 +1149,7 @@ SegmentCleaner::clean_space_ret SegmentCleaner::clean_space() { LOG_PREFIX(SegmentCleaner::clean_space); assert(background_callback->is_ready()); + ceph_assert(can_clean_space()); if (!reclaim_state) { segment_id_t seg_id = get_next_reclaim_segment(); auto &segment_info = segments[seg_id]; diff --git a/src/crimson/os/seastore/async_cleaner.h b/src/crimson/os/seastore/async_cleaner.h index 3bc9338893824..cc7dfb0a1243b 100644 --- a/src/crimson/os/seastore/async_cleaner.h +++ b/src/crimson/os/seastore/async_cleaner.h @@ -1144,6 +1144,8 @@ public: virtual bool should_block_io_on_clean() const = 0; + virtual bool can_clean_space() const = 0; + virtual bool should_clean_space() const = 0; using clean_space_ertr = base_ertr; @@ -1317,6 +1319,11 @@ public: return aratio < config.available_ratio_hard_limit; } + bool can_clean_space() const final { + assert(background_callback->is_ready()); + return get_segments_reclaimable() > 0; + } + bool should_clean_space() const final { assert(background_callback->is_ready()); if (get_segments_reclaimable() == 0) { @@ -1649,6 +1656,10 @@ public: return false; } + bool can_clean_space() const final { + return false; + } + bool should_clean_space() const final { return false; } diff --git a/src/crimson/os/seastore/extent_placement_manager.cc b/src/crimson/os/seastore/extent_placement_manager.cc index 4ad3074c1d291..b7aabefc64414 100644 --- a/src/crimson/os/seastore/extent_placement_manager.cc +++ b/src/crimson/os/seastore/extent_placement_manager.cc @@ -680,7 +680,7 @@ ExtentPlacementManager::BackgroundProcess::do_background_cycle() }); } else { bool should_clean_main = - main_cleaner->should_clean_space() || + main_cleaner_should_run() || // make sure cleaner will start 
// when the trimmer should run but // failed to reserve space. diff --git a/src/crimson/os/seastore/extent_placement_manager.h b/src/crimson/os/seastore/extent_placement_manager.h index bb9749f04eec3..f94b52bf90bf4 100644 --- a/src/crimson/os/seastore/extent_placement_manager.h +++ b/src/crimson/os/seastore/extent_placement_manager.h @@ -261,10 +261,12 @@ public: if (gen == INLINE_GENERATION) { addr = make_record_relative_paddr(0); } else if (category == data_category_t::DATA) { + gen = background_process.adjust_generation(gen); assert(data_writers_by_gen[generation_to_writer(gen)]); addr = data_writers_by_gen[ generation_to_writer(gen)]->alloc_paddr(length); } else { + gen = background_process.adjust_generation(gen); assert(category == data_category_t::METADATA); assert(md_writers_by_gen[generation_to_writer(gen)]); addr = md_writers_by_gen[ @@ -484,6 +486,14 @@ private: for (auto id : cold_cleaner->get_device_ids()) { cleaners_by_device_id[id] = cold_cleaner.get(); } + + eviction_state.init( + crimson::common::get_conf<double>( + "seastore_multiple_tiers_stop_evict_ratio"), + crimson::common::get_conf<double>( + "seastore_multiple_tiers_default_evict_ratio"), + crimson::common::get_conf<double>( + "seastore_multiple_tiers_fast_evict_ratio")); } } @@ -585,6 +595,14 @@ private: } } + rewrite_gen_t adjust_generation(rewrite_gen_t gen) { + if (has_cold_tier()) { + return eviction_state.adjust_generation_with_eviction(gen); + } else { + return gen; + } + } + seastar::future<> reserve_projected_usage(io_usage_t usage); void release_projected_usage(const io_usage_t &usage) { @@ -673,13 +691,30 @@ private: } } - bool background_should_run() const { + // background_should_run() should be atomic with do_background_cycle() + // to make sure the condition is consistent. 
+ bool background_should_run() { assert(is_ready()); - return main_cleaner->should_clean_space() - || (has_cold_tier() && cold_cleaner->should_clean_space()) + maybe_update_eviction_mode(); + return main_cleaner_should_run() + || cold_cleaner_should_run() || trimmer->should_trim(); } + bool main_cleaner_should_run() const { + assert(is_ready()); + return main_cleaner->should_clean_space() || + (has_cold_tier() && + main_cleaner->can_clean_space() && + eviction_state.is_fast_mode()); + } + + bool cold_cleaner_should_run() const { + assert(is_ready()); + return has_cold_tier() && + cold_cleaner->should_clean_space(); + } + bool should_block_io() const { assert(is_ready()); return trimmer->should_block_io_on_trim() || @@ -688,6 +723,123 @@ private: cold_cleaner->should_block_io_on_clean()); } + void maybe_update_eviction_mode() { + if (has_cold_tier()) { + auto main_alive_ratio = main_cleaner->get_stat().get_used_raw_ratio(); + eviction_state.maybe_update_eviction_mode(main_alive_ratio); + } + } + + struct eviction_state_t { + enum class eviction_mode_t { + STOP, // a generation equal to MIN_COLD_GENERATION + // will be set to MIN_COLD_GENERATION - 1, which means + // no extents will be evicted. + DEFAULT, // generation incremented with each rewrite. Extents will + // be evicted when generation reaches MIN_COLD_GENERATION. + FAST, // map all generations located in + // [MIN_REWRITE_GENERATION, MIN_COLD_GENERATION) to + // MIN_COLD_GENERATION. 
+ }; + + eviction_mode_t eviction_mode = eviction_mode_t::STOP; + double stop_evict_ratio = 0.0; + double default_evict_ratio = 0.0; + double fast_evict_ratio = 0.0; + + void init(double stop_ratio, + double default_ratio, + double fast_ratio) { + ceph_assert(0 <= stop_ratio); + ceph_assert(stop_ratio < default_ratio); + ceph_assert(default_ratio < fast_ratio); + ceph_assert(fast_ratio <= 1); + eviction_mode = eviction_mode_t::STOP; + stop_evict_ratio = stop_ratio; + default_evict_ratio = default_ratio; + fast_evict_ratio = fast_ratio; + } + + bool is_stop_mode() const { + return eviction_mode == eviction_mode_t::STOP; + } + + bool is_default_mode() const { + return eviction_mode == eviction_mode_t::DEFAULT; + } + + bool is_fast_mode() const { + return eviction_mode == eviction_mode_t::FAST; + } + + rewrite_gen_t adjust_generation_with_eviction(rewrite_gen_t gen) const { + rewrite_gen_t ret = gen; + switch(eviction_mode) { + case eviction_mode_t::STOP: + if (gen == MIN_COLD_GENERATION) { + ret = MIN_COLD_GENERATION - 1; + } + break; + case eviction_mode_t::DEFAULT: + break; + case eviction_mode_t::FAST: + if (gen >= MIN_REWRITE_GENERATION && gen < MIN_COLD_GENERATION) { + ret = MIN_COLD_GENERATION; + } + break; + default: + ceph_abort("impossible"); + } + return ret; + } + + // We change the state of eviction_mode according to the alive ratio + // of the main cleaner. + // + // Use A, B, C, D to represent the state of alive ratio: + // A: alive ratio <= stop_evict_ratio + // B: stop_evict_ratio < alive ratio <= default_evict_ratio + // C: default_evict_ratio < alive ratio <= fast_evict_ratio + // D: alive ratio > fast_evict_ratio + // + // and use X, Y, Z to abbreviate the values of eviction_mode_t: + // X: STOP + // Y: DEFAULT + // Z: FAST + // + // Then we can use a form like (A && X) to describe the current state + // of the main cleaner, which indicates the alive ratio is less than or + // equal to stop_evict_ratio and the current eviction mode is STOP. 
+ // + // All valid state transitions are shown as follows: + // (A && X) => (B && X) => (C && Y) => (D && Z) => + // (C && Z) => (B && Y) => (A && X) + // `--> (C && Y) => ... + // + // When the system restarts, the initial state is (_ && X); the + // transitions should be: + // (_ && X) -> (A && X) => normal transition + // -> (B && X) => normal transition + // -> (C && X) => (C && Y) => normal transition + // -> (D && X) => (D && Z) => normal transition + void maybe_update_eviction_mode(double main_alive_ratio) { + if (main_alive_ratio <= stop_evict_ratio) { + eviction_mode = eviction_mode_t::STOP; + } else if (main_alive_ratio <= default_evict_ratio) { + if (eviction_mode > eviction_mode_t::DEFAULT) { + eviction_mode = eviction_mode_t::DEFAULT; + } + } else if (main_alive_ratio <= fast_evict_ratio) { + if (eviction_mode < eviction_mode_t::DEFAULT) { + eviction_mode = eviction_mode_t::DEFAULT; + } + } else { + assert(main_alive_ratio > fast_evict_ratio); + eviction_mode = eviction_mode_t::FAST; + } + } + }; + seastar::future<> do_background_cycle(); void register_metrics(); @@ -716,6 +868,7 @@ private: std::optional> blocking_io; bool is_running_until_halt = false; state_t state = state_t::STOP; + eviction_state_t eviction_state; }; std::vector writer_refs; -- 2.39.5