From 343f579748d9856a7f8fde49025db91cdce1367d Mon Sep 17 00:00:00 2001 From: Zhang Song Date: Tue, 10 Jan 2023 16:58:16 +0800 Subject: [PATCH] crimson/os/seastore/EPM/BackgroundProcess: add cold_cleaner Signed-off-by: Zhang Song --- .../os/seastore/extent_placement_manager.cc | 68 +++++++++++++++++-- .../os/seastore/extent_placement_manager.h | 59 ++++++++++++---- src/crimson/os/seastore/seastore_types.h | 11 +-- 3 files changed, 109 insertions(+), 29 deletions(-) diff --git a/src/crimson/os/seastore/extent_placement_manager.cc b/src/crimson/os/seastore/extent_placement_manager.cc index 97a0d02a7ea..027f20d995f 100644 --- a/src/crimson/os/seastore/extent_placement_manager.cc +++ b/src/crimson/os/seastore/extent_placement_manager.cc @@ -178,16 +178,24 @@ SegmentedOolWriter::alloc_write_ool_extents( } void ExtentPlacementManager::init( - JournalTrimmerImplRef &&trimmer, AsyncCleanerRef &&cleaner) + JournalTrimmerImplRef &&trimmer, + AsyncCleanerRef &&cleaner, + AsyncCleanerRef &&cold_cleaner) { writer_refs.clear(); + auto cold_segment_cleaner = dynamic_cast(cold_cleaner.get()); + dynamic_max_rewrite_generation = MIN_COLD_GENERATION - 1; + if (cold_segment_cleaner) { + dynamic_max_rewrite_generation = MAX_REWRITE_GENERATION; + } if (trimmer->get_journal_type() == journal_type_t::SEGMENTED) { auto segment_cleaner = dynamic_cast(cleaner.get()); ceph_assert(segment_cleaner != nullptr); - auto num_writers = generation_to_writer(REWRITE_GENERATIONS); + auto num_writers = generation_to_writer(dynamic_max_rewrite_generation + 1); + data_writers_by_gen.resize(num_writers, {}); - for (rewrite_gen_t gen = OOL_GENERATION; gen < REWRITE_GENERATIONS; ++gen) { + for (rewrite_gen_t gen = OOL_GENERATION; gen < MIN_COLD_GENERATION; ++gen) { writer_refs.emplace_back(std::make_unique( data_category_t::DATA, gen, *segment_cleaner, *ool_segment_seq_allocator)); @@ -195,7 +203,7 @@ void ExtentPlacementManager::init( } md_writers_by_gen.resize(num_writers, {}); - for (rewrite_gen_t gen = OOL_GENERATION; gen < REWRITE_GENERATIONS; ++gen) { + for (rewrite_gen_t gen = OOL_GENERATION; gen < MIN_COLD_GENERATION; ++gen) { writer_refs.emplace_back(std::make_unique( data_category_t::METADATA, gen, *segment_cleaner, *ool_segment_seq_allocator)); @@ -210,7 +218,7 @@ void ExtentPlacementManager::init( assert(trimmer->get_journal_type() == journal_type_t::RANDOM_BLOCK); auto rb_cleaner = dynamic_cast(cleaner.get()); ceph_assert(rb_cleaner != nullptr); - auto num_writers = generation_to_writer(REWRITE_GENERATIONS); + auto num_writers = generation_to_writer(dynamic_max_rewrite_generation + 1); data_writers_by_gen.resize(num_writers, {}); md_writers_by_gen.resize(num_writers, {}); writer_refs.emplace_back(std::make_unique( @@ -223,7 +231,34 @@ void ExtentPlacementManager::init( } } - background_process.init(std::move(trimmer), std::move(cleaner)); + if (cold_segment_cleaner) { + for (rewrite_gen_t gen = MIN_COLD_GENERATION; gen < REWRITE_GENERATIONS; ++gen) { + writer_refs.emplace_back(std::make_unique( + data_category_t::DATA, gen, *cold_segment_cleaner, + *ool_segment_seq_allocator)); + data_writers_by_gen[generation_to_writer(gen)] = writer_refs.back().get(); + } + for (rewrite_gen_t gen = MIN_COLD_GENERATION; gen < REWRITE_GENERATIONS; ++gen) { + writer_refs.emplace_back(std::make_unique( + data_category_t::METADATA, gen, *cold_segment_cleaner, + *ool_segment_seq_allocator)); + md_writers_by_gen[generation_to_writer(gen)] = writer_refs.back().get(); + } + for (auto *device : cold_segment_cleaner->get_segment_manager_group() + ->get_segment_managers()) { + add_device(device); + } + } + + background_process.init(std::move(trimmer), + std::move(cleaner), + std::move(cold_cleaner)); + if (cold_segment_cleaner) { + ceph_assert(get_main_backend_type() == backend_type_t::SEGMENTED); + ceph_assert(background_process.has_cold_tier()); + } else { + ceph_assert(!background_process.has_cold_tier()); + } } void ExtentPlacementManager::set_primary_device(Device *device) @@ -269,11 +304,17 @@ ExtentPlacementManager::dispatch_delayed_extents(Transaction &t) } for (auto &extent : res.delayed_extents) { - res.usage.cleaner_usage.main_usage += extent->get_length(); if (dispatch_delayed_extent(extent)) { res.usage.inline_usage += extent->get_length(); + res.usage.cleaner_usage.main_usage += extent->get_length(); t.mark_delayed_extent_inline(extent); } else { + if (extent->get_rewrite_generation() < MIN_COLD_GENERATION) { + res.usage.cleaner_usage.main_usage += extent->get_length(); + } else { + assert(background_process.has_cold_tier()); + res.usage.cleaner_usage.cold_ool_usage += extent->get_length(); + } t.mark_delayed_extent_ool(extent); auto writer_ptr = get_writer( extent->get_user_hint(), @@ -350,6 +391,11 @@ void ExtentPlacementManager::BackgroundProcess::log_state(const char *caller) co caller, JournalTrimmerImpl::stat_printer_t{*trimmer, true}, AsyncCleaner::stat_printer_t{*main_cleaner, true}); + if (has_cold_tier()) { + DEBUG("caller {}, cold_cleaner: {}", + caller, + AsyncCleaner::stat_printer_t{*cold_cleaner, true}); + } } void ExtentPlacementManager::BackgroundProcess::start_background() @@ -358,6 +404,10 @@ void ExtentPlacementManager::BackgroundProcess::start_background() INFO("{}, {}", JournalTrimmerImpl::stat_printer_t{*trimmer, true}, AsyncCleaner::stat_printer_t{*main_cleaner, true}); + if (has_cold_tier()) { + INFO("cold_cleaner: {}", + AsyncCleaner::stat_printer_t{*cold_cleaner, true}); + } ceph_assert(trimmer->check_is_ready()); ceph_assert(state == state_t::SCAN_SPACE); assert(!is_running()); @@ -388,6 +438,10 @@ ExtentPlacementManager::BackgroundProcess::stop_background() INFO("done, {}, {}", JournalTrimmerImpl::stat_printer_t{*trimmer, true}, AsyncCleaner::stat_printer_t{*main_cleaner, true}); + if (has_cold_tier()) { + INFO("done, cold_cleaner: {}", + AsyncCleaner::stat_printer_t{*cold_cleaner, true}); + } // run_until_halt() can be called at HALT }); } diff --git a/src/crimson/os/seastore/extent_placement_manager.h b/src/crimson/os/seastore/extent_placement_manager.h index 15eb592f57a..ab5ffdf7495 100644 --- a/src/crimson/os/seastore/extent_placement_manager.h +++ b/src/crimson/os/seastore/extent_placement_manager.h @@ -132,7 +132,8 @@ struct cleaner_usage_t { // The size of all extents write to the main devices, including inline extents // and out-of-line extents. std::size_t main_usage = 0; - // TODO: add cold_ool_usage + // The size of extents write to the cold devices + std::size_t cold_ool_usage = 0; }; struct reserve_cleaner_result_t { @@ -157,6 +158,7 @@ struct io_usage_t { return out << "io_usage_t(" << "inline_usage=" << usage.inline_usage << ", main_cleaner_usage=" << usage.cleaner_usage.main_usage + << ", cold_cleaner_usage=" << usage.cleaner_usage.cold_ool_usage << ")"; } }; @@ -180,7 +182,7 @@ public: devices_by_id.resize(DEVICE_ID_MAX, nullptr); } - void init(JournalTrimmerImplRef &&, AsyncCleanerRef &&); + void init(JournalTrimmerImplRef &&, AsyncCleanerRef &&, AsyncCleanerRef &&); SegmentSeqAllocator &get_ool_segment_seq_allocator() const { return *ool_segment_seq_allocator; @@ -275,7 +277,7 @@ public: return alloc_paddr(INLINE_GENERATION, data_category_t::METADATA, length); } - if (get_backend_type() == backend_type_t::SEGMENTED && + if (get_main_backend_type() == backend_type_t::SEGMENTED && is_lba_backref_node(type)) { // with SEGMENTED, lba-backref extents must be INLINE return alloc_paddr(INLINE_GENERATION, data_category_t::METADATA, length); @@ -288,22 +290,22 @@ public: if (get_extent_category(type) == data_category_t::METADATA && gen == INIT_GENERATION) { - if (get_backend_type() == backend_type_t::SEGMENTED) { + if (get_main_backend_type() == backend_type_t::SEGMENTED) { // with SEGMENTED, default not to ool metadata extents to reduce // padding overhead. // TODO: improve padding so we can default to the ool path. return alloc_paddr(INLINE_GENERATION, get_extent_category(type), length); } else { // with RBM, all extents must be OOL - assert(get_backend_type() == + assert(get_main_backend_type() == backend_type_t::RANDOM_BLOCK); return alloc_paddr(OOL_GENERATION, get_extent_category(type), length); } } else { assert(get_extent_category(type) == data_category_t::DATA || gen >= MIN_REWRITE_GENERATION); - if (gen > MAX_REWRITE_GENERATION) { - gen = MAX_REWRITE_GENERATION; + if (gen > dynamic_max_rewrite_generation) { + gen = dynamic_max_rewrite_generation; } else if (gen == INIT_GENERATION) { gen = OOL_GENERATION; } @@ -391,9 +393,9 @@ public: background_process.release_projected_usage(usage); } - backend_type_t get_backend_type() const { + backend_type_t get_main_backend_type() const { if (!background_process.is_no_background()) { - return background_process.get_backend_type(); + return background_process.get_main_backend_type(); } // for test assert(primary_device); @@ -442,6 +444,7 @@ private: assert(hint < placement_hint_t::NUM_HINTS); assert(is_rewrite_generation(gen)); assert(gen != INLINE_GENERATION); + assert(gen <= dynamic_max_rewrite_generation); if (category == data_category_t::DATA) { return data_writers_by_gen[generation_to_writer(gen)]; } else { @@ -462,24 +465,40 @@ private: BackgroundProcess() = default; void init(JournalTrimmerImplRef &&_trimmer, - AsyncCleanerRef &&_cleaner) { + AsyncCleanerRef &&_cleaner, + AsyncCleanerRef &&_cold_cleaner) { trimmer = std::move(_trimmer); trimmer->set_background_callback(this); main_cleaner = std::move(_cleaner); main_cleaner->set_background_callback(this); + if (_cold_cleaner) { + cold_cleaner = std::move(_cold_cleaner); + cold_cleaner->set_background_callback(this); + } } journal_type_t get_journal_type() const { return trimmer->get_journal_type(); } + bool has_cold_tier() const { + return cold_cleaner.get() != nullptr; + } + void set_extent_callback(ExtentCallbackInterface *cb) { trimmer->set_extent_callback(cb); main_cleaner->set_extent_callback(cb); + if (has_cold_tier()) { + cold_cleaner->set_extent_callback(cb); + } } store_statfs_t get_stat() const { - return main_cleaner->get_stat(); + auto stat = main_cleaner->get_stat(); + if (has_cold_tier()) { + stat.add(cold_cleaner->get_stat()); + } + return stat; } using mount_ret = ExtentPlacementManager::mount_ret; @@ -489,13 +508,18 @@ private: trimmer->reset(); stats = {}; register_metrics(); - return main_cleaner->mount(); + return main_cleaner->mount( + ).safe_then([this] { + return has_cold_tier() ? cold_cleaner->mount() : mount_ertr::now(); + }); } void start_scan_space() { ceph_assert(state == state_t::MOUNT); state = state_t::SCAN_SPACE; ceph_assert(main_cleaner->check_usage_is_empty()); + ceph_assert(!has_cold_tier() || + cold_cleaner->check_usage_is_empty()); } void start_background(); @@ -534,14 +558,15 @@ private: } seastar::future<> stop_background(); - backend_type_t get_backend_type() const { + backend_type_t get_main_backend_type() const { return get_journal_type(); } // Testing interfaces bool check_usage() { - return main_cleaner->check_usage(); + return main_cleaner->check_usage() && + (!has_cold_tier() || cold_cleaner->check_usage()); } seastar::future<> run_until_halt(); @@ -635,6 +660,11 @@ private: JournalTrimmerImplRef trimmer; AsyncCleanerRef main_cleaner; + /* + * cold tier (optional, see has_cold_tier()) + */ + AsyncCleanerRef cold_cleaner; + std::optional> process_join; std::optional> blocking_background; std::optional> blocking_io; @@ -651,6 +681,7 @@ private: Device* primary_device = nullptr; std::size_t num_devices = 0; + rewrite_gen_t dynamic_max_rewrite_generation; BackgroundProcess background_process; // TODO: drop once paddr->journal_seq_t is introduced SegmentSeqAllocatorRef ool_segment_seq_allocator; diff --git a/src/crimson/os/seastore/seastore_types.h b/src/crimson/os/seastore/seastore_types.h index 8d1bace0eea..be7656e6cfc 100644 --- a/src/crimson/os/seastore/seastore_types.h +++ b/src/crimson/os/seastore/seastore_types.h @@ -1149,14 +1149,9 @@ constexpr rewrite_gen_t OOL_GENERATION = 2; // All the rewritten extents start with MIN_REWRITE_GENERATION constexpr rewrite_gen_t MIN_REWRITE_GENERATION = 3; -constexpr rewrite_gen_t MAX_REWRITE_GENERATION = 4; - -/** - * TODO: - * For tiering, might introduce 5 and 6 for the cold tier, and 1 ~ 4 for the - * hot tier. - */ - +// without cold tier, the largest generation is less than MIN_COLD_GENERATION +constexpr rewrite_gen_t MIN_COLD_GENERATION = 5; +constexpr rewrite_gen_t MAX_REWRITE_GENERATION = 7; constexpr rewrite_gen_t REWRITE_GENERATIONS = MAX_REWRITE_GENERATION + 1; constexpr rewrite_gen_t NULL_GENERATION = std::numeric_limits::max(); -- 2.39.5