From: Xuehan Xu Date: Thu, 18 Dec 2025 07:00:43 +0000 (+0800) Subject: crimson/os/seastore: skip backref operations for pure rbm seastore X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=28de7b1a1a09990f7d528147bd078c8019085c0d;p=ceph.git crimson/os/seastore: skip backref operations for pure rbm seastore instances Signed-off-by: Xuehan Xu --- diff --git a/src/crimson/os/seastore/async_cleaner.cc b/src/crimson/os/seastore/async_cleaner.cc index f7592810a401..c52bcb75659c 100644 --- a/src/crimson/os/seastore/async_cleaner.cc +++ b/src/crimson/os/seastore/async_cleaner.cc @@ -8,6 +8,7 @@ #include "crimson/os/seastore/async_cleaner.h" #include "crimson/os/seastore/backref_manager.h" +#include "crimson/os/seastore/lba_manager.h" #include "crimson/os/seastore/transaction_manager.h" SET_SUBSYS(seastore_cleaner); @@ -413,8 +414,10 @@ JournalTrimmerImpl::JournalTrimmerImpl( config_t config, backend_type_t type, device_off_t roll_start, - device_off_t roll_size) - : backref_manager(backref_manager), + device_off_t roll_size, + bool tail_include_alloc) + : JournalTrimmer(tail_include_alloc), + backref_manager(backref_manager), config(config), backend_type(type), roll_start(roll_start), @@ -487,7 +490,7 @@ void JournalTrimmerImpl::update_journal_tails( } } - if (alloc_tail != JOURNAL_SEQ_NULL) { + if (tail_include_alloc && alloc_tail != JOURNAL_SEQ_NULL) { ceph_assert(journal_head == JOURNAL_SEQ_NULL || journal_head >= alloc_tail); if (journal_alloc_tail != JOURNAL_SEQ_NULL && @@ -584,7 +587,8 @@ std::size_t JournalTrimmerImpl::get_dirty_journal_size() const std::size_t JournalTrimmerImpl::get_alloc_journal_size() const { - if (!background_callback->is_ready()) { + if (!background_callback->is_ready() || + !tail_include_alloc) { return 0; } auto ret = journal_head.relative_to( @@ -1567,7 +1571,7 @@ SegmentCleaner::scan_extents_ret SegmentCleaner::scan_no_tail_segment( }); } -bool SegmentCleaner::check_usage() +bool 
SegmentCleaner::check_usage(bool) { SpaceTrackerIRef tracker(space_tracker->make_empty()); extent_callback->with_transaction_weak( @@ -1767,10 +1771,12 @@ void SegmentCleaner::print(std::ostream &os, bool is_detailed) const RBMCleaner::RBMCleaner( RBMDeviceGroupRef&& rb_group, BackrefManager &backref_manager, + LBAManager &lba_manager, bool detailed) : detailed(detailed), rb_group(std::move(rb_group)), - backref_manager(backref_manager) + backref_manager(backref_manager), + lba_manager(lba_manager) {} void RBMCleaner::print(std::ostream &os, bool is_detailed) const @@ -1873,7 +1879,7 @@ RBMCleaner::mount_ret RBMCleaner::mount() }); } -bool RBMCleaner::check_usage() +bool RBMCleaner::check_usage(bool has_cold_tier) { assert(detailed); const auto& rbms = rb_group->get_rb_managers(); @@ -1881,39 +1887,56 @@ bool RBMCleaner::check_usage() extent_callback->with_transaction_weak( "check_usage", CACHE_HINT_NOCACHE, - [this, &tracker, &rbms](auto &t) { - return backref_manager.scan_mapped_space( - t, - [&tracker, &rbms]( - paddr_t paddr, - paddr_t backref_key, - extent_len_t len, - extent_types_t type, - laddr_t laddr) - { - for (auto rbm : rbms) { - if (rbm->get_device_id() == paddr.get_device_id()) { - if (is_backref_node(type)) { - assert(laddr == L_ADDR_NULL); - assert(backref_key.is_absolute_random_block() - || backref_key == P_ADDR_MIN); - tracker.allocate( - paddr, - len); - } else if (laddr == L_ADDR_NULL) { - assert(backref_key == P_ADDR_NULL); - tracker.release( - paddr, - len); - } else { - assert(backref_key == P_ADDR_NULL); - tracker.allocate( - paddr, - len); - } - } - } - }); + [this, &tracker, &rbms, has_cold_tier](auto &t) { + if (has_cold_tier) { + return backref_manager.scan_mapped_space( + t, + [&tracker, &rbms]( + paddr_t paddr, + paddr_t backref_key, + extent_len_t len, + extent_types_t type, + laddr_t laddr) + { + for (auto rbm : rbms) { + if (rbm->get_device_id() == paddr.get_device_id()) { + if (is_backref_node(type)) { + assert(laddr == 
L_ADDR_NULL); + assert(backref_key.is_absolute_random_block() + || backref_key == P_ADDR_MIN); + tracker.allocate( + paddr, + len); + } else if (laddr == L_ADDR_NULL) { + assert(backref_key == P_ADDR_NULL); + tracker.release( + paddr, + len); + } else { + assert(backref_key == P_ADDR_NULL); + tracker.allocate( + paddr, + len); + } + } + } + }); + } else { + return lba_manager.scan_mapped_space( + t, + [&tracker, &rbms]( + paddr_t paddr, + extent_len_t len, + extent_types_t type, + laddr_t laddr) + { + for (auto rbm : rbms) { + if (rbm->get_device_id() == paddr.get_device_id()) { + tracker.allocate(paddr, len); + } + } + }); + } }).unsafe_get(); return equals(tracker); } diff --git a/src/crimson/os/seastore/async_cleaner.h b/src/crimson/os/seastore/async_cleaner.h index 07fc8a81bf73..0f335f4edfb8 100644 --- a/src/crimson/os/seastore/async_cleaner.h +++ b/src/crimson/os/seastore/async_cleaner.h @@ -431,6 +431,8 @@ struct BackgroundListener { */ class JournalTrimmer { public: + JournalTrimmer(bool tail_include_alloc) + : tail_include_alloc(tail_include_alloc) {} // get the committed journal head virtual journal_seq_t get_journal_head() const = 0; @@ -465,7 +467,11 @@ public: virtual ~JournalTrimmer() {} journal_seq_t get_journal_tail() const { - return std::min(get_alloc_tail(), get_dirty_tail()); + if (tail_include_alloc) { + return std::min(get_alloc_tail(), get_dirty_tail()); + } else { + return get_dirty_tail(); + } } virtual std::size_t get_trim_size_per_cycle() const = 0; @@ -473,7 +479,8 @@ public: bool check_is_ready() const { return (get_journal_head() != JOURNAL_SEQ_NULL && get_dirty_tail() != JOURNAL_SEQ_NULL && - get_alloc_tail() != JOURNAL_SEQ_NULL); + (get_alloc_tail() != JOURNAL_SEQ_NULL || + !tail_include_alloc)); } std::size_t get_num_rolls() const { @@ -487,9 +494,12 @@ public: return get_journal_head_sequence() + 1 - get_journal_tail().segment_seq; } +protected: + bool tail_include_alloc = true; }; class BackrefManager; +class LBAManager; class 
JournalTrimmerImpl; using JournalTrimmerImplRef = std::unique_ptr; @@ -531,7 +541,8 @@ public: config_t config, backend_type_t type, device_off_t roll_start, - device_off_t roll_size); + device_off_t roll_size, + bool tail_include_alloc); ~JournalTrimmerImpl() = default; @@ -618,9 +629,11 @@ public: config_t config, backend_type_t type, device_off_t roll_start, - device_off_t roll_size) { + device_off_t roll_size, + bool tail_include_alloc) { return std::make_unique( - backref_manager, config, type, roll_start, roll_size); + backref_manager, config, type, roll_start, + roll_size, tail_include_alloc); } struct stat_printer_t { @@ -638,7 +651,14 @@ private: return target <= journal_dirty_tail; } + bool can_drop_backref() const { + return get_backend_type() == backend_type_t::RANDOM_BLOCK; + } + bool should_trim_alloc() const { + if (can_drop_backref()) { + return false; + } return get_alloc_tail_target() > journal_alloc_tail; } @@ -1228,7 +1248,7 @@ public: #endif // test only - virtual bool check_usage() = 0; + virtual bool check_usage(bool has_cold_tier) = 0; struct stat_printer_t { const AsyncCleaner &cleaner; @@ -1429,7 +1449,7 @@ public: // Testing interfaces - bool check_usage() final; + bool check_usage(bool has_cold_tier) final; private: /* @@ -1692,14 +1712,16 @@ public: RBMCleaner( RBMDeviceGroupRef&& rb_group, BackrefManager &backref_manager, + LBAManager &lba_manager, bool detailed); static RBMCleanerRef create( RBMDeviceGroupRef&& rb_group, BackrefManager &backref_manager, + LBAManager &lba_manager, bool detailed) { return std::make_unique( - std::move(rb_group), backref_manager, detailed); + std::move(rb_group), backref_manager, lba_manager, detailed); } RBMDeviceGroup* get_rb_group() { @@ -1829,7 +1851,7 @@ public: // Testing interfaces - bool check_usage() final; + bool check_usage(bool has_cold_tier) final; bool check_usage_is_empty() const final { // TODO @@ -1842,6 +1864,7 @@ private: const bool detailed; RBMDeviceGroupRef rb_group; BackrefManager 
&backref_manager; + LBAManager &lba_manager; struct { /** diff --git a/src/crimson/os/seastore/cache.cc b/src/crimson/os/seastore/cache.cc index 5b8371f0ab83..864654e3488b 100644 --- a/src/crimson/os/seastore/cache.cc +++ b/src/crimson/os/seastore/cache.cc @@ -1446,11 +1446,13 @@ record_t Cache::prepare_record( extent->get_paddr(), extent->get_length(), extent->get_type())); - backref_entries.emplace_back( - backref_entry_t::create_retire( - extent->get_paddr(), - extent->get_length(), - extent->get_type())); + if (!can_drop_backref()) { + backref_entries.emplace_back( + backref_entry_t::create_retire( + extent->get_paddr(), + extent->get_length(), + extent->get_type())); + } } else if (is_backref_node(extent->get_type())) { // The retire alloc deltas are used to identify the invalid backref extent // deltas during replay when using CircularBoundedJournal, see @@ -1460,7 +1462,9 @@ record_t Cache::prepare_record( extent->get_paddr(), extent->get_length(), extent->get_type())); - remove_backref_extent(extent->get_paddr()); + if (!can_drop_backref()) { + remove_backref_extent(extent->get_paddr()); + } } else { ERRORT("Got unexpected extent type: {}", t, *extent); ceph_abort_msg("imposible"); @@ -1632,6 +1636,10 @@ record_t Cache::prepare_record( i->get_length(), i->get_type())); + if (can_drop_backref()) { + continue; + } + // Note: commit extents and backref allocations in the same place // Note: remapping is split into 2 steps, retire and alloc, they must be // committed atomically together @@ -1696,8 +1704,10 @@ record_t Cache::prepare_record( record.push_back(std::move(delta)); } - apply_backref_mset(backref_entries); - t.set_backref_entries(std::move(backref_entries)); + if (!can_drop_backref()) { + apply_backref_mset(backref_entries); + t.set_backref_entries(std::move(backref_entries)); + } ceph_assert(t.get_fresh_block_stats().num == t.inline_block_list.size() + @@ -1871,6 +1881,9 @@ void Cache::complete_commit( i->complete_io(); 
epm.commit_space_used(i->get_paddr(), i->get_length()); + if (can_drop_backref()) { + return; + } // Note: commit extents and backref allocations in the same place if (is_backref_mapped_type(i->get_type())) { DEBUGT("backref_entry alloc {}~0x{:x}", @@ -1944,8 +1957,10 @@ void Cache::complete_commit( last_commit = start_seq; - apply_backref_byseq(t.move_backref_entries(), start_seq); - commit_backref_entries(std::move(backref_entries), start_seq); + if (!can_drop_backref()) { + apply_backref_byseq(t.move_backref_entries(), start_seq); + commit_backref_entries(std::move(backref_entries), start_seq); + } } void Cache::init() @@ -2017,7 +2032,9 @@ Cache::replay_delta( { LOG_PREFIX(Cache::replay_delta); assert(dirty_tail != JOURNAL_SEQ_NULL); - assert(alloc_tail != JOURNAL_SEQ_NULL); + if (!can_drop_backref()) { + assert(alloc_tail != JOURNAL_SEQ_NULL); + } ceph_assert(modify_time != NULL_TIME); // FIXME: This is specific to the segmented implementation @@ -2055,6 +2072,11 @@ Cache::replay_delta( // replay alloc if (delta.type == extent_types_t::ALLOC_INFO) { + if (can_drop_backref()) { + return replay_delta_ertr::make_ready_future< + std::pair>(std::make_pair(false, nullptr)); + } + if (journal_seq < alloc_tail) { DEBUG("journal_seq {} < alloc_tail {}, don't replay {}", journal_seq, alloc_tail, delta); diff --git a/src/crimson/os/seastore/cache.h b/src/crimson/os/seastore/cache.h index 9561bd1516b8..b5967d9f808b 100644 --- a/src/crimson/os/seastore/cache.h +++ b/src/crimson/os/seastore/cache.h @@ -743,6 +743,10 @@ public: return query_cache(offset); } + bool can_drop_backref() const { + return epm.is_pure_rbm(); + } + private: using get_extent_ertr = base_ertr; template diff --git a/src/crimson/os/seastore/extent_placement_manager.h b/src/crimson/os/seastore/extent_placement_manager.h index 7d98978630e2..a5a6846e8565 100644 --- a/src/crimson/os/seastore/extent_placement_manager.h +++ b/src/crimson/os/seastore/extent_placement_manager.h @@ -560,6 +560,13 @@ public: 
return primary_device->get_backend_type(); } + + bool is_pure_rbm() const { + return get_main_backend_type() == backend_type_t::RANDOM_BLOCK && + // as of now, cold tier can only be segmented. + !background_process.has_cold_tier(); + } + // Testing interfaces void test_init_no_background(Device *test_device) { @@ -855,8 +862,8 @@ private: // Testing interfaces bool check_usage() { - return main_cleaner->check_usage() && - (!has_cold_tier() || cold_cleaner->check_usage()); + return main_cleaner->check_usage(has_cold_tier()) && + (!has_cold_tier() || cold_cleaner->check_usage(true)); } seastar::future<> run_until_halt(); diff --git a/src/crimson/os/seastore/seastore_types.h b/src/crimson/os/seastore/seastore_types.h index 706217ea42a9..91197c040d7b 100644 --- a/src/crimson/os/seastore/seastore_types.h +++ b/src/crimson/os/seastore/seastore_types.h @@ -3136,6 +3136,7 @@ template <> struct fmt::formatter : template <> struct fmt::formatter : fmt::ostream_formatter {}; template <> struct fmt::formatter : fmt::ostream_formatter {}; template <> struct fmt::formatter : fmt::ostream_formatter {}; +template <> struct fmt::formatter : fmt::ostream_formatter {}; template <> struct fmt::formatter : fmt::ostream_formatter {}; template <> struct fmt::formatter : fmt::ostream_formatter {}; template <> struct fmt::formatter : fmt::ostream_formatter {}; diff --git a/src/crimson/os/seastore/transaction_manager.cc b/src/crimson/os/seastore/transaction_manager.cc index ca1a45af9a77..7baea2cd82c4 100644 --- a/src/crimson/os/seastore/transaction_manager.cc +++ b/src/crimson/os/seastore/transaction_manager.cc @@ -143,31 +143,46 @@ TransactionManager::mount() } }).si_then([this, &t] { epm->start_scan_space(); - return backref_manager->scan_mapped_space( - t, - [this]( - paddr_t paddr, - paddr_t backref_key, - extent_len_t len, - extent_types_t type, - laddr_t laddr) { - assert(paddr.is_absolute()); - if (is_backref_node(type)) { - assert(laddr == L_ADDR_NULL); - 
assert(backref_key.is_absolute() || backref_key == P_ADDR_MIN); - backref_manager->cache_new_backref_extent(paddr, backref_key, type); + if (can_drop_backref()) { + return lba_manager->scan_mapped_space( + t, + [this]( + paddr_t paddr, + extent_len_t len, + extent_types_t type, + laddr_t laddr) { + assert(paddr.is_absolute()); cache->update_tree_extents_num(type, 1); epm->mark_space_used(paddr, len); - } else if (laddr == L_ADDR_NULL) { - assert(backref_key == P_ADDR_NULL); - cache->update_tree_extents_num(type, -1); - epm->mark_space_free(paddr, len); - } else { - assert(backref_key == P_ADDR_NULL); - cache->update_tree_extents_num(type, 1); - epm->mark_space_used(paddr, len); - } - }); + }); + } else { + return backref_manager->scan_mapped_space( + t, + [this]( + paddr_t paddr, + paddr_t backref_key, + extent_len_t len, + extent_types_t type, + laddr_t laddr) { + assert(paddr.is_absolute()); + if (is_backref_node(type)) { + assert(laddr == L_ADDR_NULL); + assert(backref_key.is_absolute() || backref_key == P_ADDR_MIN); + backref_manager->cache_new_backref_extent( + paddr, backref_key, type); + cache->update_tree_extents_num(type, 1); + epm->mark_space_used(paddr, len); + } else if (laddr == L_ADDR_NULL) { + assert(backref_key == P_ADDR_NULL); + cache->update_tree_extents_num(type, -1); + epm->mark_space_free(paddr, len); + } else { + assert(backref_key == P_ADDR_NULL); + cache->update_tree_extents_num(type, 1); + epm->mark_space_used(paddr, len); + } + }); + } }); }); }).safe_then([this] { @@ -983,6 +998,7 @@ TransactionManagerRef make_transaction_manager( shard_stats_t& shard_stats, bool is_test) { + LOG_PREFIX(make_transaction_manager); rewrite_gen_t hot_tier_generations = crimson::common::get_conf( "seastore_hot_tier_generations"); rewrite_gen_t cold_tier_generations = crimson::common::get_conf( @@ -1061,9 +1077,12 @@ TransactionManagerRef make_transaction_manager( roll_size, backend_type); } + bool pure_rbm_backend = + (p_backend_type == 
backend_type_t::RANDOM_BLOCK) && !cold_sms; auto journal_trimmer = JournalTrimmerImpl::create( *backref_manager, trimmer_config, - backend_type, roll_start, roll_size); + backend_type, roll_start, roll_size, + !pure_rbm_backend); AsyncCleanerRef cleaner; JournalRef journal; @@ -1108,6 +1127,7 @@ TransactionManagerRef make_transaction_manager( cleaner = RBMCleaner::create( std::move(rbs), *backref_manager, + *lba_manager, cleaner_is_detailed); journal = journal::make_circularbounded( *journal_trimmer, @@ -1122,6 +1142,8 @@ TransactionManagerRef make_transaction_manager( std::move(cold_segment_cleaner)); epm->set_primary_device(primary_device); + INFO("main backend type: {}, cold tier: {}", + epm->get_main_backend_type(), (bool)cold_sms); return std::make_unique( std::move(journal), std::move(cache), diff --git a/src/crimson/os/seastore/transaction_manager.h b/src/crimson/os/seastore/transaction_manager.h index 8e8086c85955..1ba7599051f8 100644 --- a/src/crimson/os/seastore/transaction_manager.h +++ b/src/crimson/os/seastore/transaction_manager.h @@ -1172,6 +1172,10 @@ private: shard_stats_t& shard_stats; + bool can_drop_backref() const { + return cache->can_drop_backref(); + } + using LBALeafNode = lba::LBALeafNode; struct unlinked_child_t { LBAMapping mapping; diff --git a/src/test/crimson/seastore/test_btree_lba_manager.cc b/src/test/crimson/seastore/test_btree_lba_manager.cc index 53b3d74bd067..c91a6a223854 100644 --- a/src/test/crimson/seastore/test_btree_lba_manager.cc +++ b/src/test/crimson/seastore/test_btree_lba_manager.cc @@ -45,7 +45,7 @@ struct btree_test_base : mutable segment_info_t tmp_info; - btree_test_base() = default; + btree_test_base() : JournalTrimmer(true) {} /* * JournalTrimmer interfaces diff --git a/src/test/crimson/seastore/test_cbjournal.cc b/src/test/crimson/seastore/test_cbjournal.cc index a7c31fc92d53..22b26b53aba4 100644 --- a/src/test/crimson/seastore/test_cbjournal.cc +++ b/src/test/crimson/seastore/test_cbjournal.cc @@ -142,7 +142,7 
@@ struct cbjournal_test_t : public seastar_test_suite_t, JournalTrimmer uint64_t block_size; WritePipeline pipeline; - cbjournal_test_t() = default; + cbjournal_test_t() : JournalTrimmer(true) {} /* * JournalTrimmer interfaces diff --git a/src/test/crimson/seastore/test_seastore_journal.cc b/src/test/crimson/seastore/test_seastore_journal.cc index d3ad64d6a650..293874080966 100644 --- a/src/test/crimson/seastore/test_seastore_journal.cc +++ b/src/test/crimson/seastore/test_seastore_journal.cc @@ -85,7 +85,7 @@ struct journal_test_t : seastar_test_suite_t, SegmentProvider, JournalTrimmer { mutable segment_info_t tmp_info; - journal_test_t() = default; + journal_test_t() : JournalTrimmer(true) {} /* * JournalTrimmer interfaces