From b413f76b0a5f9990bf41cf1a0295798cd4da3bd0 Mon Sep 17 00:00:00 2001 From: Xuehan Xu Date: Sat, 14 Dec 2024 21:26:33 +0800 Subject: [PATCH] crimson/os/seastore: introduce cache_hint_t Layers above Cache can use cache_hint_t to notify Cache whether to put the extents to Cache::lru. CEPH_OSD_OP_FLAG_FADVISE_DONTNEED and CEPH_OSD_OP_FLAG_FADVISE_NOCACHE will be treated as "no need to put into Cache::lru" Signed-off-by: Xuehan Xu --- src/crimson/os/seastore/async_cleaner.cc | 6 ++++ src/crimson/os/seastore/async_cleaner.h | 14 +++++--- src/crimson/os/seastore/cache.cc | 8 ++--- src/crimson/os/seastore/cache.h | 23 ++++++------- src/crimson/os/seastore/seastore.cc | 10 ++++-- src/crimson/os/seastore/seastore.h | 11 ++++--- src/crimson/os/seastore/seastore_types.h | 33 +++++++++++++++++++ src/crimson/os/seastore/transaction.h | 15 +++++++-- .../os/seastore/transaction_manager.cc | 2 ++ src/crimson/os/seastore/transaction_manager.h | 3 +- src/crimson/tools/store_nbd/tm_driver.cc | 2 ++ .../seastore/test_btree_lba_manager.cc | 25 +++++++++++--- .../crimson/seastore/test_seastore_cache.cc | 5 ++- 13 files changed, 122 insertions(+), 35 deletions(-) diff --git a/src/crimson/os/seastore/async_cleaner.cc b/src/crimson/os/seastore/async_cleaner.cc index 341c5c5524a..64e6749562e 100644 --- a/src/crimson/os/seastore/async_cleaner.cc +++ b/src/crimson/os/seastore/async_cleaner.cc @@ -609,6 +609,7 @@ JournalTrimmerImpl::trim_alloc() return extent_callback->with_transaction_intr( Transaction::src_t::TRIM_ALLOC, "trim_alloc", + CACHE_HINT_NOCACHE, [this, FNAME](auto &t) { auto target = get_alloc_tail_target(); @@ -653,6 +654,7 @@ JournalTrimmerImpl::trim_dirty() return extent_callback->with_transaction_intr( Transaction::src_t::TRIM_DIRTY, "trim_dirty", + CACHE_HINT_NOCACHE, [this, FNAME](auto &t) { auto target = get_dirty_tail_target(); @@ -1125,6 +1127,7 @@ SegmentCleaner::do_reclaim_space( return extent_callback->with_transaction_intr( src, "clean_reclaim_space", + CACHE_HINT_NOCACHE, [this, &backref_extents, &pin_list, &reclaimed](auto &t) { return seastar::do_with( @@ -1240,6 +1243,7 @@ SegmentCleaner::clean_space_ret SegmentCleaner::clean_space() return extent_callback->with_transaction_intr( Transaction::src_t::READ, "retrieve_from_backref_tree", + CACHE_HINT_NOCACHE, [this, &weak_read_ret](auto &t) { return backref_manager.get_mappings( t, @@ -1506,6 +1510,7 @@ bool SegmentCleaner::check_usage() SpaceTrackerIRef tracker(space_tracker->make_empty()); extent_callback->with_transaction_weak( "check_usage", + CACHE_HINT_NOCACHE, [this, &tracker](auto &t) { return backref_manager.scan_mapped_space( t, @@ -1812,6 +1817,7 @@ bool RBMCleaner::check_usage() RBMSpaceTracker tracker(rbms); extent_callback->with_transaction_weak( "check_usage", + CACHE_HINT_NOCACHE, [this, &tracker, &rbms](auto &t) { return backref_manager.scan_mapped_space( t, diff --git a/src/crimson/os/seastore/async_cleaner.h b/src/crimson/os/seastore/async_cleaner.h index 424247c5bdc..01ab44c4c7c 100644 --- a/src/crimson/os/seastore/async_cleaner.h +++ b/src/crimson/os/seastore/async_cleaner.h @@ -299,24 +299,29 @@ public: /// Creates empty transaction /// weak transaction should be type READ virtual TransactionRef create_transaction( - Transaction::src_t, const char *name, bool is_weak=false) = 0; + Transaction::src_t, + const char *name, + cache_hint_t cache_hint = CACHE_HINT_TOUCH, + bool is_weak=false) = 0; /// Creates empty transaction with interruptible context template auto with_transaction_intr( Transaction::src_t src, const char* name, + cache_hint_t cache_hint, Func &&f) { return do_with_transaction_intr( - src, name, std::forward(f)); + src, name, cache_hint, std::forward(f)); } template auto with_transaction_weak( const char* name, + cache_hint_t cache_hint, Func &&f) { return do_with_transaction_intr( - Transaction::src_t::READ, name, std::forward(f) + Transaction::src_t::READ, name, cache_hint, std::forward(f) ).handle_error( crimson::ct_error::eagain::assert_failure{"unexpected eagain"}, crimson::ct_error::pass_further_all{} @@ -385,9 +390,10 @@ private: auto do_with_transaction_intr( Transaction::src_t src, const char* name, + cache_hint_t cache_hint, Func &&f) { return seastar::do_with( - create_transaction(src, name, IsWeak), + create_transaction(src, name, cache_hint, IsWeak), [f=std::forward(f)](auto &ref_t) mutable { return with_trans_intr( *ref_t, diff --git a/src/crimson/os/seastore/cache.cc b/src/crimson/os/seastore/cache.cc index 5898b9bad0a..86f816e1648 100644 --- a/src/crimson/os/seastore/cache.cc +++ b/src/crimson/os/seastore/cache.cc @@ -1483,7 +1483,7 @@ record_t Cache::prepare_record( i->state = CachedExtent::extent_state_t::CLEAN; assert(i->is_logical()); i->clear_modified_region(); - touch_extent(*i, &trans_src); + touch_extent(*i, &trans_src, t.get_cache_hint()); DEBUGT("inplace rewrite ool block is commmitted -- {}", t, *i); } @@ -1513,7 +1513,7 @@ record_t Cache::prepare_record( if (i->is_dirty()) { add_to_dirty(i, &t_src); } else { - touch_extent(*i, &t_src); + touch_extent(*i, &t_src, t.get_cache_hint()); } alloc_delta.alloc_blk_ranges.emplace_back( @@ -1759,7 +1759,7 @@ void Cache::complete_commit( add_extent(i); assert(!i->is_dirty()); const auto t_src = t.get_src(); - touch_extent(*i, &t_src); + touch_extent(*i, &t_src, t.get_cache_hint()); epm.commit_space_used(i->get_paddr(), i->get_length()); // Note: commit extents and backref allocations in the same place @@ -2026,7 +2026,7 @@ Cache::replay_delta( [](CachedExtent &) {}, [this](CachedExtent &ext) { // replay is not included by the cache hit metrics - touch_extent(ext, nullptr); + touch_extent(ext, nullptr, CACHE_HINT_TOUCH); }, nullptr) : _get_extent_if_cached( diff --git a/src/crimson/os/seastore/cache.h b/src/crimson/os/seastore/cache.h index b2248ff37dd..a239b861726 100644 --- a/src/crimson/os/seastore/cache.h +++ b/src/crimson/os/seastore/cache.h @@ -124,6 +124,7 @@ public: TransactionRef create_transaction( Transaction::src_t src, const char* name, + cache_hint_t cache_hint, bool is_weak) { LOG_PREFIX(Cache::create_transaction); @@ -137,7 +138,8 @@ public: [this](Transaction& t) { return on_transaction_destruct(t); }, - ++next_id + ++next_id, + cache_hint ); SUBDEBUGT(seastore_t, "created name={}, source={}, is_weak={}", *ret, name, src, is_weak); @@ -284,7 +286,7 @@ public: SUBDEBUGT(seastore_cache, "{} {} is present in cache -- {}", t, type, offset, *ret); t.add_to_read_set(ret); - touch_extent(*ret, &t_src); + touch_extent(*ret, &t_src, t.get_cache_hint()); return ret->wait_io().then([ret] { return get_extent_if_cached_iertr::make_ready_future< CachedExtentRef>(ret); @@ -341,7 +343,7 @@ public: t, T::TYPE, offset, length); auto f = [&t, this, t_src](CachedExtent &ext) { t.add_to_read_set(CachedExtentRef(&ext)); - touch_extent(ext, &t_src); + touch_extent(ext, &t_src, t.get_cache_hint()); }; return trans_intr::make_interruptible( do_get_caching_extent( @@ -389,7 +391,7 @@ public: ++stats.access.s.load_absent; t.add_to_read_set(CachedExtentRef(&ext)); - touch_extent(ext, &t_src); + touch_extent(ext, &t_src, t.get_cache_hint()); }; return trans_intr::make_interruptible( do_get_caching_extent( @@ -487,7 +489,7 @@ public: ++access_stats.cache_lru; ++stats.access.s.cache_lru; } - touch_extent(*p_extent, &t_src); + touch_extent(*p_extent, &t_src, t.get_cache_hint()); } else { if (p_extent->is_dirty()) { ++access_stats.trans_dirty; @@ -834,7 +836,7 @@ private: t, type, offset, length, laddr); auto f = [&t, this, t_src](CachedExtent &ext) { t.add_to_read_set(CachedExtentRef(&ext)); - touch_extent(ext, &t_src); + touch_extent(ext, &t_src, t.get_cache_hint()); }; return trans_intr::make_interruptible( do_get_caching_extent_by_type( @@ -876,7 +878,7 @@ private: ++stats.access.s.load_absent; t.add_to_read_set(CachedExtentRef(&ext)); - touch_extent(ext, &t_src); + touch_extent(ext, &t_src, t.get_cache_hint()); }; return trans_intr::make_interruptible( do_get_caching_extent_by_type( @@ -1472,11 +1474,10 @@ private: /// Update lru for access to ref void touch_extent( CachedExtent &ext, - const Transaction::src_t* p_src) + const Transaction::src_t* p_src, + cache_hint_t hint) { - if (p_src && - is_background_transaction(*p_src) && - is_logical_type(ext.get_type())) { + if (hint == CACHE_HINT_NOCACHE && is_logical_type(ext.get_type())) { return; } if (ext.is_stable_clean() && !ext.is_placeholder()) { diff --git a/src/crimson/os/seastore/seastore.cc b/src/crimson/os/seastore/seastore.cc index 5b51083f344..e3d8092837f 100644 --- a/src/crimson/os/seastore/seastore.cc +++ b/src/crimson/os/seastore/seastore.cc @@ -408,6 +408,7 @@ SeaStore::Shard::mkfs_managers() return transaction_manager->with_transaction_intr( Transaction::src_t::MUTATE, "mkfs_seastore", + CACHE_HINT_TOUCH, [this](auto& t) { LOG_PREFIX(SeaStoreS::mkfs_managers); @@ -917,6 +918,7 @@ SeaStore::Shard::list_objects(CollectionRef ch, return transaction_manager->with_transaction_intr( Transaction::src_t::READ, "list_objects", + CACHE_HINT_TOUCH, [this, ch, start, end, &limit, &ret](auto &t) { LOG_PREFIX(SeaStoreS::list_objects); @@ -1054,6 +1056,7 @@ SeaStore::Shard::list_collections() return transaction_manager->with_transaction_intr( Transaction::src_t::READ, "list_collections", + CACHE_HINT_TOUCH, [this, &ret](auto& t) { LOG_PREFIX(SeaStoreS::list_collections); @@ -1138,8 +1141,10 @@ SeaStore::Shard::read( "read", op_type_t::READ, [this, offset, len, op_flags](auto &t, auto &onode) { - return _read(t, onode, offset, len, op_flags); - }).finally([this] { + return _read(t, onode, offset, len, op_flags); + }, + op_flags + ).finally([this] { assert(shard_stats.pending_read_num); --(shard_stats.pending_read_num); }); @@ -2677,6 +2682,7 @@ seastar::future<> SeaStore::Shard::write_meta( return transaction_manager->with_transaction_intr( Transaction::src_t::MUTATE, "write_meta", + CACHE_HINT_NOCACHE, [this, &key, &value](auto& t) { LOG_PREFIX(SeaStoreS::write_meta); diff --git a/src/crimson/os/seastore/seastore.h b/src/crimson/os/seastore/seastore.h index fd7e177da63..a35975f580d 100644 --- a/src/crimson/os/seastore/seastore.h +++ b/src/crimson/os/seastore/seastore.h @@ -251,7 +251,8 @@ public: return seastar::do_with( internal_context_t( ch, std::move(t), - transaction_manager->create_transaction(src, tname)), + transaction_manager->create_transaction( + src, tname, t.get_fadvise_flags())), std::forward(f), [this, op_type](auto &ctx, auto &f) { assert(shard_stats.starting_io_num); @@ -298,20 +299,22 @@ public: Transaction::src_t src, const char* tname, op_type_t op_type, - F &&f) const { + F &&f, + cache_hint_t cache_hint_flags = CACHE_HINT_TOUCH) const { auto begin_time = std::chrono::steady_clock::now(); return seastar::do_with( oid, Ret{}, std::forward(f), - [this, ch, src, op_type, begin_time, tname + [this, ch, src, op_type, begin_time, tname, cache_hint_flags ](auto &oid, auto &ret, auto &f) { - return repeat_eagain([&, this, ch, src, tname] { + return repeat_eagain([&, this, ch, src, tname, cache_hint_flags] { assert(src == Transaction::src_t::READ); ++(shard_stats.repeat_read_num); return transaction_manager->with_transaction_intr( src, tname, + cache_hint_flags, [&, this, ch, tname](auto& t) { LOG_PREFIX(SeaStoreS::repeat_with_onode); diff --git a/src/crimson/os/seastore/seastore_types.h b/src/crimson/os/seastore/seastore_types.h index 335a439dcb5..5930469ca07 100644 --- a/src/crimson/os/seastore/seastore_types.h +++ b/src/crimson/os/seastore/seastore_types.h @@ -20,9 +20,42 @@ #include "include/intarith.h" #include "include/interval_set.h" #include "include/uuid.h" +#include "include/rados.h" namespace crimson::os::seastore { +class cache_hint_t { + enum hint_t { + TOUCH, + NOCACHE + }; +public: + static constexpr cache_hint_t get_touch() { + return hint_t::TOUCH; + } + static constexpr cache_hint_t get_nocache() { + return hint_t::NOCACHE; + } + cache_hint_t(uint32_t flags) { + if (unlikely(flags & CEPH_OSD_OP_FLAG_FADVISE_DONTNEED) || + unlikely(flags & CEPH_OSD_OP_FLAG_FADVISE_NOCACHE)) { + hint = NOCACHE; + } + } + bool operator==(const cache_hint_t &other) const { + return hint == other.hint; + } + bool operator!=(const cache_hint_t &other) const { + return hint != other.hint; + } +private: + constexpr cache_hint_t(hint_t hint) : hint(hint) {} + hint_t hint = hint_t::TOUCH; +}; + +inline constexpr cache_hint_t CACHE_HINT_TOUCH = cache_hint_t::get_touch(); +inline constexpr cache_hint_t CACHE_HINT_NOCACHE = cache_hint_t::get_nocache(); + /* using a special xattr key "omap_header" to store omap header */ const std::string OMAP_HEADER_XATTR_KEY = "omap_header"; diff --git a/src/crimson/os/seastore/transaction.h b/src/crimson/os/seastore/transaction.h index 66a9f896520..cd8c333c69f 100644 --- a/src/crimson/os/seastore/transaction.h +++ b/src/crimson/os/seastore/transaction.h @@ -409,12 +409,14 @@ public: src_t src, journal_seq_t initiated_after, on_destruct_func_t&& f, - transaction_id_t trans_id + transaction_id_t trans_id, + cache_hint_t cache_hint ) : weak(weak), handle(std::move(handle)), on_destruct(std::move(f)), src(src), - trans_id(trans_id) + trans_id(trans_id), + cache_hint(cache_hint) {} void invalidate_clear_write_set() { @@ -573,6 +575,10 @@ public: return pre_alloc_list; } + cache_hint_t get_cache_hint() const { + return cache_hint; + } + private: friend class Cache; friend Ref make_test_transaction(); @@ -682,6 +688,8 @@ private: seastar::lw_shared_ptr pending_ool; backref_entry_refs_t backref_entries; + + cache_hint_t cache_hint = CACHE_HINT_TOUCH; }; using TransactionRef = Transaction::Ref; @@ -694,7 +702,8 @@ inline TransactionRef make_test_transaction() { Transaction::src_t::MUTATE, JOURNAL_SEQ_NULL, [](Transaction&) {}, - ++next_id + ++next_id, + CACHE_HINT_TOUCH ); } diff --git a/src/crimson/os/seastore/transaction_manager.cc b/src/crimson/os/seastore/transaction_manager.cc index 753bd5d6ff6..807d88b2cbc 100644 --- a/src/crimson/os/seastore/transaction_manager.cc +++ b/src/crimson/os/seastore/transaction_manager.cc @@ -66,6 +66,7 @@ TransactionManager::mkfs_ertr::future<> TransactionManager::mkfs() return with_transaction_intr( Transaction::src_t::MUTATE, "mkfs_tm", + CACHE_HINT_TOUCH, [this, FNAME](auto& t) { cache->init(); @@ -131,6 +132,7 @@ TransactionManager::mount() journal->get_trimmer().set_journal_head(start_seq); return with_transaction_weak( "mount", + CACHE_HINT_TOUCH, [this](auto &t) { return cache->init_cached_extents(t, [this](auto &t, auto &e) { diff --git a/src/crimson/os/seastore/transaction_manager.h b/src/crimson/os/seastore/transaction_manager.h index dc6cc20cf59..e574460894a 100644 --- a/src/crimson/os/seastore/transaction_manager.h +++ b/src/crimson/os/seastore/transaction_manager.h @@ -741,8 +741,9 @@ public: TransactionRef create_transaction( Transaction::src_t src, const char* name, + cache_hint_t cache_hint = CACHE_HINT_TOUCH, bool is_weak=false) final { - return cache->create_transaction(src, name, is_weak); + return cache->create_transaction(src, name, cache_hint, is_weak); } using ExtentCallbackInterface::submit_transaction_direct_ret; diff --git a/src/crimson/tools/store_nbd/tm_driver.cc b/src/crimson/tools/store_nbd/tm_driver.cc index 389ecd78afc..870809c5153 100644 --- a/src/crimson/tools/store_nbd/tm_driver.cc +++ b/src/crimson/tools/store_nbd/tm_driver.cc @@ -25,6 +25,7 @@ seastar::future<> TMDriver::write( return tm->with_transaction_intr( Transaction::src_t::MUTATE, "write", + CACHE_HINT_TOUCH, [this, offset, &ptr](auto& t) { return tm->remove(t, laddr_t::from_byte_offset(offset) @@ -112,6 +113,7 @@ seastar::future TMDriver::read( return tm->with_transaction_intr( Transaction::src_t::READ, "read", + CACHE_HINT_TOUCH, [=, &blret, this](auto& t) { return read_extents(t, laddr_t::from_byte_offset(offset), size diff --git a/src/test/crimson/seastore/test_btree_lba_manager.cc b/src/test/crimson/seastore/test_btree_lba_manager.cc index 8b1f7435c87..7874411e0ff 100644 --- a/src/test/crimson/seastore/test_btree_lba_manager.cc +++ b/src/test/crimson/seastore/test_btree_lba_manager.cc @@ -157,7 +157,10 @@ struct btree_test_base : }).safe_then([this] { return seastar::do_with( cache->create_transaction( - Transaction::src_t::MUTATE, "test_set_up_fut", false), + Transaction::src_t::MUTATE, + "test_set_up_fut", + CACHE_HINT_TOUCH, + false), [this](auto &ref_t) { return with_trans_intr(*ref_t, [&](auto &t) { cache->init(); @@ -236,7 +239,10 @@ struct lba_btree_test : btree_test_base { template auto lba_btree_update(F &&f) { auto tref = cache->create_transaction( - Transaction::src_t::MUTATE, "test_btree_update", false); + Transaction::src_t::MUTATE, + "test_btree_update", + CACHE_HINT_TOUCH, + false); auto &t = *tref; with_trans_intr( t, @@ -281,7 +287,10 @@ struct lba_btree_test : btree_test_base { template auto lba_btree_read(F &&f) { auto t = cache->create_transaction( - Transaction::src_t::READ, "test_btree_read", false); + Transaction::src_t::READ, + "test_btree_read", + CACHE_HINT_TOUCH, + false); return with_trans_intr( *t, [this, f=std::forward(f)](auto &t) mutable { @@ -429,7 +438,10 @@ struct btree_lba_manager_test : btree_test_base { auto create_transaction(bool create_fake_extent=true) { auto t = test_transaction_t{ cache->create_transaction( - Transaction::src_t::MUTATE, "test_mutate_lba", false), + Transaction::src_t::MUTATE, + "test_mutate_lba", + CACHE_HINT_TOUCH, + false), test_lba_mappings }; if (create_fake_extent) { @@ -445,7 +457,10 @@ struct btree_lba_manager_test : btree_test_base { auto create_weak_transaction() { auto t = test_transaction_t{ cache->create_transaction( - Transaction::src_t::READ, "test_read_weak", true), + Transaction::src_t::READ, + "test_read_weak", + CACHE_HINT_TOUCH, + true), test_lba_mappings }; return t; diff --git a/src/test/crimson/seastore/test_seastore_cache.cc b/src/test/crimson/seastore/test_seastore_cache.cc index 6e24f436b98..fa774886139 100644 --- a/src/test/crimson/seastore/test_seastore_cache.cc +++ b/src/test/crimson/seastore/test_seastore_cache.cc @@ -87,7 +87,10 @@ struct cache_test_t : public seastar_test_suite_t { auto get_transaction() { return cache->create_transaction( - Transaction::src_t::MUTATE, "test_cache", false); + Transaction::src_t::MUTATE, + "test_cache", + CACHE_HINT_TOUCH, + false); } template -- 2.39.5