From 2c3a3e3fe9753f27adefe2f3b68425dd6c486e6a Mon Sep 17 00:00:00 2001 From: Yingxin Cheng Date: Mon, 21 Apr 2025 15:38:56 +0800 Subject: [PATCH] crimson/os/seastore/cached_extent: add "seen_by_users" to LogicalCachedExtent This indicates whether the extent has been seen by user transactions. This is required to implement the accurate init callback which is guaranteed be called only once. Signed-off-by: Yingxin Cheng Signed-off-by: Xuehan Xu --- src/crimson/os/seastore/cache.cc | 15 +++++-- src/crimson/os/seastore/cached_extent.cc | 3 +- src/crimson/os/seastore/cached_extent.h | 31 ++++++++++++- src/crimson/os/seastore/seastore_types.h | 4 ++ src/crimson/os/seastore/transaction_manager.h | 43 +++++++++++++++++-- 5 files changed, 85 insertions(+), 11 deletions(-) diff --git a/src/crimson/os/seastore/cache.cc b/src/crimson/os/seastore/cache.cc index 53b23025cd6..6f7c3f48b29 100644 --- a/src/crimson/os/seastore/cache.cc +++ b/src/crimson/os/seastore/cache.cc @@ -1151,13 +1151,19 @@ CachedExtentRef Cache::duplicate_for_write( LOG_PREFIX(Cache::duplicate_for_write); assert(i->is_fully_loaded()); +#ifndef NDEBUG + if (i->is_logical()) { + assert(static_cast(*i).has_laddr()); + assert(static_cast(*i).is_seen_by_users()); + } +#endif + if (i->is_mutable()) { return i; } if (i->is_exist_clean()) { assert(i->is_logical()); - assert(static_cast(*i).has_laddr()); i->version++; i->state = CachedExtent::extent_state_t::EXIST_MUTATION_PENDING; i->last_committed_crc = i->calc_crc32c(); @@ -1188,10 +1194,11 @@ CachedExtentRef Cache::duplicate_for_write( ret->version++; ret->state = CachedExtent::extent_state_t::MUTATION_PENDING; if (i->is_logical()) { - auto& lextent = static_cast(*i); - assert(lextent.has_laddr()); + auto& stable_extent = static_cast(*i); assert(ret->is_logical()); - static_cast(*ret).set_laddr(lextent.get_laddr()); + auto& mutate_extent = static_cast(*ret); + mutate_extent.set_laddr(stable_extent.get_laddr()); + assert(mutate_extent.is_seen_by_users()); } t.add_mutated_extent(ret); diff --git a/src/crimson/os/seastore/cached_extent.cc b/src/crimson/os/seastore/cached_extent.cc index 6bceb999c37..d96500736de 100644 --- a/src/crimson/os/seastore/cached_extent.cc +++ b/src/crimson/os/seastore/cached_extent.cc @@ -91,7 +91,8 @@ CachedExtent* CachedExtent::get_transactional_view(transaction_id_t tid) { std::ostream &LogicalCachedExtent::print_detail(std::ostream &out) const { - out << ", laddr=" << laddr; + out << ", laddr=" << laddr + << ", seen=" << seen_by_users; return print_detail_l(out); } diff --git a/src/crimson/os/seastore/cached_extent.h b/src/crimson/os/seastore/cached_extent.h index d7b47d3cae1..3bc58f07f3d 100644 --- a/src/crimson/os/seastore/cached_extent.h +++ b/src/crimson/os/seastore/cached_extent.h @@ -857,10 +857,12 @@ private: /// used to wait while in-progress commit completes std::optional> io_wait_promise; + void set_io_wait() { ceph_assert(!io_wait_promise); io_wait_promise = seastar::shared_promise<>(); } + void complete_io() { ceph_assert(io_wait_promise); io_wait_promise->set_value(); @@ -1385,18 +1387,37 @@ class LBAMapping; */ class LogicalCachedExtent : public CachedExtent { public: - template - LogicalCachedExtent(T&&... t) : CachedExtent(std::forward(t)...) {} + using CachedExtent::CachedExtent; + + LogicalCachedExtent(const LogicalCachedExtent& other) + : CachedExtent(other), + seen_by_users(other.seen_by_users) {} + + LogicalCachedExtent(const LogicalCachedExtent& other, share_buffer_t s) + : CachedExtent(other, s), + seen_by_users(other.seen_by_users) {} void on_rewrite(Transaction &t, CachedExtent &extent, extent_len_t off) final { assert(get_type() == extent.get_type()); auto &lextent = (LogicalCachedExtent&)extent; set_laddr((lextent.get_laddr() + off).checked_to_laddr()); + seen_by_users = lextent.seen_by_users; do_on_rewrite(t, lextent); } virtual void do_on_rewrite(Transaction &t, LogicalCachedExtent &extent) {} + bool is_seen_by_users() const { + return seen_by_users; + } + + // handled under TransactionManager, + // user should not set this by themselves. + void set_seen_by_users() { + assert(!seen_by_users); + seen_by_users = true; + } + bool has_laddr() const { return laddr != L_ADDR_NULL; } @@ -1461,6 +1482,12 @@ private: // the logical address of the extent, and if shared, // it is the intermediate_base, see BtreeLBAMapping comments. laddr_t laddr = L_ADDR_NULL; + + // whether the extent has been seen by users since OSD startup, + // otherwise, an extent can still be alive if it's dirty or pinned by lru. + // + // user must initialize the logical extent upon the first access. + bool seen_by_users = false; }; using LogicalCachedExtentRef = TCachedExtentRef; diff --git a/src/crimson/os/seastore/seastore_types.h b/src/crimson/os/seastore/seastore_types.h index 29bd7b9c85f..d1ccab63342 100644 --- a/src/crimson/os/seastore/seastore_types.h +++ b/src/crimson/os/seastore/seastore_types.h @@ -2197,6 +2197,10 @@ constexpr bool is_valid_transaction(transaction_type_t type) { return type < transaction_type_t::MAX; } +constexpr bool is_user_transaction(transaction_type_t type) { + return type <= transaction_type_t::READ; +} + constexpr bool is_background_transaction(transaction_type_t type) { return (type >= transaction_type_t::TRIM_DIRTY && type < transaction_type_t::MAX); diff --git a/src/crimson/os/seastore/transaction_manager.h b/src/crimson/os/seastore/transaction_manager.h index e7d449cdd5e..d9773d2fbc1 100644 --- a/src/crimson/os/seastore/transaction_manager.h +++ b/src/crimson/os/seastore/transaction_manager.h @@ -260,6 +260,8 @@ public: static_assert(is_logical_type(T::TYPE)); assert(is_aligned(partial_off, get_block_size())); assert(is_aligned(partial_len, get_block_size())); + // must be user-oriented required by maybe_init + assert(is_user_transaction(t.get_src())); extent_len_t direct_partial_off = partial_off; bool is_clone = pin->is_clone(); @@ -302,9 +304,12 @@ public: return cache->read_extent_maybe_partial( t, std::move(extent), direct_partial_off, partial_len); }).si_then([maybe_init=std::move(maybe_init)](auto extent) { - maybe_init(*extent); - return extent; - }); + if (!extent->is_seen_by_users()) { + maybe_init(*extent); + extent->set_seen_by_users(); + } + return std::move(extent); + }); } else { return this->pin_to_extent( t, std::move(std::get<0>(ret)), @@ -381,6 +386,7 @@ public: laddr_t laddr_hint, extent_len_t len, placement_hint_t placement_hint = placement_hint_t::HOT) { + static_assert(is_logical_metadata_type(T::TYPE)); LOG_PREFIX(TransactionManager::alloc_non_data_extent); SUBDEBUGT(seastore_tm, "{} hint {}~0x{:x} phint={} ...", t, T::TYPE, laddr_hint, len, placement_hint); @@ -389,6 +395,9 @@ public: len, placement_hint, INIT_GENERATION); + // user must initialize the logical extent themselves. + assert(is_user_transaction(t.get_src())); + ext->set_seen_by_users(); return lba_manager->alloc_extent( t, laddr_hint, @@ -416,6 +425,7 @@ public: laddr_t laddr_hint, extent_len_t len, placement_hint_t placement_hint = placement_hint_t::HOT) { + static_assert(is_data_type(T::TYPE)); LOG_PREFIX(TransactionManager::alloc_data_extents); SUBDEBUGT(seastore_tm, "{} hint {}~0x{:x} phint={} ...", t, T::TYPE, laddr_hint, len, placement_hint); @@ -424,6 +434,11 @@ public: len, placement_hint, INIT_GENERATION); + // user must initialize the logical extent themselves + assert(is_user_transaction(t.get_src())); + for (auto& ext : exts) { + ext->set_seen_by_users(); + } return lba_manager->alloc_extents( t, laddr_hint, @@ -479,6 +494,10 @@ public: LBAMappingRef &&pin, std::array remaps) { static_assert(std::is_base_of_v); + // data extents don't need maybe_init yet, currently, + static_assert(is_data_type(T::TYPE)); + // must be user-oriented required by (the potential) maybe_init + assert(is_user_transaction(t.get_src())); #ifndef NDEBUG std::sort(remaps.begin(), remaps.end(), @@ -550,7 +569,14 @@ public: } else { auto ret = get_extent_if_linked(t, pin->duplicate()); if (ret.index() == 1) { - return std::move(std::get<1>(ret)); + return std::get<1>(ret + ).si_then([](auto extent) { + if (!extent->is_seen_by_users()) { + // Note, no maybe_init available for data extents + extent->set_seen_by_users(); + } + return std::move(extent); + }); } else { // absent return base_iertr::make_ready_future>(); @@ -571,6 +597,7 @@ public: original_bptr = ext->get_bptr(); } if (ext) { + assert(ext->is_seen_by_users()); cache->retire_extent(t, ext); } else { cache->retire_absent_extent_addr(t, original_paddr, original_len); @@ -594,6 +621,8 @@ public: remap_len, original_laddr, original_bptr); + // user must initialize the logical extent themselves. + extent->set_seen_by_users(); extents.emplace_back(std::move(extent)); } }); @@ -1034,6 +1063,8 @@ private: extent_len_t partial_len, lextent_init_func_t &&maybe_init) { static_assert(is_logical_type(T::TYPE)); + // must be user-oriented required by maybe_init + assert(is_user_transaction(t.get_src())); using ret = pin_to_extent_ret; auto &pref = *pin; auto direct_length = pref.is_indirect() ? @@ -1062,6 +1093,7 @@ private: pref.link_child(&extent); extent.maybe_set_intermediate_laddr(pref); maybe_init(extent); + extent.set_seen_by_users(); } ).si_then([FNAME, &t, pin=std::move(pin), this](auto ref) mutable -> ret { if (ref->is_fully_loaded()) { @@ -1114,6 +1146,7 @@ private: SUBTRACET(seastore_tm, "getting absent extent from pin {} type {} ...", t, *pin, type); assert(is_logical_type(type)); + assert(is_background_transaction(t.get_src())); auto &pref = *pin; laddr_t direct_key; extent_len_t direct_length; @@ -1140,6 +1173,8 @@ private: assert(!pref.get_parent()->is_pending()); pref.link_child(&lextent); lextent.maybe_set_intermediate_laddr(pref); + // No change to extent::seen_by_user because this path is only + // for background cleaning. } ).si_then([FNAME, &t, pin=std::move(pin), this](auto ref) { auto crc = ref->calc_crc32c(); -- 2.39.5