From: Xuehan Xu Date: Tue, 22 Apr 2025 13:47:58 +0000 (+0800) Subject: crimson/os/seastore/transaction: turn Transaction::read_set into an intrusive set X-Git-Tag: v21.0.0~256^2~549^2~5 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=2388ab106df233284dfaef2ac663324a34adfcbd;p=ceph.git crimson/os/seastore/transaction: turn Transaction::read_set into an intrusive set Fixes: https://tracker.ceph.com/issues/70976 Signed-off-by: Xuehan Xu --- diff --git a/src/crimson/os/seastore/cache.cc b/src/crimson/os/seastore/cache.cc index 472c1d57dc3d..4fd6ee40be72 100644 --- a/src/crimson/os/seastore/cache.cc +++ b/src/crimson/os/seastore/cache.cc @@ -1231,6 +1231,7 @@ record_t Cache::prepare_record( auto trans_src = t.get_src(); assert(!t.is_weak()); assert(trans_src != Transaction::src_t::READ); + assert(t.read_set.size() + t.num_replace_placeholder == t.read_items.size()); auto& efforts = get_by_src(stats.committed_efforts_by_src, trans_src); @@ -1247,7 +1248,7 @@ record_t Cache::prepare_record( i.ref->get_type()).increment(i.ref->get_length()); read_stat.increment(i.ref->get_length()); } - t.read_set.clear(); + t.clear_read_set(); t.write_set.clear(); record_t record(record_type_t::JOURNAL, trans_src); diff --git a/src/crimson/os/seastore/cached_extent.h b/src/crimson/os/seastore/cached_extent.h index 6f8b73bb2ad5..a65bf46bd181 100644 --- a/src/crimson/os/seastore/cached_extent.h +++ b/src/crimson/os/seastore/cached_extent.h @@ -67,11 +67,16 @@ class read_set_item_t { using set_hook_t = boost::intrusive::set_member_hook< boost::intrusive::link_mode< boost::intrusive::auto_unlink>>; - set_hook_t trans_hook; - using set_hook_options = boost::intrusive::member_hook< + set_hook_t trans_hook; // used to attach transactions to extents + set_hook_t extent_hook; // used to attach extents to transactions + using trans_hook_options = boost::intrusive::member_hook< read_set_item_t, set_hook_t, &read_set_item_t::trans_hook>; + using extent_hook_options = boost::intrusive::member_hook< + read_set_item_t, + set_hook_t, + &read_set_item_t::extent_hook>; public: struct extent_cmp_t { @@ -101,9 +106,14 @@ public: using trans_set_t = boost::intrusive::set< read_set_item_t, - set_hook_options, + trans_hook_options, boost::intrusive::constant_time_size, boost::intrusive::compare>; + using extent_set_t = boost::intrusive::set< + read_set_item_t, + extent_hook_options, + boost::intrusive::constant_time_size, + boost::intrusive::compare>; T *t = nullptr; CachedExtentRef ref; @@ -115,9 +125,7 @@ public: }; template -using read_extent_set_t = std::set< - read_set_item_t, - typename read_set_item_t::extent_cmp_t>; +using read_extent_set_t = typename read_set_item_t::extent_set_t; template using read_trans_set_t = typename read_set_item_t::trans_set_t; diff --git a/src/crimson/os/seastore/transaction.h b/src/crimson/os/seastore/transaction.h index c6e4545bfa83..ddd42531a038 100644 --- a/src/crimson/os/seastore/transaction.h +++ b/src/crimson/os/seastore/transaction.h @@ -126,6 +126,7 @@ public: add_present_to_retired_set(ref); } + using extent_cmp_t = read_set_item_t::extent_cmp_t; void add_present_to_retired_set(CachedExtentRef ref) { assert(ref->get_paddr().is_real_location()); assert(!is_weak()); @@ -147,7 +148,7 @@ public: write_set.erase(*ref); assert(ref->prior_instance); retired_set.emplace(ref->prior_instance, trans_id); - assert(read_set.count(ref->prior_instance->get_paddr())); + assert(read_set.count(ref->prior_instance->get_paddr(), extent_cmp_t{})); ref->prior_instance.reset(); } else { // && retired_set.count(ref->get_paddr()) == 0 @@ -269,7 +270,7 @@ public: assert(ref->get_paddr().is_absolute() || ref->get_paddr().is_root()); assert(ref->is_exist_mutation_pending() || - read_set.count(ref->prior_instance->get_paddr())); + read_set.count(ref->prior_instance->get_paddr(), extent_cmp_t{})); mutated_block_list.push_back(ref); if (!ref->is_exist_mutation_pending()) { write_set.insert(*ref); @@ -288,12 +289,19 @@ public: assert(extent.get_paddr() == placeholder.get_paddr()); assert(extent.get_paddr().is_absolute()); { - auto where = read_set.find(placeholder.get_paddr()); + auto where = read_set.find(placeholder.get_paddr(), extent_cmp_t{}); assert(where != read_set.end()); assert(where->ref.get() == &placeholder); where = read_set.erase(where); - auto it = read_set.emplace_hint(where, this, &extent); + placeholder.read_transactions.erase( + read_trans_set_t::s_iterator_to(*where)); + // Note, the retired-placeholder is not removed from read_items after replace. + read_items.emplace_back(this, &extent); + auto it = read_set.insert_before(where, read_items.back()); extent.read_transactions.insert(const_cast&>(*it)); +#ifndef NDEBUG + num_replace_placeholder++; +#endif } { auto where = retired_set.find(&placeholder); @@ -428,7 +436,7 @@ public: root.reset(); offset = 0; delayed_temp_offset = 0; - read_set.clear(); + read_items.clear(); fresh_backref_extents = 0; invalidate_clear_write_set(); mutated_block_list.clear(); @@ -587,7 +595,7 @@ private: } else if (retired_set.count(addr)) { return {get_extent_ret::RETIRED, nullptr}; } else if ( - auto iter = read_set.find(addr); + auto iter = read_set.find(addr, extent_cmp_t{}); iter != read_set.end()) { auto ret = iter->ref; SUBTRACET(seastore_cache, "{} is present in read_set -- {}", @@ -617,7 +625,8 @@ private: return false; } - auto [iter, inserted] = read_set.emplace(this, ref); + read_items.emplace_back(this, ref); + auto [iter, inserted] = read_set.insert(read_items.back()); ceph_assert(inserted); ref->read_transactions.insert_before( it, const_cast&>(*iter)); @@ -652,6 +661,10 @@ private: * invalidate *this. */ read_extent_set_t read_set; ///< set of extents read by paddr + std::list> read_items; +#ifndef NDEBUG + size_t num_replace_placeholder = 0; +#endif uint64_t fresh_backref_extents = 0; // counter of new backref extents