From b73c339c2569395cf7dbbc7ee36c5c3440e2b814 Mon Sep 17 00:00:00 2001 From: Xuehan Xu Date: Wed, 17 Aug 2022 18:07:42 +0800 Subject: [PATCH] crimson/os/seastore/backref_manager: retrieve live backref extents through the backref tree After involving intra-fixed-kv-btree parent-child pointers, we need to keep the invariant that it's only when extents are not in transactions' read_set that we can directly query cache with inspecting the transaction Signed-off-by: Xuehan Xu (cherry picked from commit 45440fadd20fa21deaddbe6db4e0c4e84015c9bf) --- src/crimson/os/seastore/async_cleaner.cc | 10 ++++- .../seastore/backref/btree_backref_manager.cc | 44 +++++++++++++------ .../seastore/backref/btree_backref_manager.h | 5 ++- src/crimson/os/seastore/backref_manager.h | 7 ++- .../os/seastore/btree/fixed_kv_btree.h | 6 ++- src/crimson/os/seastore/cache.cc | 5 ++- src/crimson/os/seastore/cache.h | 11 +++-- .../os/seastore/transaction_manager.cc | 6 ++- 8 files changed, 71 insertions(+), 23 deletions(-) diff --git a/src/crimson/os/seastore/async_cleaner.cc b/src/crimson/os/seastore/async_cleaner.cc index 4bac744e4a0..84677747b77 100644 --- a/src/crimson/os/seastore/async_cleaner.cc +++ b/src/crimson/os/seastore/async_cleaner.cc @@ -1424,23 +1424,27 @@ bool SegmentCleaner::check_usage() t, [&tracker]( paddr_t paddr, + paddr_t backref_key, extent_len_t len, extent_types_t type, laddr_t laddr) { if (paddr.get_addr_type() == paddr_types_t::SEGMENT) { if (is_backref_node(type)) { - assert(laddr == L_ADDR_NULL); + assert(laddr == L_ADDR_NULL); + assert(backref_key != P_ADDR_NULL); tracker->allocate( paddr.as_seg_paddr().get_segment_id(), paddr.as_seg_paddr().get_segment_off(), len); } else if (laddr == L_ADDR_NULL) { + assert(backref_key == P_ADDR_NULL); tracker->release( paddr.as_seg_paddr().get_segment_id(), paddr.as_seg_paddr().get_segment_off(), len); } else { + assert(backref_key == P_ADDR_NULL); tracker->allocate( paddr.as_seg_paddr().get_segment_id(), 
paddr.as_seg_paddr().get_segment_off(), @@ -1724,6 +1728,7 @@ bool RBMCleaner::check_usage() t, [&tracker, &rbms]( paddr_t paddr, + paddr_t backref_key, extent_len_t len, extent_types_t type, laddr_t laddr) @@ -1732,14 +1737,17 @@ bool RBMCleaner::check_usage() if (rbm->get_device_id() == paddr.get_device_id()) { if (is_backref_node(type)) { assert(laddr == L_ADDR_NULL); + assert(backref_key != P_ADDR_NULL); tracker.allocate( paddr, len); } else if (laddr == L_ADDR_NULL) { + assert(backref_key == P_ADDR_NULL); tracker.release( paddr, len); } else { + assert(backref_key == P_ADDR_NULL); tracker.allocate( paddr, len); diff --git a/src/crimson/os/seastore/backref/btree_backref_manager.cc b/src/crimson/os/seastore/backref/btree_backref_manager.cc index c1405ebc18d..3c8eb38c62b 100644 --- a/src/crimson/os/seastore/backref/btree_backref_manager.cc +++ b/src/crimson/os/seastore/backref/btree_backref_manager.cc @@ -328,6 +328,7 @@ BtreeBackrefManager::scan_mapped_space( ceph_assert(pos.get_val().laddr != L_ADDR_NULL); scan_visitor( pos.get_key(), + P_ADDR_NULL, pos.get_val().len, pos.get_val().type, pos.get_val().laddr); @@ -362,6 +363,7 @@ BtreeBackrefManager::scan_mapped_space( ceph_assert(!is_backref_node(backref_entry.type)); scan_visitor( backref_entry.paddr, + P_ADDR_NULL, backref_entry.len, backref_entry.type, backref_entry.laddr); @@ -369,7 +371,7 @@ BtreeBackrefManager::scan_mapped_space( }).si_then([this, &scan_visitor, block_size, c, FNAME] { BackrefBtree::mapped_space_visitor_t f = [&scan_visitor, block_size, FNAME, c]( - paddr_t paddr, extent_len_t len, + paddr_t paddr, paddr_t key, extent_len_t len, depth_t depth, extent_types_t type) { TRACET("tree node {}~{} {}, depth={} used", c.trans, paddr, len, type, depth); @@ -377,7 +379,7 @@ BtreeBackrefManager::scan_mapped_space( ceph_assert(len > 0 && len % block_size == 0); ceph_assert(depth >= 1); ceph_assert(is_backref_node(type)); - return scan_visitor(paddr, len, type, L_ADDR_NULL); + return 
scan_visitor(paddr, key, len, type, L_ADDR_NULL); }; return seastar::do_with( std::move(f), @@ -534,9 +536,10 @@ BtreeBackrefManager::get_cached_backref_entries_in_range( void BtreeBackrefManager::cache_new_backref_extent( paddr_t paddr, + paddr_t key, extent_types_t type) { - return cache.add_backref_extent(paddr, type); + return cache.add_backref_extent(paddr, key, type); } BtreeBackrefManager::retrieve_backref_extents_in_range_ret @@ -545,10 +548,11 @@ BtreeBackrefManager::retrieve_backref_extents_in_range( paddr_t start, paddr_t end) { + auto backref_extents = cache.get_backref_extents_in_range(start, end); return seastar::do_with( std::vector(), - [this, &t, start, end](auto &extents) { - auto backref_extents = cache.get_backref_extents_in_range(start, end); + std::move(backref_extents), + [this, &t](auto &extents, auto &backref_extents) { return trans_intr::parallel_for_each( backref_extents, [this, &extents, &t](auto &ent) { @@ -556,14 +560,28 @@ BtreeBackrefManager::retrieve_backref_extents_in_range( // so it must be alive assert(is_backref_node(ent.type)); LOG_PREFIX(BtreeBackrefManager::retrieve_backref_extents_in_range); - DEBUGT("getting backref extent of type {} at {}", - t, - ent.type, - ent.paddr); - return cache.get_extent_by_type( - t, ent.type, ent.paddr, L_ADDR_NULL, BACKREF_NODE_SIZE - ).si_then([&extents](auto ext) { - extents.emplace_back(std::move(ext)); + DEBUGT("getting backref extent of type {} at {}, key {}", + t, + ent.type, + ent.paddr, + ent.key); + + auto c = get_context(t); + return with_btree_ret( + cache, + c, + [c, &ent](auto &btree) { + if (ent.type == extent_types_t::BACKREF_INTERNAL) { + return btree.get_internal_if_live( + c, ent.paddr, ent.key, BACKREF_NODE_SIZE); + } else { + assert(ent.type == extent_types_t::BACKREF_LEAF); + return btree.get_leaf_if_live( + c, ent.paddr, ent.key, BACKREF_NODE_SIZE); + } + }).si_then([&extents](auto ext) { + ceph_assert(ext); + extents.emplace_back(std::move(ext)); }); }).si_then([&extents] 
{ return std::move(extents); diff --git a/src/crimson/os/seastore/backref/btree_backref_manager.h b/src/crimson/os/seastore/backref/btree_backref_manager.h index d2241a5dcc3..95a1c03113d 100644 --- a/src/crimson/os/seastore/backref/btree_backref_manager.h +++ b/src/crimson/os/seastore/backref/btree_backref_manager.h @@ -108,7 +108,10 @@ public: paddr_t start, paddr_t end) final; - void cache_new_backref_extent(paddr_t paddr, extent_types_t type) final; + void cache_new_backref_extent( + paddr_t paddr, + paddr_t key, + extent_types_t type) final; private: Cache &cache; diff --git a/src/crimson/os/seastore/backref_manager.h b/src/crimson/os/seastore/backref_manager.h index 5db63708600..68c02b11a81 100644 --- a/src/crimson/os/seastore/backref_manager.h +++ b/src/crimson/os/seastore/backref_manager.h @@ -96,7 +96,10 @@ public: paddr_t start, paddr_t end) = 0; - virtual void cache_new_backref_extent(paddr_t paddr, extent_types_t type) = 0; + virtual void cache_new_backref_extent( + paddr_t paddr, + paddr_t key, + extent_types_t type) = 0; /** * merge in-cache paddr_t -> laddr_t mappings to the on-disk backref tree @@ -132,7 +135,7 @@ public: using scan_mapped_space_iertr = base_iertr; using scan_mapped_space_ret = scan_mapped_space_iertr::future<>; using scan_mapped_space_func_t = std::function< - void(paddr_t, extent_len_t, extent_types_t, laddr_t)>; + void(paddr_t, paddr_t, extent_len_t, extent_types_t, laddr_t)>; virtual scan_mapped_space_ret scan_mapped_space( Transaction &t, scan_mapped_space_func_t &&f) = 0; diff --git a/src/crimson/os/seastore/btree/fixed_kv_btree.h b/src/crimson/os/seastore/btree/fixed_kv_btree.h index b7056e46578..47eda89782c 100644 --- a/src/crimson/os/seastore/btree/fixed_kv_btree.h +++ b/src/crimson/os/seastore/btree/fixed_kv_btree.h @@ -58,7 +58,7 @@ public: using iterator_fut = base_iertr::future; using mapped_space_visitor_t = std::function< - void(paddr_t, extent_len_t, depth_t, extent_types_t)>; + void(paddr_t, node_key_t, extent_len_t, 
depth_t, extent_types_t)>; class iterator { public: @@ -1172,6 +1172,7 @@ private: iter.get_internal(root.get_depth()).node = root_node; if (visitor) (*visitor)( root_node->get_paddr(), + root_node->get_node_meta().begin, root_node->get_length(), root.get_depth(), internal_node_t::TYPE); @@ -1188,6 +1189,7 @@ private: iter.leaf.node = root_node; if (visitor) (*visitor)( root_node->get_paddr(), + root_node->get_node_meta().begin, root_node->get_length(), root.get_depth(), leaf_node_t::TYPE); @@ -1221,6 +1223,7 @@ private: if (visitor) (*visitor)( node->get_paddr(), + node->get_node_meta().begin, node->get_length(), depth, node->get_type()); @@ -1294,6 +1297,7 @@ private: if (visitor) (*visitor)( node->get_paddr(), + node->get_node_meta().begin, node->get_length(), 1, node->get_type()); diff --git a/src/crimson/os/seastore/cache.cc b/src/crimson/os/seastore/cache.cc index c41d29b941c..b189b96c238 100644 --- a/src/crimson/os/seastore/cache.cc +++ b/src/crimson/os/seastore/cache.cc @@ -1480,7 +1480,10 @@ void Cache::complete_commit( i->get_type(), start_seq)); } else if (is_backref_node(i->get_type())) { - add_backref_extent(i->get_paddr(), i->get_type()); + add_backref_extent( + i->get_paddr(), + i->cast()->get_node_meta().begin, + i->get_type()); } else { ERRORT("{}", t, *i); ceph_abort("not possible"); diff --git a/src/crimson/os/seastore/cache.h b/src/crimson/os/seastore/cache.h index a04693e73d3..789b14074b8 100644 --- a/src/crimson/os/seastore/cache.h +++ b/src/crimson/os/seastore/cache.h @@ -1061,9 +1061,11 @@ public: struct backref_extent_entry_t { backref_extent_entry_t( paddr_t paddr, + paddr_t key, extent_types_t type) - : paddr(paddr), type(type) {} + : paddr(paddr), key(key), type(type) {} paddr_t paddr = P_ADDR_NULL; + paddr_t key = P_ADDR_NULL; extent_types_t type = extent_types_t::ROOT; struct cmp_t { using is_transparent = paddr_t; @@ -1155,9 +1157,12 @@ private: backref_extent_entry_t::cmp_t>; backref_extent_entry_query_set_t backref_extents; - void 
add_backref_extent(paddr_t paddr, extent_types_t type) { + void add_backref_extent( + paddr_t paddr, + paddr_t key, + extent_types_t type) { assert(!paddr.is_relative()); - auto [iter, inserted] = backref_extents.emplace(paddr, type); + auto [iter, inserted] = backref_extents.emplace(paddr, key, type); boost::ignore_unused(inserted); assert(inserted); } diff --git a/src/crimson/os/seastore/transaction_manager.cc b/src/crimson/os/seastore/transaction_manager.cc index 28d8346e5c7..0f083340ce2 100644 --- a/src/crimson/os/seastore/transaction_manager.cc +++ b/src/crimson/os/seastore/transaction_manager.cc @@ -125,18 +125,22 @@ TransactionManager::mount_ertr::future<> TransactionManager::mount() t, [this]( paddr_t paddr, + paddr_t backref_key, extent_len_t len, extent_types_t type, laddr_t laddr) { if (is_backref_node(type)) { assert(laddr == L_ADDR_NULL); - backref_manager->cache_new_backref_extent(paddr, type); + assert(backref_key != P_ADDR_NULL); + backref_manager->cache_new_backref_extent(paddr, backref_key, type); cache->update_tree_extents_num(type, 1); epm->mark_space_used(paddr, len); } else if (laddr == L_ADDR_NULL) { + assert(backref_key == P_ADDR_NULL); cache->update_tree_extents_num(type, -1); epm->mark_space_free(paddr, len); } else { + assert(backref_key == P_ADDR_NULL); cache->update_tree_extents_num(type, 1); epm->mark_space_used(paddr, len); } -- 2.39.5