From c304a0a68a646235d63d9d44836829a3217183fb Mon Sep 17 00:00:00 2001 From: Xuehan Xu Date: Tue, 14 Oct 2025 11:05:19 +0800 Subject: [PATCH] crimson/os/seastore/btree_types: BtreeCursors don't hold local copies of lba/backref values Since lba mapping values might change during the executions of client transactions once we allow background transactions to be submitted without invalidating client ones, we want to avoid other components using lba/backref mappings from keep local copies to prevent petential problem Signed-off-by: Xuehan Xu --- .../os/seastore/backref/backref_tree_node.h | 24 ++++ src/crimson/os/seastore/backref_mapping.h | 2 + src/crimson/os/seastore/btree/btree_types.cc | 83 +------------ src/crimson/os/seastore/btree/btree_types.h | 113 +++++------------- .../os/seastore/btree/fixed_kv_btree.h | 14 +-- .../os/seastore/lba/btree_lba_manager.cc | 47 ++++---- .../os/seastore/lba/btree_lba_manager.h | 6 +- src/crimson/os/seastore/lba/lba_btree_node.cc | 59 +++++++++ src/crimson/os/seastore/lba/lba_btree_node.h | 50 ++++++++ src/crimson/os/seastore/lba_mapping.cc | 50 ++++---- src/crimson/os/seastore/lba_mapping.h | 14 +-- .../os/seastore/object_data_handler.cc | 4 +- src/crimson/os/seastore/object_data_handler.h | 3 + src/crimson/os/seastore/transaction_manager.h | 8 +- .../seastore/test_transaction_manager.cc | 45 +++++-- 15 files changed, 279 insertions(+), 243 deletions(-) diff --git a/src/crimson/os/seastore/backref/backref_tree_node.h b/src/crimson/os/seastore/backref/backref_tree_node.h index 16d962f7f95..2e331146425 100644 --- a/src/crimson/os/seastore/backref/backref_tree_node.h +++ b/src/crimson/os/seastore/backref/backref_tree_node.h @@ -167,6 +167,29 @@ public: }; using BackrefLeafNodeRef = BackrefLeafNode::Ref; +struct BackrefCursor : + BtreeCursor +{ + using Base = BtreeCursor; + using Base::BtreeCursor; + paddr_t get_paddr() const { + assert(key.is_absolute()); + return key; + } + laddr_t get_laddr() const { + assert(is_viewable()); + assert(!is_end()); + return iter.get_val().laddr; + } + extent_types_t get_type() const { + assert(!is_end()); + return iter.get_val().type; + } +}; +using BackrefCursorRef = boost::intrusive_ptr; + } // namespace crimson::os::seastore::backref #if FMT_VERSION >= 90000 @@ -174,4 +197,5 @@ template <> struct fmt::formatter struct fmt::formatter : fmt::ostream_formatter {}; template <> struct fmt::formatter : fmt::ostream_formatter {}; template <> struct fmt::formatter : fmt::ostream_formatter {}; +template <> struct fmt::formatter : fmt::ostream_formatter {}; #endif diff --git a/src/crimson/os/seastore/backref_mapping.h b/src/crimson/os/seastore/backref_mapping.h index 1dbc564a0f4..4702ba6095f 100644 --- a/src/crimson/os/seastore/backref_mapping.h +++ b/src/crimson/os/seastore/backref_mapping.h @@ -4,10 +4,12 @@ #pragma once #include "crimson/os/seastore/btree/btree_types.h" +#include "crimson/os/seastore/backref/backref_tree_node.h" namespace crimson::os::seastore { class BackrefMapping { + using BackrefCursorRef = backref::BackrefCursorRef; BackrefCursorRef cursor; BackrefMapping(BackrefCursorRef cursor) diff --git a/src/crimson/os/seastore/btree/btree_types.cc b/src/crimson/os/seastore/btree/btree_types.cc index bf4940d59e7..410af029b37 100644 --- a/src/crimson/os/seastore/btree/btree_types.cc +++ b/src/crimson/os/seastore/btree/btree_types.cc @@ -8,81 +8,6 @@ namespace crimson::os::seastore { -base_iertr::future<> LBACursor::refresh() -{ - LOG_PREFIX(LBACursor::refresh); - return with_btree( - ctx.cache, - ctx, - [this, FNAME, c=ctx](auto &btree) { - c.trans.cursor_stats.num_refresh_parent_total++; - - if (!parent->is_valid()) { - c.trans.cursor_stats.num_refresh_invalid_parent++; - SUBTRACET( - seastore_lba, - "cursor {} parent is invalid, re-search from scratch", - c.trans, *this); - return btree.lower_bound(c, this->get_laddr() - ).si_then([this](lba::LBABtree::iterator iter) { - auto leaf = iter.get_leaf_node(); - parent = leaf; - modifications = leaf->modifications; - pos = iter.get_leaf_pos(); - if (!is_end()) { - ceph_assert(!iter.is_end()); - ceph_assert(iter.get_key() == get_laddr()); - val = iter.get_val(); - assert(is_viewable()); - } - }); - } - assert(parent->is_stable() || - parent->is_pending_in_trans(c.trans.get_trans_id())); - auto leaf = parent->cast(); - if (leaf->is_pending_in_trans(c.trans.get_trans_id())) { - if (leaf->modified_since(modifications)) { - c.trans.cursor_stats.num_refresh_modified_viewable_parent++; - } else { - // no need to refresh - return base_iertr::now(); - } - } else { - auto [viewable, l] = leaf->resolve_transaction(c.trans, key); - SUBTRACET( - seastore_lba, - "cursor: {} viewable: {}", - c.trans, *this, viewable); - if (!viewable) { - leaf = l; - c.trans.cursor_stats.num_refresh_unviewable_parent++; - parent = leaf; - } else { - assert(leaf.get() == l.get()); - assert(leaf->is_stable()); - return base_iertr::now(); - } - } - - modifications = leaf->modifications; - if (is_end()) { - pos = leaf->get_size(); - assert(!val); - } else { - auto i = leaf->lower_bound(get_laddr()); - pos = i.get_offset(); - val = i.get_val(); - - auto iter = lba::LBALeafNode::iterator(leaf.get(), pos); - ceph_assert(iter.get_key() == key); - ceph_assert(iter.get_val() == val); - assert(is_viewable()); - } - - return base_iertr::now(); - }); -} - namespace lba { std::ostream& operator<<(std::ostream& out, const lba_map_val_t& v) @@ -126,8 +51,8 @@ bool modified_since(T &&extent, uint64_t iter_modifications) { } } -template -bool BtreeCursor::is_viewable() const { +template +bool BtreeCursor::is_viewable() const { LOG_PREFIX(BtreeCursor::is_viewable()); if (!parent->is_valid() || modified_since(parent, modifications)) { @@ -140,7 +65,7 @@ bool BtreeCursor::is_viewable() const { return viewable; } -template struct BtreeCursor; -template struct BtreeCursor; +template struct BtreeCursor; +template struct BtreeCursor; } // namespace crimson::os::seastore diff --git a/src/crimson/os/seastore/btree/btree_types.h b/src/crimson/os/seastore/btree/btree_types.h index 0f465fcbce9..32ee7701fec 100644 --- a/src/crimson/os/seastore/btree/btree_types.h +++ b/src/crimson/os/seastore/btree/btree_types.h @@ -208,23 +208,22 @@ struct __attribute__((packed)) backref_map_val_le_t { * a key-value mapping's location and the snapshot of its data at construction * time. */ -template +template struct BtreeCursor : public boost::intrusive_ref_counter< - BtreeCursor, boost::thread_unsafe_counter> { + BtreeCursor, boost::thread_unsafe_counter> { BtreeCursor( op_context_t &ctx, - CachedExtentRef parent, + TCachedExtentRef parent, uint64_t modifications, - key_t key, - std::optional val, - btreenode_pos_t pos) + ParentT::iterator &&iter) : ctx(ctx), parent(std::move(parent)), modifications(modifications), - key(key), - val(std::move(val)), - pos(pos) + iter(std::move(iter)), + key(iter == this->parent->end() + ? min_max_t::max + : iter.get_key()) { if constexpr (std::is_same_v) { static_assert(std::is_same_v, @@ -238,11 +237,10 @@ struct BtreeCursor } op_context_t ctx; - CachedExtentRef parent; + TCachedExtentRef parent; uint64_t modifications; - key_t key; - std::optional val; - btreenode_pos_t pos; + ParentT::iterator iter; + key_t key = min_max_t::null; // NOTE: The overhead of calling is_viewable() might be not negligible in the // case of the parent extent is stable and shared by multiple transactions. @@ -251,75 +249,28 @@ struct BtreeCursor bool is_viewable() const; bool is_end() const { - auto max_key = min_max_t::max; - assert((key != max_key) == (bool)val); - return key == max_key; + assert(is_viewable()); + return iter == parent->end(); } extent_len_t get_length() const { + assert(is_viewable()); assert(!is_end()); - return val->len; + return iter.get_val().len; } -}; -struct LBACursor : BtreeCursor { - using Base = BtreeCursor; - using Base::BtreeCursor; - bool is_indirect() const { - return !is_end() && val->pladdr.is_laddr(); - } - laddr_t get_laddr() const { - return key; + uint16_t get_pos() const { + return iter.get_offset(); } - paddr_t get_paddr() const { - assert(!is_indirect()); - assert(!is_end()); - return val->pladdr.get_paddr(); - } - laddr_t get_intermediate_key() const { - assert(is_indirect()); - assert(!is_end()); - return val->pladdr.get_laddr(); - } - checksum_t get_checksum() const { - assert(!is_end()); - assert(!is_indirect()); - return val->checksum; - } - bool contains(laddr_t laddr) const { - return get_laddr() <= laddr && get_laddr() + get_length() > laddr; - } - extent_ref_count_t get_refcount() const { - assert(!is_end()); - assert(!is_indirect()); - return val->refcount; - } - - base_iertr::future<> refresh(); -}; -using LBACursorRef = boost::intrusive_ptr; -struct BackrefCursor : BtreeCursor { - using Base = BtreeCursor; - using Base::BtreeCursor; - paddr_t get_paddr() const { - assert(key.is_absolute()); + key_t get_key() const { return key; } - laddr_t get_laddr() const { - assert(!is_end()); - return val->laddr; - } - extent_types_t get_type() const { - assert(!is_end()); - return val->type; - } }; -using BackrefCursorRef = boost::intrusive_ptr; -template +template std::ostream &operator<<( - std::ostream &out, const BtreeCursor &cursor) + std::ostream &out, const BtreeCursor &cursor) { if constexpr (std::is_same_v) { out << "LBACursor("; @@ -327,20 +278,18 @@ std::ostream &operator<<( out << "BackrefCursor("; } out << (void*)cursor.parent.get() - << "@" << cursor.pos - << "#" << cursor.modifications - << ","; - if (cursor.is_end()) { - return out << "END)"; + << "@" << cursor.iter.get_offset() + << "#" << cursor.modifications; + if (cursor.is_viewable()) { + out << ","; + if (cursor.is_end()) { + return out << "END)"; + } + return out << "," << cursor.iter.get_key() + << "~" << cursor.iter.get_val() + << ")"; } - return out << "," << cursor.key - << "~" << *cursor.val - << ")"; + return out; } } // namespace crimson::os::seastore - -#if FMT_VERSION >= 90000 -template <> struct fmt::formatter : fmt::ostream_formatter {}; -template <> struct fmt::formatter : fmt::ostream_formatter {}; -#endif diff --git a/src/crimson/os/seastore/btree/fixed_kv_btree.h b/src/crimson/os/seastore/btree/fixed_kv_btree.h index 15849be4cd0..46e4195f308 100644 --- a/src/crimson/os/seastore/btree/fixed_kv_btree.h +++ b/src/crimson/os/seastore/btree/fixed_kv_btree.h @@ -289,9 +289,7 @@ public: ctx, leaf.node, leaf.node->modifications, - is_end() ? min_max_t::max : get_key(), - is_end() ? std::nullopt : std::make_optional(get_val()), - leaf.pos); + typename leaf_node_t::iterator(leaf.node.get(), leaf.pos)); } typename leaf_node_t::Ref get_leaf_node() { @@ -493,8 +491,8 @@ public: return make_partial_iter( c, cursor.parent->template cast(), - cursor.key, - cursor.pos); + cursor.get_key(), + cursor.get_pos()); } boost::intrusive_ptr get_cursor( @@ -506,7 +504,7 @@ public: assert(it != leaf->end()); return new cursor_t( c, leaf, leaf->modifications, - key, it.get_val(), it.get_offset()); + typename leaf_node_t::iterator(leaf.get(), it.get_offset())); } boost::intrusive_ptr get_cursor( @@ -1382,9 +1380,7 @@ private: #endif ret.leaf.node = leaf; ret.leaf.pos = pos; - if (ret.is_end()) { - ceph_assert(key == min_max_t::max); - } else { + if (!ret.is_end()) { ceph_assert(key == ret.get_key()); } return ret; diff --git a/src/crimson/os/seastore/lba/btree_lba_manager.cc b/src/crimson/os/seastore/lba/btree_lba_manager.cc index db492e83247..9d8ba487130 100644 --- a/src/crimson/os/seastore/lba/btree_lba_manager.cc +++ b/src/crimson/os/seastore/lba/btree_lba_manager.cc @@ -153,8 +153,8 @@ BtreeLBAManager::get_mappings( c.trans, laddr, length, ret.back()); return get_mappings_iertr::now(); } - assert(cursor->val->refcount == EXTENT_DEFAULT_REF_COUNT); - assert(cursor->val->checksum == 0); + assert(cursor->get_refcount() == EXTENT_DEFAULT_REF_COUNT); + assert(cursor->get_checksum() == 0); return this->resolve_indirect_cursor(c, btree, *cursor ).si_then([FNAME, c, &ret, &cursor, laddr, length](auto direct) { ret.emplace_back(LBAMapping::create_indirect( @@ -265,8 +265,8 @@ BtreeLBAManager::get_mapping( } else { assert(laddr == cursor->get_laddr()); } - assert(cursor->val->refcount == EXTENT_DEFAULT_REF_COUNT); - assert(cursor->val->checksum == 0); + assert(cursor->get_refcount() == EXTENT_DEFAULT_REF_COUNT); + assert(cursor->get_checksum() == 0); return resolve_indirect_cursor(c, btree, *cursor ).si_then([FNAME, c, laddr, indirect=std::move(cursor)] (auto direct) mutable { @@ -467,7 +467,7 @@ BtreeLBAManager::clone_mapping( ).si_then([&mapping](auto res) { assert(!res.mapping.is_indirect()); mapping.direct_cursor = std::move(res.mapping.direct_cursor); - return std::move(mapping); + return mapping.refresh(); }); }); } else { @@ -487,7 +487,7 @@ BtreeLBAManager::clone_mapping( return cursor.refresh( ).si_then([&state, c, &btree]() mutable { auto &cursor = state.pos.get_effective_cursor(); - assert(state.laddr + state.len <= cursor.key); + assert(state.laddr + state.len <= cursor.get_laddr()); auto inter_key = state.mapping.is_indirect() ? state.mapping.get_intermediate_key() : state.mapping.get_key(); @@ -968,10 +968,10 @@ BtreeLBAManager::update_mappings( }, nullptr // all the extents should have already been // added to the fixed_kv_btree - ).si_then([c, &cursor, prev_addr, len, addr, + ).si_then([c, prev_addr, len, addr, checksum, FNAME](auto res) { - DEBUGT("cursor={}, paddr {}~0x{:x} => {}, crc=0x{:x} done -- {}", - c.trans, *cursor, prev_addr, len, + DEBUGT("paddr {}~0x{:x} => {}, crc=0x{:x} done -- {}", + c.trans, prev_addr, len, addr, checksum, res.get_cursor()); return update_mapping_iertr::make_ready_future(); }, @@ -1069,7 +1069,7 @@ BtreeLBAManager::update_refcount( { auto addr = addr_or_cursor.index() == 0 ? std::get<0>(addr_or_cursor) - : std::get<1>(addr_or_cursor)->key; + : std::get<1>(addr_or_cursor)->get_laddr(); LOG_PREFIX(BtreeLBAManager::update_refcount); TRACET("laddr={}, delta={}", t, addr, delta); auto fut = _update_mapping_iertr::make_ready_future< @@ -1091,7 +1091,7 @@ BtreeLBAManager::update_refcount( DEBUGT("laddr={}, delta={} done -- {}", t, addr, delta, res.is_alive_mapping() - ? res.get_cursor().val + ? res.get_cursor().iter.get_val() : res.get_removed_mapping().map_value); return update_mapping_iertr::make_ready_future< mapping_update_result_t>(get_mapping_update_result(res)); @@ -1114,10 +1114,11 @@ BtreeLBAManager::_update_mapping( auto iter = btree.make_partial_iter(c, cursor); auto ret = f(iter.get_val()); if (ret.refcount == 0) { + auto laddr = cursor.get_laddr(); return btree.remove( c, iter - ).si_then([ret, c, laddr=cursor.key](auto iter) { + ).si_then([ret, c, laddr](auto iter) { return update_mapping_ret_bare_t{ laddr, std::move(ret), iter.get_cursor(c)}; }); @@ -1137,7 +1138,7 @@ BtreeLBAManager::_update_mapping( (nextent->has_parent_tracker() && nextent->peek_parent_node().get() == iter.get_leaf_node().get())); LBACursorRef cursor = iter.get_cursor(c); - assert(cursor->val); + assert(!cursor->is_end()); return update_mapping_ret_bare_t{std::move(cursor)}; }); } @@ -1323,19 +1324,23 @@ BtreeLBAManager::remap_mappings( assert(mapping.is_indirect() || (val.pladdr.is_paddr() && val.pladdr.get_paddr().is_absolute())); + auto old_key = mapping.get_key(); + auto old_length = mapping.get_length(); + auto old_indirect = mapping.is_indirect(); return update_refcount(c.trans, &cursor, -1 - ).si_then([&mapping, &btree, &iter, c, &ret, - &remaps, pladdr=val.pladdr](auto r) { + ).si_then([old_key, old_length, old_indirect, + &btree, &iter, c, &ret, &remaps, + pladdr=val.pladdr](auto r) { assert(r.refcount == 0); auto &cursor = r.mapping.get_effective_cursor(); iter = btree.make_partial_iter(c, cursor); return trans_intr::do_for_each( remaps, - [&mapping, &btree, &iter, c, &ret, pladdr](auto &remap) { - assert(remap.offset + remap.len <= mapping.get_length()); - assert((bool)remap.extent == !mapping.is_indirect()); + [old_key, old_length, old_indirect, &btree, + &iter, c, &ret, pladdr](auto &remap) { + assert(remap.offset + remap.len <= old_length); + assert((bool)remap.extent == !old_indirect); lba_map_val_t val; - auto old_key = mapping.get_key(); auto new_key = (old_key + remap.offset).checked_to_laddr(); val.len = remap.len; if (pladdr.is_laddr()) { @@ -1349,11 +1354,11 @@ BtreeLBAManager::remap_mappings( // Checksum will be updated when the committing the transaction val.checksum = CRC_NULL; return btree.insert(c, iter, new_key, std::move(val) - ).si_then([c, &remap, &mapping, &ret, &iter](auto p) { + ).si_then([c, &remap, old_indirect, &ret, &iter](auto p) { auto &[it, inserted] = p; ceph_assert(inserted); auto &leaf_node = *it.get_leaf_node(); - if (mapping.is_indirect()) { + if (old_indirect) { leaf_node.insert_child_ptr( it.get_leaf_pos(), get_reserved_ptr(), diff --git a/src/crimson/os/seastore/lba/btree_lba_manager.h b/src/crimson/os/seastore/lba/btree_lba_manager.h index c34e16062a7..92fd3533238 100644 --- a/src/crimson/os/seastore/lba/btree_lba_manager.h +++ b/src/crimson/os/seastore/lba/btree_lba_manager.h @@ -502,10 +502,10 @@ private: } else { assert(result.is_alive_mapping()); auto &c = result.get_cursor(); - assert(c.val); + assert(!c.is_end()); ceph_assert(!c.is_indirect()); - return {c.get_laddr(), c.val->refcount, - c.val->pladdr, c.val->len, + return {c.get_laddr(), c.get_refcount(), + c.get_pladdr(), c.get_length(), LBAMapping::create_direct(result.take_cursor())}; } } diff --git a/src/crimson/os/seastore/lba/lba_btree_node.cc b/src/crimson/os/seastore/lba/lba_btree_node.cc index f8c645d99d0..ed94290f72a 100644 --- a/src/crimson/os/seastore/lba/lba_btree_node.cc +++ b/src/crimson/os/seastore/lba/lba_btree_node.cc @@ -84,4 +84,63 @@ LBALeafNode::internal_const_iterator_t LBALeafNode::insert( return iter; } +base_iertr::future<> LBACursor::refresh() +{ + LOG_PREFIX(LBACursor::refresh); + return with_btree( + ctx.cache, + ctx, + [this, FNAME, c=ctx](auto &btree) { + c.trans.cursor_stats.num_refresh_parent_total++; + + if (!parent->is_valid()) { + c.trans.cursor_stats.num_refresh_invalid_parent++; + SUBTRACET( + seastore_lba, + "cursor {} parent is invalid, re-search from scratch", + c.trans, *this); + return btree.lower_bound(c, this->get_laddr() + ).si_then([this](lba::LBABtree::iterator it) { + assert(this->get_laddr() == it.get_key()); + iter = LBALeafNode::iterator( + it.get_leaf_node().get(), + it.get_leaf_pos()); + auto leaf = it.get_leaf_node(); + parent = leaf; + modifications = leaf->modifications; + }); + } + assert(parent->is_stable() || + parent->is_pending_in_trans(c.trans.get_trans_id())); + auto leaf = parent->cast(); + if (leaf->is_pending_in_trans(c.trans.get_trans_id())) { + if (leaf->modified_since(modifications)) { + c.trans.cursor_stats.num_refresh_modified_viewable_parent++; + } else { + // no need to refresh + return base_iertr::now(); + } + } else { + auto [viewable, l] = leaf->resolve_transaction(c.trans, get_laddr()); + SUBTRACET(seastore_lba, "cursor: {} viewable: {}", + c.trans, *this, viewable); + if (!viewable) { + leaf = l; + c.trans.cursor_stats.num_refresh_unviewable_parent++; + parent = leaf; + } else { + assert(leaf.get() == l.get()); + assert(leaf->is_stable()); + return base_iertr::now(); + } + } + + modifications = leaf->modifications; + iter = leaf->lower_bound(get_laddr()); + assert(is_viewable()); + + return base_iertr::now(); + }); +} + } diff --git a/src/crimson/os/seastore/lba/lba_btree_node.h b/src/crimson/os/seastore/lba/lba_btree_node.h index 5f40b193d06..b4957fe4872 100644 --- a/src/crimson/os/seastore/lba/lba_btree_node.h +++ b/src/crimson/os/seastore/lba/lba_btree_node.h @@ -29,6 +29,7 @@ namespace crimson::os::seastore::lba { using LBANode = FixedKVNode; class BtreeLBAMapping; +class BtreeLBAManager; constexpr size_t LBA_BLOCK_SIZE = 4096; @@ -289,6 +290,54 @@ struct LBALeafNode }; using LBALeafNodeRef = TCachedExtentRef; +struct LBACursor : BtreeCursor { + using Base = BtreeCursor; + using Base::BtreeCursor; + bool is_indirect() const { + assert(is_viewable()); + return !is_end() && iter.get_val().pladdr.is_laddr(); + } + laddr_t get_laddr() const { + return key; + } + paddr_t get_paddr() const { + assert(is_viewable()); + assert(!is_indirect()); + assert(!is_end()); + auto ret = iter.get_val().pladdr.get_paddr(); + return ret.maybe_relative_to(parent->get_paddr()); + } + laddr_t get_intermediate_key() const { + assert(is_viewable()); + assert(is_indirect()); + assert(!is_end()); + return iter.get_val().pladdr.get_laddr(); + } + checksum_t get_checksum() const { + assert(is_viewable()); + assert(!is_end()); + return iter.get_val().checksum; + } + bool contains(laddr_t laddr) const { + assert(is_viewable()); + return get_laddr() <= laddr && get_laddr() + get_length() > laddr; + } + extent_ref_count_t get_refcount() const { + assert(is_viewable()); + assert(!is_end()); + return iter.get_val().refcount; + } + + base_iertr::future<> refresh(); +private: + + pladdr_t get_pladdr() const { + return std::move(iter.get_val().pladdr); + } + friend class BtreeLBAManager; +}; +using LBACursorRef = boost::intrusive_ptr; + } #if FMT_VERSION >= 90000 @@ -296,4 +345,5 @@ template <> struct fmt::formatter : template <> struct fmt::formatter : fmt::ostream_formatter {}; template <> struct fmt::formatter : fmt::ostream_formatter {}; template <> struct fmt::formatter : fmt::ostream_formatter {}; +template <> struct fmt::formatter : fmt::ostream_formatter {}; #endif diff --git a/src/crimson/os/seastore/lba_mapping.cc b/src/crimson/os/seastore/lba_mapping.cc index b1fbd4bf2a8..f33abd2d6e2 100644 --- a/src/crimson/os/seastore/lba_mapping.cc +++ b/src/crimson/os/seastore/lba_mapping.cc @@ -17,7 +17,7 @@ std::ostream &operator<<(std::ostream &out, const LBAMapping &rhs) out << std::dec << "->" << rhs.get_val(); } else { - out << std::dec << "->" << rhs.indirect_cursor->val; + out << std::dec << "->" << rhs.indirect_cursor->iter.get_val(); } if (rhs.is_complete_indirect()) { out << ",indirect(" << rhs.get_intermediate_base() @@ -51,11 +51,11 @@ LBAMapping::get_logical_extent(Transaction &t) const == t.get_trans_id()); assert(!direct_cursor->is_end()); auto &i = *direct_cursor; - assert(i.pos != BTREENODE_POS_NULL); + assert(i.get_pos() != BTREENODE_POS_NULL); ceph_assert(t.get_trans_id() == i.ctx.trans.get_trans_id()); auto p = direct_cursor->parent->cast(); return p->template get_child( - t, i.ctx.cache, i.pos, i.key); + t, i.ctx.cache, i.get_pos(), i.get_laddr()); } bool LBAMapping::is_stable() const { @@ -65,8 +65,8 @@ bool LBAMapping::is_stable() const { auto leaf = direct_cursor->parent->cast(); return leaf->is_child_stable( direct_cursor->ctx, - direct_cursor->pos, - direct_cursor->key); + direct_cursor->get_pos(), + direct_cursor->get_laddr()); } bool LBAMapping::is_data_stable() const { @@ -76,32 +76,32 @@ bool LBAMapping::is_data_stable() const { auto leaf = direct_cursor->parent->cast(); return leaf->is_child_data_stable( direct_cursor->ctx, - direct_cursor->pos, - direct_cursor->key); + direct_cursor->get_pos(), + direct_cursor->get_laddr()); } base_iertr::future LBAMapping::next() { LOG_PREFIX(LBAMapping::next); auto ctx = get_effective_cursor().ctx; - SUBDEBUGT(seastore_lba, "{}", ctx.trans, *this); - return refresh().si_then([ctx](auto mapping) { - return with_btree_state( - ctx.cache, - ctx, - std::move(mapping), - [ctx](auto &btree, auto &mapping) mutable { - auto &cursor = mapping.get_effective_cursor(); - auto iter = btree.make_partial_iter(ctx, cursor); - return iter.next(ctx).si_then([ctx, &mapping](auto iter) { - if (!iter.is_end() && iter.get_val().pladdr.is_laddr()) { - mapping = LBAMapping::create_indirect(nullptr, iter.get_cursor(ctx)); - } else { - mapping = LBAMapping::create_direct(iter.get_cursor(ctx)); - } - }); + auto mapping = co_await refresh(); + SUBDEBUGT(seastore_lba, "{}", ctx.trans, mapping); + mapping = co_await with_btree_state( + ctx.cache, + ctx, + std::move(mapping), + [ctx](auto &btree, auto &mapping) mutable { + auto &cursor = mapping.get_effective_cursor(); + auto iter = btree.make_partial_iter(ctx, cursor); + return iter.next(ctx).si_then([ctx, &mapping](auto iter) { + if (!iter.is_end() && iter.get_val().pladdr.is_laddr()) { + mapping = LBAMapping::create_indirect(nullptr, iter.get_cursor(ctx)); + } else { + mapping = LBAMapping::create_direct(iter.get_cursor(ctx)); + } }); }); + co_return mapping; } base_iertr::future LBAMapping::refresh() @@ -149,8 +149,8 @@ bool LBAMapping::is_initial_pending() const { auto leaf = direct_cursor->parent->cast(); return leaf->is_child_initial_pending( direct_cursor->ctx, - direct_cursor->pos, - direct_cursor->key); + direct_cursor->get_pos(), + direct_cursor->get_laddr()); } } // namespace crimson::os::seastore diff --git a/src/crimson/os/seastore/lba_mapping.h b/src/crimson/os/seastore/lba_mapping.h index 9c81a92808a..d4dcf0e479d 100644 --- a/src/crimson/os/seastore/lba_mapping.h +++ b/src/crimson/os/seastore/lba_mapping.h @@ -23,6 +23,8 @@ class BtreeLBAManager; } class LBAMapping { + using LBACursorRef = lba::LBACursorRef; + using LBACursor = lba::LBACursor; LBAMapping(LBACursorRef direct, LBACursorRef indirect) : direct_cursor(std::move(direct)), indirect_cursor(std::move(indirect)) @@ -31,8 +33,8 @@ class LBAMapping { assert(!indirect_cursor || indirect_cursor->is_indirect()); // if the mapping is indirect, it mustn't be at the end if (is_indirect() && is_linked_direct()) { - assert((bool)direct_cursor->val - && direct_cursor->key != L_ADDR_NULL); + assert(!direct_cursor->is_end() + && direct_cursor->get_laddr() != L_ADDR_NULL); } } @@ -78,13 +80,9 @@ public: } bool is_end() const { - bool end = !is_indirect() && !direct_cursor->val; // if the mapping is at the end, it can't be indirect and // the physical cursor must be L_ADDR_NULL - assert(end - ? (!indirect_cursor && direct_cursor->key == L_ADDR_NULL) - : true); - return end; + return !is_indirect() && direct_cursor->is_end(); } bool is_indirect() const { @@ -143,10 +141,8 @@ public: laddr_t get_key() const { assert(!is_null()); if (is_indirect()) { - assert(!indirect_cursor->is_end()); return indirect_cursor->get_laddr(); } - assert(!direct_cursor->is_end()); return direct_cursor->get_laddr(); } diff --git a/src/crimson/os/seastore/object_data_handler.cc b/src/crimson/os/seastore/object_data_handler.cc index 680cb0f5f17..871780288ba 100644 --- a/src/crimson/os/seastore/object_data_handler.cc +++ b/src/crimson/os/seastore/object_data_handler.cc @@ -324,10 +324,11 @@ ObjectDataHandler::write_ret do_zero( ObjectDataHandler::clone_ret do_clonerange( context_t ctx, LBAMapping write_pos, - const overwrite_range_t &overwrite_range, + overwrite_range_t &overwrite_range, data_t &data) { LOG_PREFIX(ObjectDataHandler::do_clonerange); + co_await overwrite_range.clonerange_info->refresh(); DEBUGT("{} {} write_pos={}", ctx.t, overwrite_range, data, write_pos); ceph_assert(overwrite_range.clonerange_info.has_value()); assert(write_pos.is_end() || @@ -362,6 +363,7 @@ ObjectDataHandler::clone_ret do_clonerange( ); } // clone the src mappings + co_await overwrite_range.clonerange_info->refresh(); auto src = overwrite_range.clonerange_info->first_src_mapping; auto offset = overwrite_range.clonerange_info->offset; auto len = overwrite_range.clonerange_info->len; diff --git a/src/crimson/os/seastore/object_data_handler.h b/src/crimson/os/seastore/object_data_handler.h index 42abeafb3ed..91d0d29b5e8 100644 --- a/src/crimson/os/seastore/object_data_handler.h +++ b/src/crimson/os/seastore/object_data_handler.h @@ -85,6 +85,9 @@ struct clone_range_t { laddr_t dest_base = L_ADDR_NULL; extent_len_t offset = 0; extent_len_t len = 0; + base_iertr::future<> refresh() { + first_src_mapping = co_await first_src_mapping.refresh(); + } }; std::ostream& operator<<(std::ostream &out, const clone_range_t &); diff --git a/src/crimson/os/seastore/transaction_manager.h b/src/crimson/os/seastore/transaction_manager.h index 1ba7599051f..048cd501b86 100644 --- a/src/crimson/os/seastore/transaction_manager.h +++ b/src/crimson/os/seastore/transaction_manager.h @@ -637,11 +637,11 @@ public: bool updateref) { LOG_PREFIX(TransactionManager::clone_range); + co_await pos.co_refresh(); + mapping = co_await mapping.refresh(); SUBDEBUGT(seastore_tm, "src_base={}, dst_base={}, {}~{}, mapping={}, pos={}, updateref={}", t, src_base, dst_base, offset, len, mapping, pos, updateref); - co_await pos.co_refresh(); - mapping = co_await mapping.refresh(); auto left = len; bool shared_direct = false; auto cloned_to = offset; @@ -1118,9 +1118,9 @@ public: remove_mappings_param_t params) { LOG_PREFIX(TransactionManager::remove_mappings_in_range); - SUBDEBUGT(seastore_tm, "{}~{}, first_mapping: {}", - t, start, unaligned_len, first_mapping); auto mapping = co_await first_mapping.refresh(); + SUBDEBUGT(seastore_tm, "{}~{}, first_mapping: {}", + t, start, unaligned_len, mapping); while (!mapping.is_end()) { assert(mapping.get_key() >= start); auto mapping_end = (mapping.get_key() + mapping.get_length() diff --git a/src/test/crimson/seastore/test_transaction_manager.cc b/src/test/crimson/seastore/test_transaction_manager.cc index b98b9c4b7ce..137dbe0d0d1 100644 --- a/src/test/crimson/seastore/test_transaction_manager.cc +++ b/src/test/crimson/seastore/test_transaction_manager.cc @@ -798,10 +798,24 @@ struct transaction_manager_test_t : }).unsafe_get(); } - LBAMapping refresh_lba_mapping(test_transaction_t &t, LBAMapping mapping) { - return with_trans_intr(*t.t, [mapping=std::move(mapping)](auto &t) mutable { - return mapping.refresh(); - }).unsafe_get(); + std::optional refresh_lba_mapping( + test_transaction_t &t, LBAMapping mapping) + { + std::optional pin = with_trans_intr( + *t.t, + [mapping=std::move(mapping)](auto &t) mutable { + return mapping.refresh().si_then([](auto m) { + return std::make_optional(std::move(m)); + }); + } + ).handle_error(crimson::ct_error::eagain::handle([] { + return base_iertr::make_ready_future< + std::optional>(); + }), crimson::ct_error::pass_further_all{}).unsafe_get(); + if (t.t->is_conflicted()) { + return std::nullopt; + } + return pin; } bool try_submit_transaction(test_transaction_t t) { @@ -1418,7 +1432,6 @@ struct transaction_manager_test_t : { auto t = create_transaction(); auto lpin = get_pin(t, l_offset); - auto rpin = get_pin(t, r_offset); //split left auto pin1 = remap_pin(t, std::move(lpin), 0, 16 << 10); ASSERT_TRUE(pin1); @@ -1432,6 +1445,7 @@ struct transaction_manager_test_t : ASSERT_TRUE(mlext->is_exist_mutation_pending()); ASSERT_TRUE(mlext.get() == lext.get()); + auto rpin = get_pin(t, r_offset); //split right auto pin4 = remap_pin(t, std::move(rpin), 16 << 10, 16 << 10); ASSERT_TRUE(pin4); @@ -1480,7 +1494,7 @@ struct transaction_manager_test_t : auto l_clone_pin = clone_pin( t, std::move(l_clone_pos), std::move(lpin), l_clone_offset); //split left - l_clone_pin = refresh_lba_mapping(t, std::move(l_clone_pin)); + l_clone_pin = *refresh_lba_mapping(t, std::move(l_clone_pin)); auto pin1 = remap_pin(t, std::move(l_clone_pin), 0, 16 << 10); ASSERT_TRUE(pin1); auto pin2 = remap_pin(t, std::move(*pin1), 0, 8 << 10); @@ -1495,7 +1509,7 @@ struct transaction_manager_test_t : auto r_clone_pin = clone_pin( t, std::move(r_clone_pos), std::move(rpin), r_clone_offset); //split right - r_clone_pin = refresh_lba_mapping(t, std::move(r_clone_pin)); + r_clone_pin = *refresh_lba_mapping(t, std::move(r_clone_pin)); auto pin4 = remap_pin(t, std::move(r_clone_pin), 16 << 10, 16 << 10); ASSERT_TRUE(pin4); auto pin5 = remap_pin(t, std::move(*pin4), 8 << 10, 8 << 10); @@ -1548,12 +1562,16 @@ struct transaction_manager_test_t : mbl3.append(ceph::bufferptr(ceph::buffer::create(12 << 10, 0))); auto [mlp1, mext1, mrp1] = overwrite_pin( t, std::move(mpin), 8 << 10 , 8 << 10, mbl1); + auto mlp1_key = mlp1->get_key(); + auto mlp1_length = mlp1->get_length(); auto [mlp2, mext2, mrp2] = overwrite_pin( t, std::move(*mrp1), 4 << 10 , 16 << 10, mbl2); + auto mlp2_key = mlp2->get_key(); + auto mlp2_length = mlp2->get_length(); auto [mlpin3, me3, mrpin3] = overwrite_pin( t, std::move(*mrp2), 4 << 10 , 12 << 10, mbl3); - auto mlext1 = get_extent(t, mlp1->get_key(), mlp1->get_length()); - auto mlext2 = get_extent(t, mlp2->get_key(), mlp2->get_length()); + auto mlext1 = get_extent(t, mlp1_key, mlp1_length); + auto mlext2 = get_extent(t, mlp2_key, mlp2_length); auto mlext3 = get_extent(t, mlpin3->get_key(), mlpin3->get_length()); auto mrext3 = get_extent(t, mrpin3->get_key(), mrpin3->get_length()); EXPECT_EQ('a', mlext1->get_bptr().c_str()[0]); @@ -1575,6 +1593,7 @@ struct transaction_manager_test_t : bufferlist lbl1, rbl1; lbl1.append(ceph::bufferptr(ceph::buffer::create(32 << 10, 0))); + lpin = *refresh_lba_mapping(t, lpin); auto [llp1, lext1, lrp1] = overwrite_pin( t, std::move(lpin), 0 , 32 << 10, lbl1); EXPECT_FALSE(llp1); @@ -1582,6 +1601,7 @@ struct transaction_manager_test_t : EXPECT_TRUE(lext1); rbl1.append(ceph::bufferptr(ceph::buffer::create(32 << 10, 0))); + rpin = *refresh_lba_mapping(t, rpin); auto [rlp1, rext1, rrp1] = overwrite_pin( t, std::move(rpin), 32 << 10 , 32 << 10, rbl1); EXPECT_TRUE(rlp1); @@ -1720,6 +1740,11 @@ struct transaction_manager_test_t : auto last_rpin = *pin0; ASSERT_TRUE(!split_points.empty()); while(!split_points.empty()) { + pin0 = refresh_lba_mapping(t, *pin0); + if (!pin0) { + conflicted++; + return; + } // new overwrite area: start_off ~ end_off auto start_off = split_points.front() + 4 /*RootMetaBlock*/; split_points.pop_front(); @@ -2216,7 +2241,7 @@ TEST_P(tm_single_device_test_t, invalid_lba_mapping_detect) assert(pin.is_viewable()); std::ignore = alloc_extent(t, get_laddr_hint((LEAF_NODE_CAPACITY + 1) * 4096), 4096, 'a'); assert(!pin.is_viewable()); - pin = refresh_lba_mapping(t, pin); + pin = *refresh_lba_mapping(t, pin); auto extent2 = with_trans_intr(*(t.t), [&pin](auto& trans) { auto v = pin.get_logical_extent(trans); assert(v.has_child()); -- 2.47.3