From 8f76bbfcdce93c6572a5f55e39dd3e2a6f35d278 Mon Sep 17 00:00:00 2001 From: Xuehan Xu Date: Wed, 26 Mar 2025 11:33:27 +0800 Subject: [PATCH] crimson/os/seastore/object_data_handler: LBACursor based clones Avoiding unnecessary lba tree searches Signed-off-by: Xuehan Xu --- .../os/seastore/lba/btree_lba_manager.cc | 111 ++++++++++++++ .../os/seastore/lba/btree_lba_manager.h | 40 ++--- src/crimson/os/seastore/lba_manager.h | 26 +++- src/crimson/os/seastore/lba_mapping.h | 10 +- .../os/seastore/object_data_handler.cc | 138 ++++++------------ src/crimson/os/seastore/transaction_manager.h | 114 ++++++++++++--- .../seastore/test_transaction_manager.cc | 39 +++-- 7 files changed, 306 insertions(+), 172 deletions(-) diff --git a/src/crimson/os/seastore/lba/btree_lba_manager.cc b/src/crimson/os/seastore/lba/btree_lba_manager.cc index 33d8dfc3dbb..5e9d71db52c 100644 --- a/src/crimson/os/seastore/lba/btree_lba_manager.cc +++ b/src/crimson/os/seastore/lba/btree_lba_manager.cc @@ -413,6 +413,98 @@ BtreeLBAManager::alloc_extents( }); } +BtreeLBAManager::clone_mapping_ret +BtreeLBAManager::clone_mapping( + Transaction &t, + LBAMapping pos, + LBAMapping mapping, + laddr_t laddr, + bool updateref) +{ + LOG_PREFIX(BtreeLBAManager::clone_mapping); + assert(pos.is_viewable()); + assert(mapping.is_viewable()); + DEBUGT("pos={}, mapping={}, laddr={}, updateref={}", + t, pos, mapping, laddr, updateref); + struct state_t { + LBAMapping pos; + LBAMapping mapping; + laddr_t laddr; + extent_len_t len; + }; + auto c = get_context(t); + return seastar::do_with( + std::move(mapping), + [this, updateref, c](auto &mapping) { + if (updateref) { + auto fut = update_refcount_iertr::make_ready_future< + LBACursorRef>(mapping.direct_cursor); + if (!mapping.direct_cursor) { + fut = resolve_indirect_cursor(c, *mapping.indirect_cursor); + } + return fut.si_then([this, c, &mapping](auto cursor) { + mapping.direct_cursor = cursor; + assert(mapping.direct_cursor->is_viewable()); + return update_refcount(c.trans, cursor.get(), 1, false + ).si_then([&mapping](auto res) { + assert(!res.result.mapping.is_indirect()); + mapping.direct_cursor = std::move(res.result.mapping.direct_cursor); + return std::move(mapping); + }); + }); + } else { + return update_refcount_iertr::make_ready_future< + LBAMapping>(std::move(mapping)); + } + }).si_then([c, this, pos=std::move(pos), + laddr](auto mapping) mutable { + auto len = mapping.get_length(); + return seastar::do_with( + state_t{std::move(pos), std::move(mapping), laddr, len}, + [this, c](auto &state) { + return with_btree( + cache, + c, + [c, &state](auto &btree) mutable { + auto &cursor = state.pos.get_effective_cursor(); + return cursor.refresh( + ).si_then([&state, c, &btree]() mutable { + auto &cursor = state.pos.get_effective_cursor(); + assert(state.laddr + state.len <= cursor.key); + return btree.insert( + c, + btree.make_partial_iter(c, cursor), + state.laddr, + lba_map_val_t{ + state.len, + state.mapping.is_indirect() + ? state.mapping.get_intermediate_key() + : state.mapping.get_key(), + EXTENT_DEFAULT_REF_COUNT, + 0}); + }).si_then([c, &state](auto p) { + auto &[iter, inserted] = p; + auto &leaf_node = *iter.get_leaf_node(); + leaf_node.insert_child_ptr( + iter.get_leaf_pos(), + get_reserved_ptr(), + leaf_node.get_size() - 1 /*the size before the insert*/); + auto cursor = iter.get_cursor(c); + return state.mapping.refresh( + ).si_then([cursor=std::move(cursor)](auto mapping) mutable { + return clone_mapping_ret_t{ + LBAMapping(mapping.direct_cursor, std::move(cursor)), + mapping.duplicate()}; + }); + }); + }); + }); + }).handle_error_interruptible( + clone_mapping_iertr::pass_further{}, + crimson::ct_error::assert_all{"unexpected error"} + ); +} + BtreeLBAManager::_get_cursor_ret BtreeLBAManager::get_cursor( op_context_t c, @@ -1176,6 +1268,25 @@ BtreeLBAManager::get_containing_cursor( }); } +#ifdef UNIT_TESTS_BUILT +BtreeLBAManager::get_end_mapping_ret +BtreeLBAManager::get_end_mapping( + Transaction &t) +{ + LOG_PREFIX(BtreeLBAManager::get_end_mapping); + DEBUGT("", t); + auto c = get_context(t); + return with_btree( + cache, + c, + [c](auto &btree) { + return btree.end(c).si_then([c](auto iter) { + return LBAMapping::create_direct(iter.get_cursor(c)); + }); + }); +} +#endif + BtreeLBAManager::remap_ret BtreeLBAManager::remap_mappings( Transaction &t, diff --git a/src/crimson/os/seastore/lba/btree_lba_manager.h b/src/crimson/os/seastore/lba/btree_lba_manager.h index d0680b0477b..721f6e94b16 100644 --- a/src/crimson/os/seastore/lba/btree_lba_manager.h +++ b/src/crimson/os/seastore/lba/btree_lba_manager.h @@ -100,38 +100,16 @@ public: }); } - alloc_extent_ret clone_mapping( + clone_mapping_ret clone_mapping( Transaction &t, + LBAMapping pos, + LBAMapping mapping, laddr_t laddr, - extent_len_t len, - laddr_t intermediate_key, - laddr_t intermediate_base) final - { - std::vector alloc_infos = { - alloc_mapping_info_t::create_indirect( - laddr, len, intermediate_key)}; - return seastar::do_with( - std::move(alloc_infos), - [this, &t, laddr, intermediate_base](auto &infos) { - return alloc_sparse_mappings( - t, laddr, infos, alloc_policy_t::deterministic - ).si_then([this, &t, intermediate_base](auto cursors) { - ceph_assert(cursors.size() == 1); - ceph_assert(cursors.front()->is_indirect()); - return update_refcount(t, intermediate_base, 1, false - ).si_then([cursors=std::move(cursors)](auto p) mutable { - assert(p.is_alive_mapping()); - auto mapping = LBAMapping::create_indirect( - p.take_cursor(), std::move(cursors.front())); - ceph_assert(mapping.is_stable()); - return alloc_extent_iertr::make_ready_future< - LBAMapping>(std::move(mapping)); - }); - }); - }).handle_error_interruptible( - crimson::ct_error::input_output_error::pass_further{}, - crimson::ct_error::assert_all{"unexpect enoent"}); - } + bool updateref) final; + +#ifdef UNIT_TESTS_BUILT + get_end_mapping_ret get_end_mapping(Transaction &t) final; +#endif alloc_extents_ret alloc_extents( Transaction &t, @@ -436,7 +414,7 @@ private: val.refcount, val.pladdr, val.len, - (!v.next->is_end() && v.next->is_indirect()) + v.next->is_indirect() ? LBAMapping::create_indirect(nullptr, std::move(v.next)) : LBAMapping::create_direct(std::move(v.next))}; } else { diff --git a/src/crimson/os/seastore/lba_manager.h b/src/crimson/os/seastore/lba_manager.h index 797a511d356..b4cbc44518d 100644 --- a/src/crimson/os/seastore/lba_manager.h +++ b/src/crimson/os/seastore/lba_manager.h @@ -74,6 +74,12 @@ public: LogicalChildNode &extent) = 0; +#ifdef UNIT_TESTS_BUILT + using get_end_mapping_iertr = base_iertr; + using get_end_mapping_ret = get_end_mapping_iertr::future; + virtual get_end_mapping_ret get_end_mapping(Transaction &t) = 0; +#endif + /** * Allocates a new mapping referenced by LBARef * @@ -106,12 +112,22 @@ public: LBAMapping pos, std::vector ext) = 0; - virtual alloc_extent_ret clone_mapping( + struct clone_mapping_ret_t { + LBAMapping cloned_mapping; + LBAMapping orig_mapping; + }; + using clone_mapping_iertr = alloc_extent_iertr; + using clone_mapping_ret = clone_mapping_iertr::future; + /* + * Clones "mapping" at the position "pos" with new laddr "laddr", if updateref + * is true, update the refcount of the mapping "mapping" + */ + virtual clone_mapping_ret clone_mapping( Transaction &t, - laddr_t hint, - extent_len_t len, - laddr_t intermediate_key, - laddr_t intermediate_base) = 0; + LBAMapping pos, + LBAMapping mapping, + laddr_t laddr, + bool updateref) = 0; virtual alloc_extent_ret reserve_region( Transaction &t, diff --git a/src/crimson/os/seastore/lba_mapping.h b/src/crimson/os/seastore/lba_mapping.h index aa3b0306a93..cab0d8034df 100644 --- a/src/crimson/os/seastore/lba_mapping.h +++ b/src/crimson/os/seastore/lba_mapping.h @@ -72,9 +72,13 @@ public: bool is_viewable() const { assert(!is_null()); - return is_indirect() - ? indirect_cursor->is_viewable() - : direct_cursor->is_viewable(); + if (is_complete_indirect()) { + return indirect_cursor->is_viewable() && direct_cursor->is_viewable(); + } + if (is_indirect()) { + return indirect_cursor->is_viewable(); + } + return direct_cursor->is_viewable(); } // For reserved mappings, the return values are diff --git a/src/crimson/os/seastore/object_data_handler.cc b/src/crimson/os/seastore/object_data_handler.cc index 7121070cb2b..8b42c5da7c0 100644 --- a/src/crimson/os/seastore/object_data_handler.cc +++ b/src/crimson/os/seastore/object_data_handler.cc @@ -1377,69 +1377,6 @@ ObjectDataHandler::clear_ret ObjectDataHandler::clear( }); } -ObjectDataHandler::clone_ret ObjectDataHandler::clone_extents( - context_t ctx, - object_data_t &object_data, - lba_mapping_list_t &pins, - laddr_t data_base) -{ - LOG_PREFIX(ObjectDataHandler::clone_extents); - TRACET("object_data: {}~0x{:x}, data_base: 0x{:x}", - ctx.t, - object_data.get_reserved_data_base(), - object_data.get_reserved_data_len(), - data_base); - return ctx.tm.remove( - ctx.t, - object_data.get_reserved_data_base() - ).si_then( - [&pins, &object_data, ctx, data_base](auto) mutable { - return seastar::do_with( - (extent_len_t)0, - [&object_data, ctx, data_base, &pins](auto &last_pos) { - return trans_intr::do_for_each( - pins, - [&last_pos, &object_data, ctx, data_base](auto &pin) { - auto offset = pin.get_key().template get_byte_distance< - extent_len_t>(data_base); - ceph_assert(offset == last_pos); - laddr_t addr = (object_data.get_reserved_data_base() + offset) - .checked_to_laddr(); - return seastar::futurize_invoke([ctx, addr, &pin] { - if (pin.get_val().is_zero()) { - return ctx.tm.reserve_region(ctx.t, addr, pin.get_length()); - } else { - return ctx.tm.clone_pin(ctx.t, addr, pin.duplicate()); - } - }).si_then( - [&pin, &last_pos, offset](auto) { - last_pos = offset + pin.get_length(); - return seastar::now(); - }).handle_error_interruptible( - crimson::ct_error::input_output_error::pass_further(), - crimson::ct_error::assert_all("not possible") - ); - }).si_then([&last_pos, &object_data, ctx] { - if (last_pos != object_data.get_reserved_data_len()) { - return ctx.tm.reserve_region( - ctx.t, - (object_data.get_reserved_data_base() + last_pos).checked_to_laddr(), - object_data.get_reserved_data_len() - last_pos - ).si_then([](auto) { - return seastar::now(); - }); - } - return TransactionManager::reserve_extent_iertr::now(); - }); - }); - } - ).handle_error_interruptible( - ObjectDataHandler::write_iertr::pass_further{}, - crimson::ct_error::assert_all{ - "object_data_handler::clone invalid error" - }); -} - ObjectDataHandler::clone_ret ObjectDataHandler::clone( context_t ctx) { @@ -1458,45 +1395,52 @@ ObjectDataHandler::clone_ret ObjectDataHandler::clone( if (object_data.is_null()) { return clone_iertr::now(); } - return prepare_data_reservation( - ctx, - d_object_data, - object_data.get_reserved_data_len() - ).si_then([&object_data, &d_object_data, ctx, this](auto) { - assert(!object_data.is_null()); - auto base = object_data.get_reserved_data_base(); - auto len = object_data.get_reserved_data_len(); - object_data.clear(); - LOG_PREFIX(ObjectDataHandler::clone); - DEBUGT("cloned obj reserve_data_base: {}, len 0x{:x}", - ctx.t, - d_object_data.get_reserved_data_base(), - d_object_data.get_reserved_data_len()); + return ctx.tm.get_pin(ctx.t, object_data.get_reserved_data_base() + ).si_then([this, &object_data, &d_object_data, ctx](auto mapping) { return prepare_data_reservation( ctx, - object_data, - d_object_data.get_reserved_data_len() - ).si_then([&d_object_data, ctx, &object_data, base, len, this](auto) { - LOG_PREFIX("ObjectDataHandler::clone"); - DEBUGT("head obj reserve_data_base: {}, len 0x{:x}", + d_object_data, + object_data.get_reserved_data_len() + ).si_then([&object_data, &d_object_data, ctx](auto mapping) { + assert(!object_data.is_null()); + assert(mapping); + LOG_PREFIX(ObjectDataHandler::clone); + DEBUGT("cloned obj reserve_data_base: {}, len 0x{:x}", ctx.t, - object_data.get_reserved_data_base(), - object_data.get_reserved_data_len()); - return ctx.tm.get_pins(ctx.t, base, len - ).si_then([ctx, &object_data, &d_object_data, base, this](auto pins) { - return seastar::do_with( - std::move(pins), - [ctx, &object_data, &d_object_data, base, this](auto &pins) { - return clone_extents(ctx, object_data, pins, base - ).si_then([ctx, &d_object_data, base, &pins, this] { - return clone_extents(ctx, d_object_data, pins, base); - }).si_then([&pins, ctx] { - return do_removals(ctx, pins); - }); - }); + d_object_data.get_reserved_data_base(), + d_object_data.get_reserved_data_len()); + return ctx.tm.remove(ctx.t, std::move(*mapping)); + }).si_then([mapping=mapping.duplicate(), + &d_object_data, ctx](auto pos) mutable { + auto base = d_object_data.get_reserved_data_base(); + auto len = d_object_data.get_reserved_data_len(); + return ctx.tm.clone_range( + ctx.t, base, len, std::move(pos), std::move(mapping), true); + }).si_then([ctx, &object_data, &d_object_data, this] { + object_data.clear(); + return prepare_data_reservation( + ctx, + object_data, + d_object_data.get_reserved_data_len() + ).si_then([ctx, &object_data](auto mapping) { + LOG_PREFIX("ObjectDataHandler::clone"); + DEBUGT("head obj reserve_data_base: {}, len 0x{:x}", + ctx.t, + object_data.get_reserved_data_base(), + object_data.get_reserved_data_len()); + return ctx.tm.remove(ctx.t, std::move(*mapping)); }); + }).si_then([ctx, &object_data, + mapping=std::move(mapping)](auto pos) mutable { + auto base = object_data.get_reserved_data_base(); + auto len = object_data.get_reserved_data_len(); + return ctx.tm.clone_range( + ctx.t, base, len, std::move(pos), std::move(mapping), false); }); - }); + }).handle_error_interruptible( + clone_iertr::pass_further{}, + crimson::ct_error::assert_all{"unexpected enoent"} + ); }); } diff --git a/src/crimson/os/seastore/transaction_manager.h b/src/crimson/os/seastore/transaction_manager.h index a178ab6d17f..704d547683c 100644 --- a/src/crimson/os/seastore/transaction_manager.h +++ b/src/crimson/os/seastore/transaction_manager.h @@ -729,34 +729,100 @@ public: * for the definition of "indirect lba mapping" and "direct lba mapping". * Note that the cloned extent must be stable */ - using clone_extent_iertr = alloc_extent_iertr; - using clone_extent_ret = clone_extent_iertr::future; + using clone_extent_iertr = LBAManager::clone_mapping_iertr; + using clone_extent_ret = LBAManager::clone_mapping_ret; clone_extent_ret clone_pin( Transaction &t, + LBAMapping pos, + LBAMapping mapping, laddr_t hint, - LBAMapping mapping) { + bool updateref) { LOG_PREFIX(TransactionManager::clone_pin); - SUBDEBUGT(seastore_tm, "{} clone to hint {} ...", t, mapping, hint); - return lba_manager->refresh_lba_mapping(t, std::move(mapping) - ).si_then([FNAME, this, &t, hint](auto mapping) { - auto intermediate_key = - mapping.is_indirect() - ? mapping.get_intermediate_key() - : mapping.get_key(); - auto intermediate_base = - mapping.is_indirect() - ? mapping.get_intermediate_base() - : mapping.get_key(); - - return lba_manager->clone_mapping( - t, - hint, - mapping.get_length(), - intermediate_key, - intermediate_base - ).si_then([FNAME, &t](auto pin) { - SUBDEBUGT(seastore_tm, "cloned as {}", t, pin); - return pin; + SUBDEBUGT(seastore_tm, "{} clone to hint {} ... pos={}, updateref={}", + t, mapping, hint, pos, updateref); + return seastar::do_with( + std::move(pos), + std::move(mapping), + [FNAME, this, &t, hint, updateref](auto &pos, auto &mapping) { + return pos.refresh( + ).si_then([&pos, &mapping](auto m) { + pos = std::move(m); + return mapping.refresh(); + }).si_then([FNAME, this, &pos, &t, hint, updateref](auto mapping) { + return lba_manager->clone_mapping( + t, + std::move(pos), + std::move(mapping), + hint, + updateref + ).si_then([FNAME, &t](auto ret) { + SUBDEBUGT(seastore_tm, "cloned as {}", t, ret.cloned_mapping); + return ret; + }); + }); + }); + } + + using clone_iertr = base_iertr; + using clone_ret = clone_iertr::future<>; + clone_ret clone_range( + Transaction &t, + laddr_t base, + extent_len_t len, + LBAMapping pos, + LBAMapping mapping, + bool updateref) + { + LOG_PREFIX(TransactionManager::clone_range); + SUBDEBUGT(seastore_tm, "object_data={}~{} mapping={} updateref={}", + t, base, len, mapping, updateref); + return seastar::do_with( + std::move(pos), + std::move(mapping), + (extent_len_t)0, + [&t, this, updateref, base, len](auto &pos, auto &mapping, auto &offset) { + return trans_intr::repeat( + [&t, this, &pos, &mapping, &offset, updateref, base, len]() + -> clone_iertr::future { + if (offset >= len) { + return clone_iertr::make_ready_future< + seastar::stop_iteration>(seastar::stop_iteration::yes); + } + if (!mapping.is_indirect() && mapping.is_zero_reserved()) { + return reserve_region( + t, + std::move(pos), + (base + offset).checked_to_laddr(), + mapping.get_length() + ).si_then([base, &offset](auto r) { + assert((base + offset).checked_to_laddr() == r.get_key()); + offset += r.get_length(); + return r.next(); + }).si_then([&pos, &mapping](auto r) { + pos = std::move(r); + return mapping.next(); + }).si_then([&mapping](auto p) { + mapping = std::move(p); + return seastar::stop_iteration::no; + }).handle_error_interruptible( + clone_iertr::pass_further{}, + crimson::ct_error::assert_all{"unexpected error"} + ); + } + return clone_pin( + t, std::move(pos), std::move(mapping), + (base + offset).checked_to_laddr(), updateref + ).si_then([&offset, &pos, &mapping](auto ret) { + offset += ret.cloned_mapping.get_length(); + return ret.cloned_mapping.next( + ).si_then([&pos, ret=std::move(ret)](auto p) mutable { + pos = std::move(p); + return ret.orig_mapping.next(); + }).si_then([&mapping](auto p) { + mapping = std::move(p); + return seastar::stop_iteration::no; + }); + }); }); }); } diff --git a/src/test/crimson/seastore/test_transaction_manager.cc b/src/test/crimson/seastore/test_transaction_manager.cc index 6459a03fd01..7e6f55c54ff 100644 --- a/src/test/crimson/seastore/test_transaction_manager.cc +++ b/src/test/crimson/seastore/test_transaction_manager.cc @@ -703,20 +703,31 @@ struct transaction_manager_test_t : LBAMapping clone_pin( test_transaction_t &t, - laddr_t offset, - LBAMapping mapping) { - auto pin = with_trans_intr( - *(t.t), - [this, offset, mapping=std::move(mapping)](auto &trans) mutable { - return tm->clone_pin(trans, offset, std::move(mapping)); - }).unsafe_get(); + LBAMapping pos, + LBAMapping mapping, + laddr_t offset) { + auto key = mapping.get_key(); + auto pin = with_trans_intr(*(t.t), [&](auto &trans) { + return tm->clone_pin( + trans, + std::move(pos), + std::move(mapping), + offset, + true); + }).unsafe_get().cloned_mapping; EXPECT_EQ(offset, pin.get_key()); - EXPECT_EQ(mapping.get_key(), pin.get_intermediate_key()); - EXPECT_EQ(mapping.get_key(), pin.get_intermediate_base()); + EXPECT_EQ(key, pin.get_intermediate_key()); + EXPECT_EQ(key, pin.get_intermediate_base()); test_mappings.inc_ref(pin.get_intermediate_key(), t.mapping_delta); return pin; } + LBAMapping get_end(test_transaction_t &t) { + return with_trans_intr(*(t.t), [&](auto &trans) { + return lba_manager->get_end_mapping(trans); + }).unsafe_get(); + } + std::optional try_get_pin( test_transaction_t &t, laddr_t offset) { @@ -1462,9 +1473,9 @@ struct transaction_manager_test_t : { auto t = create_transaction(); auto lpin = get_pin(t, l_offset); - auto rpin = get_pin(t, r_offset); - auto l_clone_pin = clone_pin(t, l_clone_offset, std::move(lpin)); - auto r_clone_pin = clone_pin(t, r_clone_offset, std::move(rpin)); + auto l_clone_pos = get_end(t); + auto l_clone_pin = clone_pin( + t, std::move(l_clone_pos), std::move(lpin), l_clone_offset); //split left l_clone_pin = refresh_lba_mapping(t, std::move(l_clone_pin)); auto pin1 = remap_pin(t, std::move(l_clone_pin), 0, 16 << 10); @@ -1476,6 +1487,10 @@ struct transaction_manager_test_t : auto lext = read_pin(t, std::move(*pin3)); EXPECT_EQ('l', lext->get_bptr().c_str()[0]); + auto rpin = get_pin(t, r_offset); + auto r_clone_pos = get_end(t); + auto r_clone_pin = clone_pin( + t, std::move(r_clone_pos), std::move(rpin), r_clone_offset); //split right r_clone_pin = refresh_lba_mapping(t, std::move(r_clone_pin)); auto pin4 = remap_pin(t, std::move(r_clone_pin), 16 << 10, 16 << 10); -- 2.39.5