From 2beb38219df17285d11728264da2e0625fb9e81c Mon Sep 17 00:00:00 2001 From: Xuehan Xu Date: Wed, 11 Jun 2025 11:49:51 +0800 Subject: [PATCH] crimson/os/seastore/lba_manager: add LBACursor related interfaces Signed-off-by: Xuehan Xu --- src/crimson/os/seastore/btree/btree_types.h | 3 + .../os/seastore/btree/fixed_kv_btree.h | 78 +- .../os/seastore/lba/btree_lba_manager.cc | 667 +++++++++++++----- .../os/seastore/lba/btree_lba_manager.h | 155 +++- src/crimson/os/seastore/lba_manager.h | 102 ++- src/crimson/os/seastore/lba_mapping.h | 4 +- .../os/seastore/object_data_handler.cc | 2 +- .../os/seastore/transaction_manager.cc | 125 +++- src/crimson/os/seastore/transaction_manager.h | 196 +++-- .../seastore/test_btree_lba_manager.cc | 6 +- .../seastore/test_transaction_manager.cc | 14 +- 11 files changed, 1010 insertions(+), 342 deletions(-) diff --git a/src/crimson/os/seastore/btree/btree_types.h b/src/crimson/os/seastore/btree/btree_types.h index 9266a3ad04b..87f4e72bb16 100644 --- a/src/crimson/os/seastore/btree/btree_types.h +++ b/src/crimson/os/seastore/btree/btree_types.h @@ -274,6 +274,9 @@ struct LBACursor : BtreeCursor { assert(!is_indirect()); return val->checksum; } + bool contains(laddr_t laddr) const { + return get_laddr() <= laddr && get_laddr() + get_length() > laddr; + } extent_ref_count_t get_refcount() const { assert(!is_end()); assert(!is_indirect()); diff --git a/src/crimson/os/seastore/btree/fixed_kv_btree.h b/src/crimson/os/seastore/btree/fixed_kv_btree.h index ea7c6ee6148..f61c93e3ee8 100644 --- a/src/crimson/os/seastore/btree/fixed_kv_btree.h +++ b/src/crimson/os/seastore/btree/fixed_kv_btree.h @@ -490,32 +490,39 @@ public: } iterator make_partial_iter( + op_context_t c, + cursor_t &cursor) + { + return make_partial_iter( + c, + cursor.parent->template cast(), + cursor.key, + cursor.pos); + } + + std::unique_ptr get_cursor( + op_context_t c, + TCachedExtentRef leaf, + node_key_t key) + { + auto it = leaf->lower_bound(key); + assert(it != 
leaf->end()); + return new cursor_t( + c, leaf, leaf->modifications, + key, it.get_val(), it.get_offset()); + } + + boost::intrusive_ptr get_cursor( op_context_t c, TCachedExtentRef leaf, node_key_t key, uint16_t pos) { - assert(leaf->is_valid()); - assert(leaf->is_viewable_by_trans(c.trans).first); - - auto depth = get_root().get_depth(); -#ifndef NDEBUG - auto ret = iterator( - depth, - depth == 1 - ? iterator::state_t::FULL - : iterator::state_t::PARTIAL); -#else - auto ret = iterator(depth); -#endif - ret.leaf.node = leaf; - ret.leaf.pos = pos; - if (ret.is_end()) { - ceph_assert(key == min_max_t::max); - } else { - ceph_assert(key == ret.get_key()); - } - return ret; + assert(leaf->get_size() != pos); + auto it = leaf->iter_idx(pos); + assert(it.get_key() == key); + return std::make_unique( + c, leaf, leaf->modifications, key, it.get_val(), pos); } /** @@ -1358,6 +1365,35 @@ public: private: RootBlockRef root_block; + iterator make_partial_iter( + op_context_t c, + TCachedExtentRef leaf, + node_key_t key, + uint16_t pos) + { + assert(leaf->is_valid()); + assert(leaf->is_viewable_by_trans(c.trans).first); + + auto depth = get_root().get_depth(); +#ifndef NDEBUG + auto ret = iterator( + depth, + depth == 1 + ? 
iterator::state_t::FULL + : iterator::state_t::PARTIAL); +#else + auto ret = iterator(depth); +#endif + ret.leaf.node = leaf; + ret.leaf.pos = pos; + if (ret.is_end()) { + ceph_assert(key == min_max_t::max); + } else { + ceph_assert(key == ret.get_key()); + } + return ret; + } + template using node_position_t = typename iterator::template node_position_t; diff --git a/src/crimson/os/seastore/lba/btree_lba_manager.cc b/src/crimson/os/seastore/lba/btree_lba_manager.cc index 56b497a57fb..81da404d633 100644 --- a/src/crimson/os/seastore/lba/btree_lba_manager.cc +++ b/src/crimson/os/seastore/lba/btree_lba_manager.cc @@ -154,7 +154,7 @@ BtreeLBAManager::get_mappings( } assert(cursor->val->refcount == EXTENT_DEFAULT_REF_COUNT); assert(cursor->val->checksum == 0); - return resolve_indirect_cursor(c, btree, *cursor + return this->resolve_indirect_cursor(c, btree, *cursor ).si_then([FNAME, c, &ret, &cursor, laddr, length](auto direct) { ret.emplace_back(LBAMapping::create_indirect( std::move(direct), std::move(cursor))); @@ -233,17 +233,24 @@ BtreeLBAManager::resolve_indirect_cursor( BtreeLBAManager::get_mapping_ret BtreeLBAManager::get_mapping( Transaction &t, - laddr_t laddr) + laddr_t laddr, + bool search_containing) { LOG_PREFIX(BtreeLBAManager::get_mapping); - TRACET("{} ...", t, laddr); + TRACET("{} ... 
search_containing={}", t, laddr, search_containing); auto c = get_context(t); return with_btree( cache, c, - [FNAME, this, c, laddr](auto& btree) + [FNAME, this, c, laddr, search_containing](auto& btree) { - return get_cursor(c, btree, laddr - ).si_then([FNAME, this, c, laddr, &btree](LBACursorRef cursor) { + auto fut = get_mapping_iertr::make_ready_future(); + if (search_containing) { + fut = get_containing_cursor(c, btree, laddr); + } else { + fut = get_cursor(c, btree, laddr); + } + return fut.si_then([FNAME, laddr, &btree, c, this, + search_containing](LBACursorRef cursor) { if (!cursor->is_indirect()) { TRACET("{} got direct cursor {}", c.trans, laddr, *cursor); @@ -251,7 +258,11 @@ BtreeLBAManager::get_mapping( return get_mapping_iertr::make_ready_future< LBAMapping>(std::move(mapping)); } - assert(laddr == cursor->get_laddr()); + if (search_containing) { + assert(cursor->contains(laddr)); + } else { + assert(laddr == cursor->get_laddr()); + } assert(cursor->val->refcount == EXTENT_DEFAULT_REF_COUNT); assert(cursor->val->checksum == 0); return resolve_indirect_cursor(c, btree, *cursor @@ -268,6 +279,138 @@ BtreeLBAManager::get_mapping( }); } +BtreeLBAManager::get_mapping_ret +BtreeLBAManager::get_mapping( + Transaction &t, + LogicalChildNode &extent) +{ + LOG_PREFIX(BtreeLBAManager::get_mapping); + TRACET("{}", t, extent); + assert(extent.peek_parent_node()->is_valid()); + auto c = get_context(t); + return with_btree( + cache, + c, + [c, &extent, FNAME](auto &btree) { + return extent.get_parent_node(c.trans, c.cache + ).si_then([&btree, c, &extent, FNAME](auto leaf) { + if (leaf->is_pending()) { + TRACET("find pending extent {} for {}", + c.trans, (void*)leaf.get(), extent); + } +#ifndef NDEBUG + auto it = leaf->lower_bound(extent.get_laddr()); + assert(it != leaf->end() && it.get_key() == extent.get_laddr()); +#endif + return get_mapping_iertr::make_ready_future< + LBAMapping>(LBAMapping::create_direct( + btree.get_cursor(c, leaf, extent.get_laddr()))); + 
}); + }); +} + +BtreeLBAManager::alloc_extent_ret +BtreeLBAManager::reserve_region( + Transaction &t, + LBAMapping pos, + laddr_t addr, + extent_len_t len) +{ + LOG_PREFIX(BtreeLBAManager::reserve_region); + DEBUGT("{} {}~{}", t, pos, addr, len); + assert(pos.is_viewable()); + auto c = get_context(t); + return with_btree( + cache, + c, + [pos=std::move(pos), c, addr, len](auto &btree) mutable { + auto &cursor = pos.get_effective_cursor(); + auto iter = btree.make_partial_iter(c, cursor); + lba_map_val_t val{len, P_ADDR_ZERO, EXTENT_DEFAULT_REF_COUNT, 0}; + return btree.insert(c, iter, addr, val + ).si_then([c](auto p) { + auto &[iter, inserted] = p; + ceph_assert(inserted); + auto &leaf_node = *iter.get_leaf_node(); + leaf_node.insert_child_ptr( + iter.get_leaf_pos(), + get_reserved_ptr(), + leaf_node.get_size() - 1 /*the size before the insert*/); + return LBAMapping::create_direct(iter.get_cursor(c)); + }); + }); +} + +BtreeLBAManager::alloc_extents_ret +BtreeLBAManager::alloc_extents( + Transaction &t, + LBAMapping pos, + std::vector extents) +{ + LOG_PREFIX(BtreeLBAManager::alloc_extents); + DEBUGT("{}", t, pos); + assert(pos.is_viewable()); + auto c = get_context(t); + return with_btree( + cache, + c, + [c, FNAME, pos=std::move(pos), this, + extents=std::move(extents)](auto &btree) mutable { + auto &cursor = pos.get_effective_cursor(); + return cursor.refresh( + ).si_then( + [&cursor, &btree, extents=std::move(extents), + pos=std::move(pos), c, FNAME, this] { + return seastar::do_with( + std::move(extents), + btree.make_partial_iter(c, cursor), + std::vector(), + [c, &btree, FNAME, this] + (auto &extents, auto &iter, auto &ret) mutable { + return trans_intr::do_for_each( + extents.rbegin(), + extents.rend(), + [&btree, FNAME, &iter, c, &ret, this](auto ext) { + assert(ext->has_laddr()); + stats.num_alloc_extents += ext->get_length(); + return btree.insert( + c, + iter, + ext->get_laddr(), + lba_map_val_t{ + ext->get_length(), + ext->get_paddr(), + 
EXTENT_DEFAULT_REF_COUNT, + ext->get_last_committed_crc()} + ).si_then([ext, c, FNAME, &iter, &ret](auto p) { + auto &[it, inserted] = p; + ceph_assert(inserted); + auto &leaf_node = *it.get_leaf_node(); + leaf_node.insert_child_ptr( + it.get_leaf_pos(), + ext.get(), + leaf_node.get_size() - 1 /*the size before the insert*/); + TRACET("inserted {}", c.trans, *ext); + ret.emplace(ret.begin(), LBAMapping::create_direct(it.get_cursor(c))); + iter = it; + }); +#ifndef NDEBUG + }).si_then([&iter, c] { + if (iter.is_begin()) { + return base_iertr::now(); + } + auto key = iter.get_key(); + return iter.prev(c).si_then([key](auto it) { + assert(key >= it.get_key() + it.get_val().len); + return base_iertr::now(); + }); +#endif + }).si_then([&ret] { return std::move(ret); }); + }); + }); + }); +} + BtreeLBAManager::_get_cursor_ret BtreeLBAManager::get_cursor( op_context_t c, @@ -611,12 +754,13 @@ BtreeLBAManager::rewrite_extent( BtreeLBAManager::update_mapping_ret BtreeLBAManager::update_mapping( Transaction& t, - laddr_t laddr, + LBAMapping mapping, extent_len_t prev_len, paddr_t prev_addr, LogicalChildNode& nextent) { LOG_PREFIX(BtreeLBAManager::update_mapping); + auto laddr = mapping.get_key(); auto addr = nextent.get_paddr(); auto len = nextent.get_length(); auto checksum = nextent.get_last_committed_crc(); @@ -624,80 +768,111 @@ BtreeLBAManager::update_mapping( t, laddr, prev_addr, prev_len, addr, len, checksum); assert(laddr == nextent.get_laddr()); assert(!addr.is_null()); - return _update_mapping( - t, - laddr, - [prev_addr, addr, prev_len, len, checksum] - (const lba_map_val_t &in) { - lba_map_val_t ret = in; - ceph_assert(in.pladdr.is_paddr()); - ceph_assert(in.pladdr.get_paddr() == prev_addr); - ceph_assert(in.len == prev_len); - ret.pladdr = addr; - ret.len = len; - ret.checksum = checksum; - return ret; - }, - &nextent - ).si_then([&t, laddr, prev_addr, prev_len, addr, len, checksum, FNAME](auto res) { - assert(res.is_alive_mapping()); - DEBUGT("laddr={}, paddr 
{}~0x{:x} => {}~0x{:x}, crc=0x{:x} done -- {}", - t, laddr, prev_addr, prev_len, addr, len, checksum, res.get_cursor()); - return update_mapping_iertr::make_ready_future< - extent_ref_count_t>(res.get_cursor().get_refcount()); - }, - update_mapping_iertr::pass_further{}, - /* ENOENT in particular should be impossible */ - crimson::ct_error::assert_all{ - "Invalid error in BtreeLBAManager::update_mapping" - } - ); -} - -BtreeLBAManager::update_mappings_ret -BtreeLBAManager::update_mappings( - Transaction& t, - const std::list& extents) -{ - return trans_intr::do_for_each(extents, [this, &t](auto &extent) { - LOG_PREFIX(BtreeLBAManager::update_mappings); - auto laddr = extent->get_laddr(); - auto prev_addr = extent->get_prior_paddr_and_reset(); - auto len = extent->get_length(); - auto addr = extent->get_paddr(); - auto checksum = extent->get_last_committed_crc(); - TRACET("laddr={}, paddr {}~0x{:x} => {}, crc=0x{:x}", - t, laddr, prev_addr, len, addr, checksum); - assert(!addr.is_null()); + assert(mapping.is_viewable()); + assert(!mapping.is_indirect()); + return seastar::do_with( + std::move(mapping), + [&t, this, prev_len, prev_addr, len, FNAME, + laddr, addr, checksum, &nextent](auto &mapping) { + auto &cursor = mapping.get_effective_cursor(); return _update_mapping( t, - laddr, - [prev_addr, addr, len, checksum]( - const lba_map_val_t &in) { - lba_map_val_t ret = in; - ceph_assert(in.pladdr.is_paddr()); - ceph_assert(in.pladdr.get_paddr() == prev_addr); - ceph_assert(in.len == len); - ret.pladdr = addr; - ret.checksum = checksum; - return ret; + cursor, + [prev_addr, addr, prev_len, len, checksum]( + const lba_map_val_t &in) { + assert(!addr.is_null()); + lba_map_val_t ret = in; + ceph_assert(in.pladdr.is_paddr()); + ceph_assert(in.pladdr.get_paddr() == prev_addr); + ceph_assert(in.len == prev_len); + ret.pladdr = addr; + ret.len = len; + ret.checksum = checksum; + return ret; }, - nullptr // all the extents should have already been - // added to the 
fixed_kv_btree - ).si_then([&t, laddr, prev_addr, len, addr, checksum, FNAME](auto res) { - DEBUGT("laddr={}, paddr {}~0x{:x} => {}, crc=0x{:x} done -- {}", - t, laddr, prev_addr, len, addr, checksum, res.get_cursor()); - return update_mapping_iertr::make_ready_future(); + &nextent + ).si_then([&t, laddr, prev_addr, prev_len, addr, len, checksum, FNAME](auto res) { + assert(res.is_alive_mapping()); + DEBUGT("laddr={}, paddr {}~0x{:x} => {}~0x{:x}, crc=0x{:x} done -- {}", + t, laddr, prev_addr, prev_len, addr, len, checksum, res.get_cursor()); + return update_mapping_iertr::make_ready_future< + extent_ref_count_t>(res.get_cursor().get_refcount()); }, update_mapping_iertr::pass_further{}, /* ENOENT in particular should be impossible */ crimson::ct_error::assert_all{ - "Invalid error in BtreeLBAManager::update_mappings" + "Invalid error in BtreeLBAManager::update_mapping" } ); }); } +BtreeLBAManager::update_mappings_ret +BtreeLBAManager::update_mappings( + Transaction& t, + const std::list& extents) +{ + LOG_PREFIX(BtreeLBAManager::update_mappings); + auto c = get_context(t); + return with_btree( + cache, + c, + [c, &extents, FNAME, this](auto &btree) { + return trans_intr::do_for_each( + extents, + [this, FNAME, c, &btree](auto &extent) { + return extent->get_parent_node(c.trans, c.cache + ).si_then([c, &extent, FNAME, &btree, this](auto leaf) { + if (leaf->is_pending()) { + TRACET("find pending extent {} for {}", + c.trans, (void*)leaf.get(), *extent); + } + return seastar::do_with( + btree.get_cursor(c, leaf, extent->get_laddr()), + [this, c, &extent, FNAME](auto &cursor) { + assert(!cursor->is_end() && + cursor->get_laddr() == extent->get_laddr()); + auto prev_addr = extent->get_prior_paddr_and_reset(); + auto len = extent->get_length(); + auto addr = extent->get_paddr(); + auto checksum = extent->get_last_committed_crc(); + TRACET("cursor={}, paddr {}~0x{:x} => {}, crc=0x{:x}", + c.trans, *cursor, prev_addr, len, addr, checksum); + assert(!addr.is_null()); + 
return this->_update_mapping( + c.trans, + *cursor, + [prev_addr, addr, len, checksum]( + const lba_map_val_t &in) { + lba_map_val_t ret = in; + ceph_assert(in.pladdr.is_paddr()); + ceph_assert(in.pladdr.get_paddr() == prev_addr); + ceph_assert(in.len == len); + ret.pladdr = addr; + ret.checksum = checksum; + return ret; + }, + nullptr // all the extents should have already been + // added to the fixed_kv_btree + ).si_then([c, &cursor, prev_addr, len, addr, + checksum, FNAME](auto res) { + DEBUGT("cursor={}, paddr {}~0x{:x} => {}, crc=0x{:x} done -- {}", + c.trans, *cursor, prev_addr, len, + addr, checksum, res.get_cursor()); + return update_mapping_iertr::make_ready_future(); + }, + update_mapping_iertr::pass_further{}, + /* ENOENT in particular should be impossible */ + crimson::ct_error::assert_all{ + "Invalid error in BtreeLBAManager::update_mappings" + } + ); + }); + }); + }); + }); +} + BtreeLBAManager::get_physical_extent_if_live_ret BtreeLBAManager::get_physical_extent_if_live( Transaction &t, @@ -725,6 +900,30 @@ BtreeLBAManager::get_physical_extent_if_live( }); } +BtreeLBAManager::complete_lba_mapping_ret +BtreeLBAManager::complete_indirect_lba_mapping( + Transaction &t, + LBAMapping mapping) +{ + assert(mapping.is_viewable()); + assert(mapping.is_indirect()); + if (mapping.is_complete_indirect()) { + return complete_lba_mapping_iertr::make_ready_future< + LBAMapping>(std::move(mapping)); + } + auto c = get_context(t); + return with_btree_state( + cache, + c, + std::move(mapping), + [this, c](auto &btree, auto &mapping) { + return resolve_indirect_cursor(c, btree, *mapping.indirect_cursor + ).si_then([&mapping](auto cursor) { + mapping.direct_cursor = std::move(cursor); + }); + }); +} + void BtreeLBAManager::register_metrics() { LOG_PREFIX(BtreeLBAManager::register_metrics); @@ -777,9 +976,9 @@ BtreeLBAManager::_decref_intermediate( if (val.refcount == 0) { return btree.remove(c, iter - ).si_then([key, val](auto) { + ).si_then([key, val, c](auto iter) { 
return ref_iertr::make_ready_future< - update_mapping_ret_bare_t>(key, val); + update_mapping_ret_bare_t>(key, val, iter.get_cursor(c)); }); } else { return btree.update(c, iter, val @@ -792,138 +991,54 @@ BtreeLBAManager::_decref_intermediate( }); } -BtreeLBAManager::remap_ret -BtreeLBAManager::remap_mappings( - Transaction &t, - LBAMapping orig_mapping, - std::vector remaps, - std::vector extents) -{ - LOG_PREFIX(BtreeLBAManager::remap_mappings); - struct state_t { - LBAMapping orig_mapping; - std::vector remaps; - std::vector extents; - std::vector alloc_infos; - std::vector ret; - }; - return seastar::do_with( - state_t(std::move(orig_mapping), std::move(remaps), std::move(extents), {}, {}), - [this, &t, FNAME](state_t &state) - { - return update_refcount( - t, state.orig_mapping.get_key(), -1, false - ).si_then([this, &t, &state, FNAME](auto ret) { - // Remapping the shared direct mapping is prohibited, - // the refcount of indirect mapping should always be 1. - ceph_assert(ret.is_removed_mapping()); - - auto orig_laddr = state.orig_mapping.get_key(); - if (!state.orig_mapping.is_indirect()) { - auto &addr = ret.get_removed_mapping().map_value.pladdr; - ceph_assert(addr.is_paddr() && !addr.get_paddr().is_zero()); - return alloc_extents( - t, - (state.remaps.front().offset + orig_laddr).checked_to_laddr(), - std::move(state.extents), - EXTENT_DEFAULT_REF_COUNT - ).si_then([&state](auto ret) { - state.ret = std::move(ret); - return remap_iertr::make_ready_future(); - }); - } - - extent_len_t orig_len = state.orig_mapping.get_length(); - auto intermediate_key = state.orig_mapping.get_intermediate_key(); - ceph_assert(intermediate_key != L_ADDR_NULL); - DEBUGT("remap indirect mapping {}", t, state.orig_mapping); - for (auto &remap : state.remaps) { - DEBUGT("remap 0x{:x}~0x{:x}", t, remap.offset, remap.len); - ceph_assert(remap.len != 0); - ceph_assert(remap.offset + remap.len <= orig_len); - auto remapped_laddr = (orig_laddr + remap.offset) - .checked_to_laddr(); 
- auto remapped_intermediate_key = (intermediate_key + remap.offset) - .checked_to_laddr(); - state.alloc_infos.emplace_back( - alloc_mapping_info_t::create_indirect( - remapped_laddr, remap.len, remapped_intermediate_key)); - } - - return alloc_sparse_mappings( - t, state.alloc_infos.front().key, state.alloc_infos, - alloc_policy_t::deterministic - ).si_then([&t, &state, this](std::list cursors) { - return seastar::futurize_invoke([&t, &state, this] { - if (state.remaps.size() > 1) { - auto base = state.orig_mapping.get_intermediate_base(); - return update_refcount( - t, base, state.remaps.size() - 1, false - ).si_then([](update_mapping_ret_bare_t ret) { - return ret.take_cursor(); - }); - } else { - return remap_iertr::make_ready_future< - LBACursorRef>(state.orig_mapping.direct_cursor->duplicate()); - } - }).si_then([&state, cursors=std::move(cursors)](auto direct) mutable { - for (auto &cursor : cursors) { - state.ret.emplace_back(LBAMapping::create_indirect( - direct->duplicate(), std::move(cursor))); - } - return remap_iertr::make_ready_future(); - }); - }); - }).si_then([&state] { - assert(state.ret.size() == state.remaps.size()); -#ifndef NDEBUG - auto mapping_it = state.ret.begin(); - auto remap_it = state.remaps.begin(); - for (;mapping_it != state.ret.end(); mapping_it++, remap_it++) { - auto &mapping = *mapping_it; - auto &remap = *remap_it; - assert(mapping.get_key() == state.orig_mapping.get_key() + remap.offset); - assert(mapping.get_length() == remap.len); - } -#endif - return remap_iertr::make_ready_future< - std::vector>(std::move(state.ret)); - }); - }); -} - BtreeLBAManager::update_refcount_ret BtreeLBAManager::update_refcount( Transaction &t, - laddr_t addr, + std::variant addr_or_cursor, int delta, bool cascade_remove) { + auto addr = addr_or_cursor.index() == 0 + ? 
std::get<0>(addr_or_cursor) + : std::get<1>(addr_or_cursor)->key; LOG_PREFIX(BtreeLBAManager::update_refcount); TRACET("laddr={}, delta={}", t, addr, delta); - return _update_mapping( - t, - addr, + auto fut = _update_mapping_iertr::make_ready_future< + update_mapping_ret_bare_t>(); + auto update_func = [delta](const lba_map_val_t &in) { lba_map_val_t out = in; ceph_assert((int)out.refcount + delta >= 0); out.refcount += delta; return out; - }, - nullptr - ).si_then([&t, addr, delta, FNAME, this, cascade_remove](auto res) { + }; + if (addr_or_cursor.index() == 0) { + fut = _update_mapping(t, addr, std::move(update_func), nullptr); + } else { + auto &cursor = std::get<1>(addr_or_cursor); + fut = _update_mapping(t, *cursor, std::move(update_func), nullptr); + } + return fut.si_then([delta, &t, addr, FNAME, this, cascade_remove](auto res) { DEBUGT("laddr={}, delta={} done -- {}", t, addr, delta, res.is_alive_mapping() ? res.get_cursor().val : res.get_removed_mapping().map_value); + if (res.is_removed_mapping() && cascade_remove && res.get_removed_mapping().map_value.pladdr.is_laddr()) { auto &val = res.get_removed_mapping().map_value; TRACET("decref intermediate {} -> {}", t, addr, val.pladdr.get_laddr()); return _decref_intermediate(t, val.pladdr.get_laddr(), val.len - ).handle_error_interruptible( + ).si_then([indirect_res=std::move(res), this](auto res) mutable { + return indirect_res.get_removed_mapping().next->refresh( + ).si_then([this, res=std::move(res), + ires=std::move(indirect_res)]() mutable { + return update_mapping_iertr::make_ready_future< + ref_update_result_t>(get_ref_update_result(ires, std::move(res))); + }); + }).handle_error_interruptible( update_mapping_iertr::pass_further{}, crimson::ct_error::assert_all{ "unexpect ENOENT" @@ -931,7 +1046,58 @@ BtreeLBAManager::update_refcount( ); } return update_mapping_iertr::make_ready_future< - update_mapping_ret_bare_t>(std::move(res)); + ref_update_result_t>(get_ref_update_result(res, std::nullopt)); + }); 
+} + +BtreeLBAManager::_update_mapping_ret +BtreeLBAManager::_update_mapping( + Transaction &t, + LBACursor &cursor, + update_func_t &&f, + LogicalChildNode* nextent) +{ + assert(cursor.is_viewable()); + auto c = get_context(t); + return with_btree( + cache, + c, + [c, f=std::move(f), &cursor, nextent](auto &btree) { + auto iter = btree.make_partial_iter(c, cursor); + auto ret = f(iter.get_val()); + if (ret.refcount == 0) { + return btree.remove( + c, + iter + ).si_then([ret, c, laddr=cursor.key](auto iter) { + if (iter.is_end()) { + return update_mapping_ret_bare_t{ + L_ADDR_NULL, std::move(ret), nullptr}; + } else { + return update_mapping_ret_bare_t{ + laddr, std::move(ret), iter.get_cursor(c)}; + } + }); + } else { + return btree.update( + c, + iter, + ret + ).si_then([c, nextent](auto iter) { + // child-ptr may already be correct, + // see LBAManager::update_mappings() + if (nextent && !nextent->has_parent_tracker()) { + iter.get_leaf_node()->update_child_ptr( + iter.get_leaf_pos(), nextent); + } + assert(!nextent || + (nextent->has_parent_tracker() + && nextent->peek_parent_node().get() == iter.get_leaf_node().get())); + LBACursorRef cursor = iter.get_cursor(c); + assert(cursor->val); + return update_mapping_ret_bare_t{std::move(cursor)}; + }); + } }); } @@ -963,8 +1129,8 @@ BtreeLBAManager::_update_mapping( return btree.remove( c, iter - ).si_then([addr, ret](auto) { - return update_mapping_ret_bare_t(addr, ret); + ).si_then([addr, ret, c](auto iter) { + return update_mapping_ret_bare_t(addr, ret, iter.get_cursor(c)); }); } else { return btree.update( @@ -989,6 +1155,141 @@ BtreeLBAManager::_update_mapping( }); } +BtreeLBAManager::_get_cursor_ret +BtreeLBAManager::get_containing_cursor( + op_context_t c, + LBABtree &btree, + laddr_t laddr) +{ + LOG_PREFIX(BtreeLBAManager::get_containing_cursor); + TRACET("{}", c.trans, laddr); + return btree.upper_bound_right(c, laddr + ).si_then([c, laddr, FNAME](LBABtree::iterator iter) + -> _get_cursor_ret { + if 
(iter.is_end() || + iter.get_key() > laddr || + iter.get_key() + iter.get_val().len <=laddr) { + ERRORT("laddr={} doesn't exist", c.trans, laddr); + return crimson::ct_error::enoent::make(); + } + TRACET("{} got {}, {}", + c.trans, laddr, iter.get_key(), iter.get_val()); + return get_mapping_iertr::make_ready_future< + LBACursorRef>(iter.get_cursor(c)); + }); +} + +BtreeLBAManager::remap_ret +BtreeLBAManager::remap_mappings( + Transaction &t, + LBAMapping mapping, + std::vector remaps) +{ + LOG_PREFIX(BtreeLBAManager::remap_mappings); + DEBUGT("{}", t, mapping); + assert(mapping.is_viewable()); + assert(mapping.is_indirect() == mapping.is_complete_indirect()); + auto c = get_context(t); + return with_btree( + cache, + c, + [mapping=std::move(mapping), c, this, + remaps=std::move(remaps)](auto &btree) mutable { + auto &cursor = mapping.get_effective_cursor(); + return seastar::do_with( + std::move(remaps), + std::move(mapping), + btree.make_partial_iter(c, cursor), + std::vector(), + [c, &btree, this, &cursor](auto &remaps, auto &mapping, auto &iter, auto &ret) { + auto val = iter.get_val(); + assert(val.refcount == EXTENT_DEFAULT_REF_COUNT); + assert(mapping.is_indirect() || + (val.pladdr.is_paddr() && + val.pladdr.get_paddr().is_absolute())); + return update_refcount(c.trans, &cursor, -1, false + ).si_then([&mapping, &btree, &iter, c, &ret, + &remaps, pladdr=val.pladdr](auto r) { + assert(r.result.refcount == 0); + auto &cursor = r.result.mapping.get_effective_cursor(); + iter = btree.make_partial_iter(c, cursor); + return trans_intr::do_for_each( + remaps, + [&mapping, &btree, &iter, c, &ret, pladdr](auto &remap) { + assert(remap.offset + remap.len <= mapping.get_length()); + assert((bool)remap.extent == !mapping.is_indirect()); + lba_map_val_t val; + auto old_key = mapping.get_key(); + auto new_key = (old_key + remap.offset).checked_to_laddr(); + val.len = remap.len; + if (pladdr.is_laddr()) { + auto laddr = pladdr.get_laddr(); + val.pladdr = (laddr + 
remap.offset).checked_to_laddr(); + } else { + auto paddr = pladdr.get_paddr(); + val.pladdr = paddr + remap.offset; + } + val.refcount = EXTENT_DEFAULT_REF_COUNT; + val.checksum = 0; // the checksum should be updated later when + // committing the transaction + return btree.insert(c, iter, new_key, std::move(val) + ).si_then([c, &remap, &mapping, &ret, &iter](auto p) { + auto &[it, inserted] = p; + ceph_assert(inserted); + auto &leaf_node = *it.get_leaf_node(); + if (mapping.is_indirect()) { + leaf_node.insert_child_ptr( + it.get_leaf_pos(), + get_reserved_ptr(), + leaf_node.get_size() - 1 /*the size before the insert*/); + ret.push_back( + LBAMapping::create_indirect(nullptr, it.get_cursor(c))); + } else { + leaf_node.insert_child_ptr( + it.get_leaf_pos(), + remap.extent, + leaf_node.get_size() - 1 /*the size before the insert*/); + ret.push_back( + LBAMapping::create_direct(it.get_cursor(c))); + } + return it.next(c).si_then([&iter](auto it) { + iter = std::move(it); + }); + }); + }); + }).si_then([&mapping, &ret] { + if (mapping.is_indirect()) { + auto &cursor = mapping.direct_cursor; + return cursor->refresh( + ).si_then([&ret, &mapping] { + for (auto &m : ret) { + m.direct_cursor = mapping.direct_cursor->duplicate(); + } + }); + } + return base_iertr::now(); + }).si_then([this, c, &mapping, &remaps] { + if (remaps.size() > 1 && mapping.is_indirect()) { + auto &cursor = mapping.direct_cursor; + assert(cursor->is_viewable()); + return update_refcount( + c.trans, cursor.get(), 1, false).discard_result(); + } + return update_refcount_iertr::now(); + }).si_then([&ret] { + return trans_intr::parallel_for_each( + ret, + [](auto &remapped_mapping) { + return remapped_mapping.refresh( + ).si_then([&remapped_mapping](auto mapping) { + remapped_mapping = std::move(mapping); + }); + }); + }).si_then([&ret] { + return std::move(ret); + }); + }); + }); } } diff --git a/src/crimson/os/seastore/lba/btree_lba_manager.h b/src/crimson/os/seastore/lba/btree_lba_manager.h index 
1e4848f683e..d0680b0477b 100644 --- a/src/crimson/os/seastore/lba/btree_lba_manager.h +++ b/src/crimson/os/seastore/lba/btree_lba_manager.h @@ -68,7 +68,18 @@ public: get_mapping_ret get_mapping( Transaction &t, - laddr_t offset) final; + laddr_t offset, + bool search_containing = false) final; + + get_mapping_ret get_mapping( + Transaction &t, + LogicalChildNode &extent) final; + + alloc_extent_ret reserve_region( + Transaction &t, + LBAMapping pos, + laddr_t laddr, + extent_len_t len) final; alloc_extent_ret reserve_region( Transaction &t, @@ -122,6 +133,11 @@ public: crimson::ct_error::assert_all{"unexpect enoent"}); } + alloc_extents_ret alloc_extents( + Transaction &t, + LBAMapping pos, + std::vector ext) final; + alloc_extent_ret alloc_extent( Transaction &t, laddr_t hint, @@ -209,17 +225,43 @@ public: ref_ret remove_mapping( Transaction &t, laddr_t addr) final { - return update_refcount(t, addr, -1, true - ).si_then([](auto res) { - return ref_update_result_t(res); + return update_refcount(t, addr, -1, true); + } + + ref_ret remove_mapping( + Transaction &t, + LBAMapping mapping) final { + assert(mapping.is_viewable()); + return seastar::do_with( + std::move(mapping), + [&t, this](auto &mapping) { + auto &cursor = mapping.get_effective_cursor(); + return update_refcount(t, &cursor, -1, true); + }); + } + + ref_ret incref_extent( + Transaction &t, + laddr_t addr) final { + return update_refcount(t, addr, 1, false); + } + + ref_ret incref_extent( + Transaction &t, + LBAMapping mapping) final { + assert(mapping.is_viewable()); + return seastar::do_with( + std::move(mapping), + [&t, this](auto &mapping) { + auto &cursor = mapping.get_effective_cursor(); + return update_refcount(t, &cursor, 1, false); }); } remap_ret remap_mappings( Transaction &t, - LBAMapping orig_mapping, - std::vector remaps, - std::vector extents) final; + LBAMapping mapping, + std::vector remaps) final; /** * init_cached_extent @@ -249,7 +291,7 @@ public: update_mapping_ret update_mapping( 
Transaction& t, - laddr_t laddr, + LBAMapping mapping, extent_len_t prev_len, paddr_t prev_addr, LogicalChildNode&) final; @@ -265,6 +307,10 @@ public: laddr_t laddr, extent_len_t len) final; + complete_lba_mapping_ret complete_indirect_lba_mapping( + Transaction &t, + LBAMapping mapping) final; + private: Cache &cache; @@ -336,12 +382,14 @@ private: update_mapping_ret_bare_t(LBACursorRef cursor) : ret(std::move(cursor)) {} - update_mapping_ret_bare_t(laddr_t laddr, lba_map_val_t value) - : ret(removed_mapping_t{laddr, value}) {} + update_mapping_ret_bare_t( + laddr_t laddr, lba_map_val_t value, LBACursorRef &&cursor) + : ret(removed_mapping_t{laddr, value, std::move(cursor)}) {} struct removed_mapping_t { laddr_t laddr; lba_map_val_t map_value; + LBACursorRef next; }; std::variant ret; @@ -358,6 +406,11 @@ private: } } + removed_mapping_t &get_removed_mapping() { + assert(is_removed_mapping()); + return std::get<0>(ret); + } + const removed_mapping_t& get_removed_mapping() const { assert(is_removed_mapping()); return std::get<0>(ret); @@ -372,29 +425,52 @@ private: assert(is_alive_mapping()); return std::move(std::get<1>(ret)); } + }; - explicit operator ref_update_result_t() const { - if (is_removed_mapping()) { - auto v = get_removed_mapping(); - auto &val = v.map_value; - ceph_assert(val.pladdr.is_paddr()); - return {v.laddr, val.refcount, val.pladdr, val.len}; - } else { - assert(is_alive_mapping()); - auto &c = get_cursor(); - assert(c.val); - ceph_assert(!c.is_indirect()); - return {c.get_laddr(), c.val->refcount, c.val->pladdr, c.val->len}; - } + mapping_update_result_t get_mapping_update_result( + update_mapping_ret_bare_t &result) { + if (result.is_removed_mapping()) { + auto &v = result.get_removed_mapping(); + auto &val = v.map_value; + return {v.laddr, + val.refcount, + val.pladdr, + val.len, + (!v.next->is_end() && v.next->is_indirect()) + ? 
LBAMapping::create_indirect(nullptr, std::move(v.next)) + : LBAMapping::create_direct(std::move(v.next))}; + } else { + assert(result.is_alive_mapping()); + auto &c = result.get_cursor(); + assert(c.val); + ceph_assert(!c.is_indirect()); + return {c.get_laddr(), c.val->refcount, + c.val->pladdr, c.val->len, + LBAMapping::create_direct(result.take_cursor())}; } - }; + } + + ref_update_result_t get_ref_update_result( + update_mapping_ret_bare_t &result, + std::optional direct_result) { + mapping_update_result_t primary_r = get_mapping_update_result(result); + + if (direct_result) { + // only removing indirect mapping can have direct_result + assert(result.is_removed_mapping()); + assert(result.get_removed_mapping().map_value.pladdr.is_laddr()); + auto direct_r = get_mapping_update_result(*direct_result); + return ref_update_result_t{std::move(primary_r), std::move(direct_r)}; + } + return ref_update_result_t{std::move(primary_r), std::nullopt}; + } using update_refcount_iertr = ref_iertr; using update_refcount_ret = update_refcount_iertr::future< - update_mapping_ret_bare_t>; + ref_update_result_t>; update_refcount_ret update_refcount( Transaction &t, - laddr_t addr, + std::variant addr_or_cursor, int delta, bool cascade_remove); @@ -414,6 +490,11 @@ private: laddr_t addr, update_func_t &&f, LogicalChildNode*); + _update_mapping_ret _update_mapping( + Transaction &t, + LBACursor &cursor, + update_func_t &&f, + LogicalChildNode*); struct insert_position_t { laddr_t laddr; @@ -487,10 +568,7 @@ private: laddr_t addr, int delta) { ceph_assert(delta > 0); - return update_refcount(t, addr, delta, false - ).si_then([](auto res) { - return ref_update_result_t(res); - }); + return update_refcount(t, addr, delta, false); } using _get_cursor_ret = get_mapping_iertr::future; @@ -499,6 +577,11 @@ private: LBABtree& btree, laddr_t offset); + _get_cursor_ret get_containing_cursor( + op_context_t c, + LBABtree &btree, + laddr_t laddr); + using _get_cursors_ret = 
get_mappings_iertr::future>; _get_cursors_ret get_cursors( op_context_t c, @@ -512,6 +595,18 @@ private: LBABtree& btree, const LBACursor& indirect_cursor); + resolve_indirect_cursor_ret resolve_indirect_cursor( + op_context_t c, + const LBACursor& indirect_cursor) { + assert(indirect_cursor.is_indirect()); + return with_btree( + cache, + c, + [c, &indirect_cursor, this](auto &btree) { + return resolve_indirect_cursor(c, btree, indirect_cursor); + }); + } + using _decref_intermediate_ret = ref_iertr::future< update_mapping_ret_bare_t>; _decref_intermediate_ret _decref_intermediate( diff --git a/src/crimson/os/seastore/lba_manager.h b/src/crimson/os/seastore/lba_manager.h index b01e89633d1..7b75d5790e1 100644 --- a/src/crimson/os/seastore/lba_manager.h +++ b/src/crimson/os/seastore/lba_manager.h @@ -62,7 +62,17 @@ public: using get_mapping_ret = get_mapping_iertr::future; virtual get_mapping_ret get_mapping( Transaction &t, - laddr_t offset) = 0; + laddr_t offset, + bool search_containing = false) = 0; + + /* + * Fetches the mapping corresponding to the "extent" + * + */ + virtual get_mapping_ret get_mapping( + Transaction &t, + LogicalChildNode &extent) = 0; + /** * Allocates a new mapping referenced by LBARef @@ -86,6 +96,15 @@ public: laddr_t hint, std::vector extents, extent_ref_count_t refcount) = 0; + /* + * Allocate extents at "pos" + * + * Returns the inserted lba mappings + */ + virtual alloc_extents_ret alloc_extents( + Transaction &t, + LBAMapping pos, + std::vector ext) = 0; virtual alloc_extent_ret clone_mapping( Transaction &t, @@ -99,11 +118,32 @@ public: laddr_t hint, extent_len_t len) = 0; - struct ref_update_result_t { - laddr_t direct_key; + /* + * Inserts a zero mapping at the position "pos" with + * the key "laddr" and length "len" + */ + virtual alloc_extent_ret reserve_region( + Transaction &t, + LBAMapping pos, + laddr_t hint, + extent_len_t len) = 0; + + struct mapping_update_result_t { + laddr_t key; extent_ref_count_t refcount = 0; 
pladdr_t addr; extent_len_t length = 0; + LBAMapping mapping; // the mapping pointing to the updated lba entry if + // refcount is non-zero; the next lba entry otherwise; + // null mapping if the mapping is the last one and + // is removed + bool need_to_remove_extent() const { + return refcount == 0 && addr.is_paddr() && !addr.get_paddr().is_zero(); + } + }; + struct ref_update_result_t { + mapping_update_result_t result; + std::optional direct_result; }; using ref_iertr = base_iertr::extend< crimson::ct_error::enoent>; @@ -112,19 +152,47 @@ public: /** * Removes a mapping and deal with indirection * - * @return returns resulting refcount + * @return returns the information about the removed + * mappings including the corresponding direct mapping + * if the mapping of laddr is indirect. + */ + virtual ref_ret remove_mapping( + Transaction &t, + laddr_t addr) = 0; + + /* + * Removes the mapping and deals with indirection + * + * @return returns the information about the removed + * mappings including the corresponding direct mapping + * if the mapping of laddr is indirect. 
*/ virtual ref_ret remove_mapping( + Transaction &t, + LBAMapping mapping) = 0; + + /** + * Increments ref count on extent + * + * @return returns resulting refcount + */ + virtual ref_ret incref_extent( Transaction &t, laddr_t addr) = 0; + virtual ref_ret incref_extent( + Transaction &t, + LBAMapping mapping) = 0; struct remap_entry_t { extent_len_t offset; extent_len_t len; - remap_entry_t(extent_len_t _offset, extent_len_t _len) { - offset = _offset; - len = _len; - } + LogicalChildNode* extent = nullptr; + remap_entry_t( + extent_len_t _offset, + extent_len_t _len, + LogicalChildNode *extent = nullptr) + : offset(_offset), len(_len), extent(extent) + {} }; using remap_iertr = ref_iertr; using remap_ret = remap_iertr::future>; @@ -138,9 +206,7 @@ public: virtual remap_ret remap_mappings( Transaction &t, LBAMapping orig_mapping, - std::vector remaps, - std::vector extents // Required if and only - // if pin isn't indirect + std::vector remaps ) = 0; /** @@ -195,7 +261,7 @@ public: using update_mapping_ret = base_iertr::future; virtual update_mapping_ret update_mapping( Transaction& t, - laddr_t laddr, + LBAMapping mapping, extent_len_t prev_len, paddr_t prev_addr, LogicalChildNode& nextent) = 0; @@ -230,6 +296,18 @@ public: laddr_t laddr, extent_len_t len) = 0; + using complete_lba_mapping_iertr = get_mappings_iertr; + using complete_lba_mapping_ret = + complete_lba_mapping_iertr::future; + /* + * Completes an incomplete indirect mapping + * + * No effect if the indirect mapping is already complete + */ + virtual complete_lba_mapping_ret complete_indirect_lba_mapping( + Transaction &t, + LBAMapping mapping) = 0; + virtual ~LBAManager() {} }; using LBAManagerRef = std::unique_ptr; diff --git a/src/crimson/os/seastore/lba_mapping.h b/src/crimson/os/seastore/lba_mapping.h index f0dfb06ff78..b57cf9ae138 100644 --- a/src/crimson/os/seastore/lba_mapping.h +++ b/src/crimson/os/seastore/lba_mapping.h @@ -30,9 +30,11 @@ public: } static LBAMapping 
create_direct(LBACursorRef direct) { + assert(!direct->is_indirect()); return LBAMapping(std::move(direct), nullptr); } + LBAMapping() = delete; LBAMapping(const LBAMapping &) = delete; LBAMapping(LBAMapping &&) = default; LBAMapping &operator=(const LBAMapping &) = delete; @@ -150,7 +152,7 @@ private: } bool is_null() const { - return !indirect_cursor && !direct_cursor; + return !direct_cursor && !indirect_cursor; } bool is_complete_indirect() const { diff --git a/src/crimson/os/seastore/object_data_handler.cc b/src/crimson/os/seastore/object_data_handler.cc index bdfff469918..782e0b764a1 100644 --- a/src/crimson/os/seastore/object_data_handler.cc +++ b/src/crimson/os/seastore/object_data_handler.cc @@ -1737,7 +1737,7 @@ ObjectDataHandler::clone_ret ObjectDataHandler::clone_extents( if (pin.get_val().is_zero()) { return ctx.tm.reserve_region(ctx.t, addr, pin.get_length()); } else { - return ctx.tm.clone_pin(ctx.t, addr, pin); + return ctx.tm.clone_pin(ctx.t, addr, pin.duplicate()); } }).si_then( [&pin, &last_pos, offset](auto) { diff --git a/src/crimson/os/seastore/transaction_manager.cc b/src/crimson/os/seastore/transaction_manager.cc index 53acf6c937a..0ffa865a0fc 100644 --- a/src/crimson/os/seastore/transaction_manager.cc +++ b/src/crimson/os/seastore/transaction_manager.cc @@ -207,12 +207,15 @@ TransactionManager::ref_ret TransactionManager::remove( DEBUGT("{} ...", t, *ref); return lba_manager->remove_mapping(t, ref->get_laddr() ).si_then([this, FNAME, &t, ref](auto result) { - if (result.refcount == 0) { + assert(!result.direct_result); + auto &primary_result = result.result; + if (primary_result.refcount == 0) { cache->retire_extent(t, ref); } DEBUGT("removed {}~0x{:x} refcount={} -- {}", - t, result.addr, result.length, result.refcount, *ref); - return result.refcount; + t, primary_result.addr, primary_result.length, + primary_result.refcount, *ref); + return primary_result.refcount; }); } @@ -225,17 +228,72 @@ TransactionManager::ref_ret 
TransactionManager::remove( return lba_manager->remove_mapping(t, offset ).si_then([this, FNAME, offset, &t](auto result) -> ref_ret { auto fut = ref_iertr::now(); - if (result.refcount == 0) { - if (result.addr.is_paddr() && - !result.addr.get_paddr().is_zero()) { - fut = cache->retire_extent_addr( - t, result.addr.get_paddr(), result.length); - } + auto &primary_result = result.result; + assert(primary_result.refcount == 0); + if (primary_result.need_to_remove_extent()) { + ceph_assert(!result.direct_result); + fut = cache->retire_extent_addr( + t, primary_result.addr.get_paddr(), primary_result.length); + } else if (auto &direct_result = result.direct_result; + direct_result.has_value() && + direct_result->need_to_remove_extent()) { + fut = cache->retire_extent_addr( + t, direct_result->addr.get_paddr(), direct_result->length); } return fut.si_then([result=std::move(result), offset, &t, FNAME] { DEBUGT("removed {}~0x{:x} refcount={} -- offset={}", - t, result.addr, result.length, result.refcount, offset); - return result.refcount; + t, result.result.addr, result.result.length, + result.result.refcount, offset); + return result.result.refcount; + }); + }); +} + +TransactionManager::ref_iertr::future TransactionManager::remove( + Transaction &t, + LBAMapping mapping) +{ + LOG_PREFIX(TransactionManager::remove); + return mapping.refresh().si_then([&t, this, FNAME](auto mapping) { + auto fut = base_iertr::make_ready_future(); + if (!mapping.is_indirect() && mapping.get_val().is_real_location()) { + auto ret = get_extent_if_linked(t, mapping.duplicate()); + if (ret.index() == 1) { + fut = std::move(std::get<1>(ret)); + } + } + return fut.si_then([mapping=std::move(mapping), + FNAME, this, &t](auto extent) mutable { + auto offset = mapping.get_key(); + return lba_manager->remove_mapping(t, std::move(mapping) + ).si_then([FNAME, this, extent, &t, offset](auto result) { + auto fut = ref_iertr::now(); + auto &primary_result = result.result; + 
assert(primary_result.refcount == 0); + if (primary_result.need_to_remove_extent()) { + ceph_assert(!result.direct_result); + if (extent) { + cache->retire_extent(t, extent); + } else { + fut = cache->retire_extent_addr( + t, primary_result.addr.get_paddr(), primary_result.length); + } + } else if (auto &direct_result = result.direct_result; + direct_result.has_value() && + direct_result->need_to_remove_extent()) { + ceph_assert(!extent); + fut = cache->retire_extent_addr( + t, direct_result->addr.get_paddr(), direct_result->length); + } else { + ceph_assert(!extent); + } + return fut.si_then([result=std::move(result), &t, FNAME, offset]() mutable { + DEBUGT("removed {}~0x{:x} refcount={} -- offset={}", + t, result.result.addr, result.result.length, + result.result.refcount, offset); + return std::move(result.result.mapping); + }); + }); }); }); } @@ -522,13 +580,19 @@ TransactionManager::rewrite_logical_extent( * extents since we're going to do it again once we either do the ool write * or allocate a relative inline addr. TODO: refactor AsyncCleaner to * avoid this complication. 
*/ - return lba_manager->update_mapping( - t, - extent->get_laddr(), - extent->get_length(), - extent->get_paddr(), - *nextent - ).discard_result(); + return lba_manager->get_mapping(t, *extent + ).si_then([this, &t, extent, nextent](auto mapping) { + return lba_manager->update_mapping( + t, + std::move(mapping), + extent->get_length(), + extent->get_paddr(), + *nextent + ).discard_result(); + }).handle_error_interruptible( + rewrite_extent_iertr::pass_further{}, + crimson::ct_error::assert_all{"unexpected enoent"} + ); } else { assert(get_extent_category(extent->get_type()) == data_category_t::DATA); auto length = extent->get_length(); @@ -569,15 +633,22 @@ TransactionManager::rewrite_logical_extent( auto fut = base_iertr::now(); if (first_extent) { assert(off == 0); - fut = lba_manager->update_mapping( - t, - extent->get_laddr(), - extent->get_length(), - extent->get_paddr(), - *nextent - ).si_then([&refcount](auto c) { - refcount = c; - }); + fut = lba_manager->get_mapping(t, *extent + ).si_then([this, &t, extent, nextent, + &refcount](auto mapping) { + return lba_manager->update_mapping( + t, + std::move(mapping), + extent->get_length(), + extent->get_paddr(), + *nextent + ).si_then([&refcount](auto c) { + refcount = c; + }); + }).handle_error_interruptible( + rewrite_extent_iertr::pass_further{}, + crimson::ct_error::assert_all{"unexpected enoent"} + ); } else { ceph_assert(refcount != 0); fut = lba_manager->alloc_extent( diff --git a/src/crimson/os/seastore/transaction_manager.h b/src/crimson/os/seastore/transaction_manager.h index 6a273e7c49c..00dadef0023 100644 --- a/src/crimson/os/seastore/transaction_manager.h +++ b/src/crimson/os/seastore/transaction_manager.h @@ -108,7 +108,24 @@ public: laddr_t offset) { LOG_PREFIX(TransactionManager::get_pin); SUBDEBUGT(seastore_tm, "{} ...", t, offset); - return lba_manager->get_mapping(t, offset + return lba_manager->get_mapping(t, offset, false + ).si_then([FNAME, &t](LBAMapping pin) { + SUBDEBUGT(seastore_tm, "got 
{}", t, pin); + return pin; + }); + } + + /** + * get_containing_pin + * + * Get the logical pin containing laddr + */ + get_pin_ret get_containing_pin( + Transaction &t, + laddr_t laddr) { + LOG_PREFIX(TransactionManager::get_containing_pin); + SUBDEBUGT(seastore_tm, "{} ...", t, laddr); + return lba_manager->get_mapping(t, laddr, true ).si_then([FNAME, &t](LBAMapping pin) { SUBDEBUGT(seastore_tm, "got {}", t, pin); return pin; @@ -349,6 +366,10 @@ public: Transaction &t, laddr_t offset); + ref_iertr::future remove( + Transaction &t, + LBAMapping mapping); + /// remove refcount for list of offset using refs_ret = ref_iertr::future>; refs_ret remove( @@ -410,33 +431,59 @@ public: Transaction &t, laddr_t laddr_hint, extent_len_t len, + std::optional pos = std::nullopt, placement_hint_t placement_hint = placement_hint_t::HOT) { static_assert(is_data_type(T::TYPE)); LOG_PREFIX(TransactionManager::alloc_data_extents); SUBDEBUGT(seastore_tm, "{} hint {}~0x{:x} phint={} ...", t, T::TYPE, laddr_hint, len, placement_hint); - auto exts = cache->alloc_new_data_extents( - t, - len, - placement_hint, - INIT_GENERATION); - // user must initialize the logical extent themselves - assert(is_user_transaction(t.get_src())); - for (auto& ext : exts) { - ext->set_seen_by_users(); - } - return lba_manager->alloc_extents( - t, - laddr_hint, - std::vector( - exts.begin(), exts.end()), - EXTENT_DEFAULT_REF_COUNT - ).si_then([exts=std::move(exts), &t, FNAME](auto &&) mutable { - for (auto &ext : exts) { - SUBDEBUGT(seastore_tm, "allocated {}", t, *ext); + return seastar::do_with( + cache->alloc_new_data_extents( + t, + len, + placement_hint, + INIT_GENERATION), + [pos=std::move(pos), this, &t, + FNAME, laddr_hint](auto &exts) mutable { + // user must initialize the logical extent themselves + assert(is_user_transaction(t.get_src())); + for (auto& ext : exts) { + ext->set_seen_by_users(); + } + if (pos) { + // laddr_hint is determined + auto off = laddr_hint; + for (auto &extent : exts) { + 
extent->set_laddr(off); + off = (off + extent->get_length()).checked_to_laddr(); + } + } + auto fut = alloc_extents_iertr::make_ready_future< + std::vector>(); + if (pos) { + fut = pos->refresh( + ).si_then([&t, &exts, this](auto pos) { + return lba_manager->alloc_extents( + t, + std::move(pos), + std::vector( + exts.begin(), exts.end())); + }); + } else { + fut = lba_manager->alloc_extents( + t, + laddr_hint, + std::vector( + exts.begin(), exts.end()), + EXTENT_DEFAULT_REF_COUNT); } - return alloc_extent_iertr::make_ready_future< - std::vector>>(std::move(exts)); + return fut.si_then([&exts, &t, FNAME](auto &&) mutable { + for (auto &ext : exts) { + SUBDEBUGT(seastore_tm, "allocated {}", t, *ext); + } + return alloc_extent_iertr::make_ready_future< + std::vector>>(std::move(exts)); + }); }); } @@ -485,6 +532,7 @@ public: // must be user-oriented required by (the potential) maybe_init assert(is_user_transaction(t.get_src())); + LOG_PREFIX(TransactionManager::remap_pin); #ifndef NDEBUG std::sort(remaps.begin(), remaps.end(), [](remap_entry_t x, remap_entry_t y) { @@ -512,23 +560,31 @@ public: #endif return seastar::do_with( - std::vector(), std::move(pin), std::move(remaps), - [&t, this](auto &extents, auto &pin, auto &remaps) { - laddr_t original_laddr = pin.get_key(); - extent_len_t original_len = pin.get_length(); - paddr_t original_paddr = pin.get_val(); - LOG_PREFIX(TransactionManager::remap_pin); - SUBDEBUGT(seastore_tm, "{}~0x{:x} {} into {} remaps ... {}", - t, original_laddr, original_len, original_paddr, remaps.size(), pin); + [FNAME, &t, this](auto &pin, auto &remaps) { // The according extent might be stable or pending. 
auto fut = base_iertr::now(); - if (!pin.is_indirect()) { + if (pin.is_indirect()) { + SUBDEBUGT(seastore_tm, "{} into {} remaps ...", + t, pin, remaps.size()); + fut = lba_manager->refresh_lba_mapping(t, std::move(pin) + ).si_then([this, &pin, &t](auto mapping) { + return lba_manager->complete_indirect_lba_mapping( + t, std::move(mapping) + ).si_then([&pin](auto mapping) { + pin = std::move(mapping); + }); + }); + } else { + laddr_t original_laddr = pin.get_key(); + extent_len_t original_len = pin.get_length(); + paddr_t original_paddr = pin.get_val(); + SUBDEBUGT(seastore_tm, "{}~0x{:x} {} into {} remaps ... {}", + t, original_laddr, original_len, original_paddr, remaps.size(), pin); ceph_assert(!pin.is_clone()); - fut = fut.si_then([this, &t, &pin]() mutable { - return lba_manager->refresh_lba_mapping(t, std::move(pin)); - }).si_then([this, &t, &pin, original_paddr, original_len](auto newpin) { + fut = lba_manager->refresh_lba_mapping(t, std::move(pin) + ).si_then([this, &t, &pin, original_paddr, original_len](auto newpin) { pin = std::move(newpin); if (full_extent_integrity_check) { return read_pin(t, pin.duplicate() @@ -555,8 +611,7 @@ public: } } }).si_then([this, &t, &remaps, original_paddr, - original_laddr, original_len, - &extents, FNAME](auto ext) mutable { + original_laddr, original_len, FNAME](auto ext) mutable { ceph_assert(full_extent_integrity_check ? (ext && ext->is_fully_loaded()) : true); @@ -593,16 +648,15 @@ public: original_bptr); // user must initialize the logical extent themselves. 
extent->set_seen_by_users(); - extents.emplace_back(std::move(extent)); + remap.extent = extent.get(); } }); } - return fut.si_then([this, &t, &pin, &remaps, &extents, FNAME] { + return fut.si_then([this, &t, &pin, &remaps, FNAME] { return lba_manager->remap_mappings( t, std::move(pin), - std::vector(remaps.begin(), remaps.end()), - std::move(extents) + std::vector(remaps.begin(), remaps.end()) ).si_then([FNAME, &t](auto ret) { SUBDEBUGT(seastore_tm, "remapped {} pins", t, ret.size()); return Cache::retire_extent_iertr::make_ready_future< @@ -635,6 +689,27 @@ public: }); } + reserve_extent_ret reserve_region( + Transaction &t, + LBAMapping pos, + laddr_t hint, + extent_len_t len) { + LOG_PREFIX(TransactionManager::reserve_region); + SUBDEBUGT(seastore_tm, "hint {}~0x{:x} ...", t, hint, len); + return pos.refresh( + ).si_then([FNAME, this, &t, hint, len](auto pos) { + return lba_manager->reserve_region( + t, + std::move(pos), + hint, + len + ).si_then([FNAME, &t](auto pin) { + SUBDEBUGT(seastore_tm, "reserved {}", t, pin); + return pin; + }); + }); + } + /* * clone_mapping * @@ -648,27 +723,30 @@ public: clone_extent_ret clone_pin( Transaction &t, laddr_t hint, - const LBAMapping &mapping) { - auto intermediate_key = - mapping.is_indirect() - ? mapping.get_intermediate_key() - : mapping.get_key(); - auto intermediate_base = - mapping.is_indirect() - ? mapping.get_intermediate_base() - : mapping.get_key(); - + LBAMapping mapping) { LOG_PREFIX(TransactionManager::clone_pin); SUBDEBUGT(seastore_tm, "{} clone to hint {} ...", t, mapping, hint); - return lba_manager->clone_mapping( - t, - hint, - mapping.get_length(), - intermediate_key, - intermediate_base - ).si_then([FNAME, &t](auto pin) { - SUBDEBUGT(seastore_tm, "cloned as {}", t, pin); - return pin; + return lba_manager->refresh_lba_mapping(t, std::move(mapping) + ).si_then([FNAME, this, &t, hint](auto mapping) { + auto intermediate_key = + mapping.is_indirect() + ? 
mapping.get_intermediate_key() + : mapping.get_key(); + auto intermediate_base = + mapping.is_indirect() + ? mapping.get_intermediate_base() + : mapping.get_key(); + + return lba_manager->clone_mapping( + t, + hint, + mapping.get_length(), + intermediate_key, + intermediate_base + ).si_then([FNAME, &t](auto pin) { + SUBDEBUGT(seastore_tm, "cloned as {}", t, pin); + return pin; + }); }); } diff --git a/src/test/crimson/seastore/test_btree_lba_manager.cc b/src/test/crimson/seastore/test_btree_lba_manager.cc index b70f24f79c9..63f1d6465d5 100644 --- a/src/test/crimson/seastore/test_btree_lba_manager.cc +++ b/src/test/crimson/seastore/test_btree_lba_manager.cc @@ -602,10 +602,10 @@ struct btree_lba_manager_test : btree_test_base { t, target->first ).si_then([this, &t, target](auto result) { - EXPECT_EQ(result.refcount, target->second.refcount); - if (result.refcount == 0) { + EXPECT_EQ(result.result.refcount, target->second.refcount); + if (result.result.refcount == 0) { return cache->retire_extent_addr( - t, result.addr.get_paddr(), result.length); + t, result.result.addr.get_paddr(), result.result.length); } return Cache::retire_extent_iertr::now(); }); diff --git a/src/test/crimson/seastore/test_transaction_manager.cc b/src/test/crimson/seastore/test_transaction_manager.cc index e6120c4745f..536d3b14529 100644 --- a/src/test/crimson/seastore/test_transaction_manager.cc +++ b/src/test/crimson/seastore/test_transaction_manager.cc @@ -704,9 +704,11 @@ struct transaction_manager_test_t : LBAMapping clone_pin( test_transaction_t &t, laddr_t offset, - const LBAMapping &mapping) { - auto pin = with_trans_intr(*(t.t), [&](auto &trans) { - return tm->clone_pin(trans, offset, mapping); + LBAMapping mapping) { + auto pin = with_trans_intr( + *(t.t), + [this, offset, mapping=std::move(mapping)](auto &trans) mutable { + return tm->clone_pin(trans, offset, std::move(mapping)); }).unsafe_get(); EXPECT_EQ(offset, pin.get_key()); EXPECT_EQ(mapping.get_key(), 
pin.get_intermediate_key()); @@ -1461,9 +1463,10 @@ struct transaction_manager_test_t : auto t = create_transaction(); auto lpin = get_pin(t, l_offset); auto rpin = get_pin(t, r_offset); - auto l_clone_pin = clone_pin(t, l_clone_offset, lpin); - auto r_clone_pin = clone_pin(t, r_clone_offset, rpin); + auto l_clone_pin = clone_pin(t, l_clone_offset, std::move(lpin)); + auto r_clone_pin = clone_pin(t, r_clone_offset, std::move(rpin)); //split left + l_clone_pin = refresh_lba_mapping(t, std::move(l_clone_pin)); auto pin1 = remap_pin(t, std::move(l_clone_pin), 0, 16 << 10); ASSERT_TRUE(pin1); auto pin2 = remap_pin(t, std::move(*pin1), 0, 8 << 10); @@ -1474,6 +1477,7 @@ struct transaction_manager_test_t : EXPECT_EQ('l', lext->get_bptr().c_str()[0]); //split right + r_clone_pin = refresh_lba_mapping(t, std::move(r_clone_pin)); auto pin4 = remap_pin(t, std::move(r_clone_pin), 16 << 10, 16 << 10); ASSERT_TRUE(pin4); auto pin5 = remap_pin(t, std::move(*pin4), 8 << 10, 8 << 10); -- 2.39.5