From: Zhang Song Date: Fri, 9 May 2025 08:12:16 +0000 (+0800) Subject: crimson/os/seastore/lba: flatten lba namespace to keep consistent with backref X-Git-Tag: v20.1.0~222^2~2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=d313b3a65a7607732b80be412bdddfcdfb30e6d5;p=ceph.git crimson/os/seastore/lba: flatten lba namespace to keep consistent with backref Signed-off-by: Zhang Song (cherry picked from commit 4593e5177d20ce22943123ae986c3c86e8d010a1) --- diff --git a/src/crimson/os/seastore/CMakeLists.txt b/src/crimson/os/seastore/CMakeLists.txt index 0a7a11668c8..d5ca12d8f06 100644 --- a/src/crimson/os/seastore/CMakeLists.txt +++ b/src/crimson/os/seastore/CMakeLists.txt @@ -15,8 +15,8 @@ set(crimson_seastore_srcs btree/btree_types.cc backref_manager.cc backref/btree_backref_manager.cc - lba_manager/btree/btree_lba_manager.cc - lba_manager/btree/lba_btree_node.cc + lba/btree_lba_manager.cc + lba/lba_btree_node.cc omap_manager.cc omap_manager/btree/btree_omap_manager.cc omap_manager/btree/omap_btree_node_impl.cc diff --git a/src/crimson/os/seastore/btree/btree_types.cc b/src/crimson/os/seastore/btree/btree_types.cc index a8d6e883153..7665b4a1b87 100644 --- a/src/crimson/os/seastore/btree/btree_types.cc +++ b/src/crimson/os/seastore/btree/btree_types.cc @@ -2,12 +2,12 @@ // vim: ts=8 sw=2 smarttab #include "crimson/os/seastore/btree/btree_types.h" -#include "crimson/os/seastore/lba_manager/btree/lba_btree_node.h" +#include "crimson/os/seastore/lba/lba_btree_node.h" #include "crimson/os/seastore/backref/backref_tree_node.h" namespace crimson::os::seastore { -namespace lba_manager::btree { +namespace lba { std::ostream& operator<<(std::ostream& out, const lba_map_val_t& v) { @@ -19,7 +19,7 @@ std::ostream& operator<<(std::ostream& out, const lba_map_val_t& v) << ")"; } -} // namespace lba_manager::btree +} // namespace lba namespace backref { @@ -36,7 +36,7 @@ namespace { template bool modified_since(T &&extent, uint64_t iter_modifications) { using backref::BackrefLeafNode; - using lba_manager::btree::LBALeafNode; + using lba::LBALeafNode; if constexpr (std::is_same_v) { assert(extent->get_type() == extent_types_t::LADDR_LEAF); auto leaf = extent->template cast(); @@ -64,7 +64,7 @@ bool BtreeCursor::is_viewable() const { return viewable; } -template struct BtreeCursor; +template struct BtreeCursor; template struct BtreeCursor; } // namespace crimson::os::seastore diff --git a/src/crimson/os/seastore/btree/btree_types.h b/src/crimson/os/seastore/btree/btree_types.h index cd616ee6e96..1a0d45fbc2a 100644 --- a/src/crimson/os/seastore/btree/btree_types.h +++ b/src/crimson/os/seastore/btree/btree_types.h @@ -100,7 +100,7 @@ struct __attribute__((packed)) fixed_kv_node_meta_le_t { } }; -namespace lba_manager::btree { +namespace lba { /** * lba_map_val_t @@ -150,7 +150,7 @@ struct __attribute__((packed)) lba_map_val_le_t { } }; -} // namespace lba_manager::btree +} // namespace lba namespace backref { @@ -216,7 +216,7 @@ struct BtreeCursor { pos(pos) { if constexpr (std::is_same_v) { - static_assert(std::is_same_v, + static_assert(std::is_same_v, "the value type of laddr_t for BtreeCursor should be lba_map_val_t"); } else { static_assert(std::is_same_v, @@ -251,8 +251,8 @@ struct BtreeCursor { } }; -struct LBACursor : BtreeCursor { - using Base = BtreeCursor; +struct LBACursor : BtreeCursor { + using Base = BtreeCursor; using Base::BtreeCursor; bool is_indirect() const { assert(!is_end()); diff --git a/src/crimson/os/seastore/btree/fixed_kv_btree.h b/src/crimson/os/seastore/btree/fixed_kv_btree.h index f9ca4186329..103e24ad151 100644 --- a/src/crimson/os/seastore/btree/fixed_kv_btree.h +++ b/src/crimson/os/seastore/btree/fixed_kv_btree.h @@ -178,7 +178,7 @@ public: assert(!is_end()); auto ret = leaf.node->iter_idx(leaf.pos).get_val(); if constexpr ( - std::is_same_v) { if (ret.pladdr.is_paddr()) { ret.pladdr = ret.pladdr.get_paddr().maybe_relative_to( diff --git a/src/crimson/os/seastore/cache.cc b/src/crimson/os/seastore/cache.cc index 7762d1344de..cf83fa93042 100644 --- a/src/crimson/os/seastore/cache.cc +++ b/src/crimson/os/seastore/cache.cc @@ -14,7 +14,7 @@ // included for get_extent_by_type #include "crimson/os/seastore/collection_manager/collection_flat_node.h" -#include "crimson/os/seastore/lba_manager/btree/lba_btree_node.h" +#include "crimson/os/seastore/lba/lba_btree_node.h" #include "crimson/os/seastore/omap_manager/btree/omap_btree_node_impl.h" #include "crimson/os/seastore/object_data_handler.h" #include "crimson/os/seastore/collection_manager/collection_flat_node.h" @@ -1090,9 +1090,9 @@ CachedExtentRef Cache::alloc_new_non_data_extent_by_type( ceph_assert(0 == "ROOT is never directly alloc'd"); return CachedExtentRef(); case extent_types_t::LADDR_INTERNAL: - return alloc_new_non_data_extent(t, length, hint, gen); + return alloc_new_non_data_extent(t, length, hint, gen); case extent_types_t::LADDR_LEAF: - return alloc_new_non_data_extent( + return alloc_new_non_data_extent( t, length, hint, gen); case extent_types_t::ROOT_META: return alloc_new_non_data_extent( @@ -1449,7 +1449,7 @@ record_t Cache::prepare_record( if (i->is_logical()) { fresh_laddr = i->cast()->get_laddr(); } else if (is_lba_node(i->get_type())) { - fresh_laddr = i->cast()->get_node_meta().begin; + fresh_laddr = i->cast()->get_node_meta().begin; } else { fresh_laddr = L_ADDR_NULL; } @@ -1468,7 +1468,7 @@ record_t Cache::prepare_record( if (i->is_logical()) { alloc_laddr = i->cast()->get_laddr(); } else if (is_lba_node(i->get_type())) { - alloc_laddr = i->cast()->get_node_meta().begin; + alloc_laddr = i->cast()->get_node_meta().begin; } else { assert(i->get_type() == extent_types_t::TEST_BLOCK_PHYSICAL); alloc_laddr = L_ADDR_MIN; @@ -1494,7 +1494,7 @@ record_t Cache::prepare_record( alloc_laddr = i->cast()->get_laddr(); } else { assert(is_lba_node(i->get_type())); - alloc_laddr = i->cast()->get_node_meta().begin; + alloc_laddr = i->cast()->get_node_meta().begin; } alloc_delta.alloc_blk_ranges.emplace_back( alloc_blk_t::create_alloc( @@ -1808,7 +1808,7 @@ void Cache::complete_commit( if (i->is_logical()) { alloc_laddr = i->cast()->get_laddr(); } else if (is_lba_node(i->get_type())) { - alloc_laddr = i->cast()->get_node_meta().begin; + alloc_laddr = i->cast()->get_node_meta().begin; } else { assert(i->get_type() == extent_types_t::TEST_BLOCK_PHYSICAL); alloc_laddr = L_ADDR_MIN; @@ -2253,13 +2253,13 @@ Cache::do_get_caching_extent_by_type( return CachedExtentRef(extent.detach(), false /* add_ref */); }); case extent_types_t::LADDR_INTERNAL: - return do_get_caching_extent( + return do_get_caching_extent( offset, length, std::move(extent_init_func), std::move(on_cache), p_src ).safe_then([](auto extent) { return CachedExtentRef(extent.detach(), false /* add_ref */); }); case extent_types_t::LADDR_LEAF: - return do_get_caching_extent( + return do_get_caching_extent( offset, length, std::move(extent_init_func), std::move(on_cache), p_src ).safe_then([](auto extent) { return CachedExtentRef(extent.detach(), false /* add_ref */); diff --git a/src/crimson/os/seastore/lba/btree_lba_manager.cc b/src/crimson/os/seastore/lba/btree_lba_manager.cc new file mode 100644 index 00000000000..3c613dbbd27 --- /dev/null +++ b/src/crimson/os/seastore/lba/btree_lba_manager.cc @@ -0,0 +1,1102 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include +#include + +#include + +#include "include/buffer.h" +#include "crimson/os/seastore/lba/btree_lba_manager.h" +#include "crimson/os/seastore/lba/lba_btree_node.h" +#include "crimson/os/seastore/logging.h" + +SET_SUBSYS(seastore_lba); +/* + * levels: + * - INFO: mkfs + * - DEBUG: modification operations + * - TRACE: read operations, DEBUG details + */ + +template <> struct fmt::formatter< + crimson::os::seastore::lba::LBABtree::iterator> + : public fmt::formatter +{ + using Iter = crimson::os::seastore::lba::LBABtree::iterator; + + template + auto format(const Iter &iter, FmtCtx &ctx) const + -> decltype(ctx.out()) { + if (iter.is_end()) { + return fmt::format_to(ctx.out(), "end"); + } + return fmt::format_to(ctx.out(), "{}~{}", iter.get_key(), iter.get_val()); + } +}; + +namespace crimson::os::seastore { + +template +Transaction::tree_stats_t& get_tree_stats(Transaction &t) +{ + return t.get_lba_tree_stats(); +} + +template Transaction::tree_stats_t& +get_tree_stats< + crimson::os::seastore::lba::LBABtree>( + Transaction &t); + +template +phy_tree_root_t& get_phy_tree_root(root_t &r) +{ + return r.lba_root; +} + +template phy_tree_root_t& +get_phy_tree_root< + crimson::os::seastore::lba::LBABtree>(root_t &r); + +template <> +const get_phy_tree_root_node_ret get_phy_tree_root_node< + crimson::os::seastore::lba::LBABtree>( + const RootBlockRef &root_block, op_context_t c) +{ + auto lba_root = root_block->lba_root_node; + if (lba_root) { + ceph_assert(lba_root->is_initial_pending() + == root_block->is_pending()); + return {true, + c.cache.get_extent_viewable_by_trans(c.trans, lba_root)}; + } else if (root_block->is_pending()) { + auto &prior = static_cast(*root_block->get_prior_instance()); + lba_root = prior.lba_root_node; + if (lba_root) { + return {true, + c.cache.get_extent_viewable_by_trans(c.trans, lba_root)}; + } else { + return {false, + Cache::get_extent_iertr::make_ready_future()}; + } + } else { + return {false, + Cache::get_extent_iertr::make_ready_future()}; + } +} + +template +class TreeRootLinker { +public: + static void link_root(RootBlockRef &root_block, RootT* lba_root) { + root_block->lba_root_node = lba_root; + ceph_assert(lba_root != nullptr); + lba_root->parent_of_root = root_block; + } + static void unlink_root(RootBlockRef &root_block) { + root_block->lba_root_node = nullptr; + } +}; + +template class TreeRootLinker; +template class TreeRootLinker; + +} + +namespace crimson::os::seastore::lba { + +BtreeLBAManager::mkfs_ret +BtreeLBAManager::mkfs( + Transaction &t) +{ + LOG_PREFIX(BtreeLBAManager::mkfs); + INFOT("start", t); + return cache.get_root(t).si_then([this, &t](auto croot) { + assert(croot->is_mutation_pending()); + croot->get_root().lba_root = LBABtree::mkfs(croot, get_context(t)); + return mkfs_iertr::now(); + }).handle_error_interruptible( + mkfs_iertr::pass_further{}, + crimson::ct_error::assert_all{ + "Invalid error in BtreeLBAManager::mkfs" + } + ); +} + +BtreeLBAManager::get_mappings_ret +BtreeLBAManager::get_mappings( + Transaction &t, + laddr_t laddr, + extent_len_t length) +{ + LOG_PREFIX(BtreeLBAManager::get_mappings); + TRACET("{}~0x{:x} ...", t, laddr, length); + auto c = get_context(t); + return with_btree_state( + cache, c, + [FNAME, this, c, laddr, length](auto& btree, auto& ret) + { + return get_cursors(c, btree, laddr, length + ).si_then([FNAME, this, c, laddr, length, &btree, &ret](auto cursors) { + return seastar::do_with( + std::move(cursors), + [FNAME, this, c, laddr, length, &btree, &ret](auto& cursors) + { + return trans_intr::do_for_each( + cursors, + [FNAME, this, c, laddr, length, &btree, &ret](auto& cursor) + { + if (!cursor->is_indirect()) { + ret.emplace_back(LBAMapping::create_direct(std::move(cursor))); + TRACET("{}~0x{:x} got {}", + c.trans, laddr, length, ret.back()); + return get_mappings_iertr::now(); + } + assert(cursor->val->refcount == EXTENT_DEFAULT_REF_COUNT); + assert(cursor->val->checksum == 0); + return resolve_indirect_cursor(c, btree, *cursor + ).si_then([FNAME, c, &ret, &cursor, laddr, length](auto direct) { + ret.emplace_back(LBAMapping::create_indirect( + std::move(direct), std::move(cursor))); + TRACET("{}~0x{:x} got {}", + c.trans, laddr, length, ret.back()); + return get_mappings_iertr::now(); + }); + }); + }); + }); + }); +} + +BtreeLBAManager::_get_cursors_ret +BtreeLBAManager::get_cursors( + op_context_t c, + LBABtree& btree, + laddr_t laddr, + extent_len_t length) +{ + LOG_PREFIX(BtreeLBAManager::get_cursors); + TRACET("{}~0x{:x} ...", c.trans, laddr, length); + return seastar::do_with( + std::list(), + [FNAME, c, laddr, length, &btree](auto& ret) + { + return LBABtree::iterate_repeat( + c, + btree.upper_bound_right(c, laddr), + [FNAME, c, laddr, length, &ret](auto& pos) + { + if (pos.is_end() || pos.get_key() >= (laddr + length)) { + TRACET("{}~0x{:x} done with {} results, stop at {}", + c.trans, laddr, length, ret.size(), pos); + return LBABtree::iterate_repeat_ret_inner( + interruptible::ready_future_marker{}, + seastar::stop_iteration::yes); + } + TRACET("{}~0x{:x} got {}, repeat ...", + c.trans, laddr, length, pos); + ceph_assert((pos.get_key() + pos.get_val().len) > laddr); + ret.emplace_back(pos.get_cursor(c)); + return LBABtree::iterate_repeat_ret_inner( + interruptible::ready_future_marker{}, + seastar::stop_iteration::no); + }).si_then([&ret] { + return std::move(ret); + }); + }); +} + +BtreeLBAManager::resolve_indirect_cursor_ret +BtreeLBAManager::resolve_indirect_cursor( + op_context_t c, + LBABtree& btree, + const LBACursor &indirect_cursor) +{ + ceph_assert(indirect_cursor.is_indirect()); + return get_cursors( + c, + btree, + indirect_cursor.get_intermediate_key(), + indirect_cursor.get_length() + ).si_then([&indirect_cursor](auto cursors) { + ceph_assert(cursors.size() == 1); + auto& direct_cursor = cursors.front(); + auto intermediate_key = indirect_cursor.get_intermediate_key(); + assert(!direct_cursor->is_indirect()); + assert(direct_cursor->get_laddr() <= intermediate_key); + assert(direct_cursor->get_laddr() + direct_cursor->get_length() + >= intermediate_key + indirect_cursor.get_length()); + return std::move(direct_cursor); + }); +} + +BtreeLBAManager::get_mapping_ret +BtreeLBAManager::get_mapping( + Transaction &t, + laddr_t laddr) +{ + LOG_PREFIX(BtreeLBAManager::get_mapping); + TRACET("{} ...", t, laddr); + auto c = get_context(t); + return with_btree( + cache, c, + [FNAME, this, c, laddr](auto& btree) + { + return get_cursor(c, btree, laddr + ).si_then([FNAME, this, c, laddr, &btree](LBACursorRef cursor) { + if (!cursor->is_indirect()) { + TRACET("{} got direct cursor {}", + c.trans, laddr, *cursor); + auto mapping = LBAMapping::create_direct(std::move(cursor)); + return get_mapping_iertr::make_ready_future< + LBAMapping>(std::move(mapping)); + } + assert(laddr == cursor->get_laddr()); + assert(cursor->val->refcount == EXTENT_DEFAULT_REF_COUNT); + assert(cursor->val->checksum == 0); + return resolve_indirect_cursor(c, btree, *cursor + ).si_then([FNAME, c, laddr, indirect=std::move(cursor)] + (auto direct) mutable { + auto mapping = LBAMapping::create_indirect( + std::move(direct), std::move(indirect)); + TRACET("{} got indirect mapping {}", + c.trans, laddr, mapping); + return get_mapping_iertr::make_ready_future< + LBAMapping>(std::move(mapping)); + }); + }); + }); +} + +BtreeLBAManager::_get_cursor_ret +BtreeLBAManager::get_cursor( + op_context_t c, + LBABtree& btree, + laddr_t laddr) +{ + LOG_PREFIX(BtreeLBAManager::get_cursor); + TRACET("{} ...", c.trans, laddr); + return btree.lower_bound( + c, laddr + ).si_then([FNAME, c, laddr](auto iter) -> _get_cursor_ret { + if (iter.is_end() || iter.get_key() != laddr) { + ERRORT("{} doesn't exist", c.trans, laddr); + return crimson::ct_error::enoent::make(); + } + TRACET("{} got value {}", c.trans, laddr, iter.get_val()); + return _get_cursor_ret( + interruptible::ready_future_marker{}, + iter.get_cursor(c)); + }); +} + +BtreeLBAManager::search_insert_position_ret +BtreeLBAManager::search_insert_position( + op_context_t c, + LBABtree &btree, + laddr_t hint, + extent_len_t length, + alloc_policy_t policy) +{ + LOG_PREFIX(BtreeLBAManager::search_insert_position); + auto lookup_attempts = stats.num_alloc_extents_iter_nexts; + using OptIter = std::optional; + return seastar::do_with( + hint, OptIter(std::nullopt), + [this, c, &btree, hint, length, lookup_attempts, policy, FNAME] + (laddr_t &last_end, OptIter &insert_iter) + { + return LBABtree::iterate_repeat( + c, + btree.upper_bound_right(c, hint), + [this, c, hint, length, lookup_attempts, policy, + &last_end, &insert_iter, FNAME](auto &iter) + { + ++stats.num_alloc_extents_iter_nexts; + if (iter.is_end() || + iter.get_key() >= (last_end + length)) { + if (policy == alloc_policy_t::deterministic) { + ceph_assert(hint == last_end); + } + DEBUGT("hint: {}~0x{:x}, allocated laddr: {}, insert position: {}, " + "done with {} attempts", + c.trans, hint, length, last_end, iter, + stats.num_alloc_extents_iter_nexts - lookup_attempts); + insert_iter.emplace(iter); + return search_insert_position_iertr::make_ready_future< + seastar::stop_iteration>(seastar::stop_iteration::yes); + } + ceph_assert(policy == alloc_policy_t::linear_search); + last_end = (iter.get_key() + iter.get_val().len).checked_to_laddr(); + TRACET("hint: {}~0x{:x}, current iter: {}, repeat ...", + c.trans, hint, length, iter); + return search_insert_position_iertr::make_ready_future< + seastar::stop_iteration>(seastar::stop_iteration::no); + }).si_then([&last_end, &insert_iter] { + ceph_assert(insert_iter); + return search_insert_position_iertr::make_ready_future< + insert_position_t>(last_end, *std::move(insert_iter)); + }); + }); +} + +BtreeLBAManager::alloc_mappings_ret +BtreeLBAManager::alloc_contiguous_mappings( + Transaction &t, + laddr_t hint, + std::vector &alloc_infos, + alloc_policy_t policy) +{ + ceph_assert(hint != L_ADDR_NULL); + extent_len_t total_len = 0; + for (auto &info : alloc_infos) { + assert(info.key == L_ADDR_NULL); + total_len += info.value.len; + } + + auto c = get_context(t); + return with_btree( + cache, + c, + [this, c, hint, &alloc_infos, total_len, policy](auto &btree) + { + return search_insert_position(c, btree, hint, total_len, policy + ).si_then([this, c, &alloc_infos, &btree](insert_position_t res) { + extent_len_t offset = 0; + for (auto &info : alloc_infos) { + info.key = (res.laddr + offset).checked_to_laddr(); + offset += info.value.len; + } + return insert_mappings( + c, btree, std::move(res.insert_iter), alloc_infos); + }); + }); +} + +BtreeLBAManager::alloc_mappings_ret +BtreeLBAManager::alloc_sparse_mappings( + Transaction &t, + laddr_t hint, + std::vector &alloc_infos, + alloc_policy_t policy) +{ + ceph_assert(hint != L_ADDR_NULL); +#ifndef NDEBUG + assert(alloc_infos.front().key != L_ADDR_NULL); + for (size_t i = 1; i < alloc_infos.size(); i++) { + auto &prev = alloc_infos[i - 1]; + auto &cur = alloc_infos[i]; + assert(cur.key != L_ADDR_NULL); + assert(prev.key + prev.value.len <= cur.key); + } +#endif + auto total_len = hint.get_byte_distance( + alloc_infos.back().key + alloc_infos.back().value.len); + auto c = get_context(t); + return with_btree( + cache, + c, + [this, c, hint, &alloc_infos, total_len, policy](auto &btree) + { + return search_insert_position(c, btree, hint, total_len, policy + ).si_then([this, c, hint, &alloc_infos, &btree, policy](auto res) { + if (policy != alloc_policy_t::deterministic) { + for (auto &info : alloc_infos) { + auto offset = info.key.get_byte_distance(hint); + info.key = (res.laddr + offset).checked_to_laddr(); + } + } // deterministic guarantees hint == res.laddr + return insert_mappings( + c, btree, std::move(res.insert_iter), alloc_infos); + }); + }); +} + +BtreeLBAManager::alloc_mappings_ret +BtreeLBAManager::insert_mappings( + op_context_t c, + LBABtree &btree, + LBABtree::iterator iter, + std::vector &alloc_infos) +{ + return seastar::do_with( + std::move(iter), std::list(), + [c, &btree, &alloc_infos] + (LBABtree::iterator &iter, std::list &ret) + { + return trans_intr::do_for_each( + alloc_infos.begin(), + alloc_infos.end(), + [c, &btree, &iter, &ret](auto &info) + { + assert(info.key != L_ADDR_NULL); + return btree.insert( + c, iter, info.key, info.value + ).si_then([c, &iter, &ret, &info](auto p) { + ceph_assert(p.second); + iter = std::move(p.first); + auto &leaf_node = *iter.get_leaf_node(); + leaf_node.insert_child_ptr( + iter.get_leaf_pos(), + info.extent, + leaf_node.get_size() - 1 /*the size before the insert*/); + if (is_valid_child_ptr(info.extent)) { + ceph_assert(info.value.pladdr.is_paddr()); + assert(info.value.pladdr == iter.get_val().pladdr); + assert(info.value.len == iter.get_val().len); + assert(info.extent->is_logical()); + if (info.extent->has_laddr()) { + // see TM::remap_pin() + assert(info.key == info.extent->get_laddr()); + assert(info.key == iter.get_key()); + } else { + // see TM::alloc_non_data_extent() + // TM::alloc_data_extents() + info.extent->set_laddr(iter.get_key()); + } + } + ret.push_back(iter.get_cursor(c)); + return iter.next(c).si_then([&iter](auto p) { + iter = std::move(p); + }); + }); + }).si_then([&ret] { + return alloc_mappings_iertr::make_ready_future< + std::list>(std::move(ret)); + }); + }); +} + +static bool is_lba_node(const CachedExtent &e) +{ + return is_lba_node(e.get_type()); +} + +BtreeLBAManager::base_iertr::template future<> +_init_cached_extent( + op_context_t c, + const CachedExtentRef &e, + LBABtree &btree, + bool &ret) +{ + if (e->is_logical()) { + auto logn = e->cast(); + return btree.lower_bound( + c, + logn->get_laddr() + ).si_then([e, c, logn, &ret](auto iter) { + LOG_PREFIX(BtreeLBAManager::init_cached_extent); + if (!iter.is_end() && + iter.get_key() == logn->get_laddr() && + iter.get_val().pladdr.is_paddr() && + iter.get_val().pladdr.get_paddr() == logn->get_paddr()) { + assert(!iter.get_leaf_node()->is_pending()); + iter.get_leaf_node()->link_child(logn.get(), iter.get_leaf_pos()); + logn->set_laddr(iter.get_key()); + ceph_assert(iter.get_val().len == e->get_length()); + DEBUGT("logical extent {} live", c.trans, *logn); + ret = true; + } else { + DEBUGT("logical extent {} not live", c.trans, *logn); + ret = false; + } + }); + } else { + return btree.init_cached_extent(c, e + ).si_then([&ret](bool is_alive) { + ret = is_alive; + }); + } +} + +BtreeLBAManager::init_cached_extent_ret +BtreeLBAManager::init_cached_extent( + Transaction &t, + CachedExtentRef e) +{ + LOG_PREFIX(BtreeLBAManager::init_cached_extent); + TRACET("{}", t, *e); + return seastar::do_with(bool(), [this, e, &t](bool &ret) { + auto c = get_context(t); + return with_btree( + cache, c, + [c, e, &ret](auto &btree) -> base_iertr::future<> { + LOG_PREFIX(BtreeLBAManager::init_cached_extent); + DEBUGT("extent {}", c.trans, *e); + return _init_cached_extent(c, e, btree, ret); + } + ).si_then([&ret] { return ret; }); + }); +} + +#ifdef UNIT_TESTS_BUILT +BtreeLBAManager::check_child_trackers_ret +BtreeLBAManager::check_child_trackers( + Transaction &t) { + auto c = get_context(t); + return with_btree( + cache, c, + [c](auto &btree) { + return btree.check_child_trackers(c); + }); +} +#endif + +BtreeLBAManager::scan_mappings_ret +BtreeLBAManager::scan_mappings( + Transaction &t, + laddr_t begin, + laddr_t end, + scan_mappings_func_t &&f) +{ + LOG_PREFIX(BtreeLBAManager::scan_mappings); + DEBUGT("begin: {}, end: {}", t, begin, end); + + auto c = get_context(t); + return with_btree( + cache, + c, + [c, f=std::move(f), begin, end](auto &btree) mutable { + return LBABtree::iterate_repeat( + c, + btree.upper_bound_right(c, begin), + [f=std::move(f), begin, end](auto &pos) { + if (pos.is_end() || pos.get_key() >= end) { + return typename LBABtree::iterate_repeat_ret_inner( + interruptible::ready_future_marker{}, + seastar::stop_iteration::yes); + } + ceph_assert((pos.get_key() + pos.get_val().len) > begin); + if (pos.get_val().pladdr.is_paddr()) { + f(pos.get_key(), pos.get_val().pladdr.get_paddr(), pos.get_val().len); + } + return LBABtree::iterate_repeat_ret_inner( + interruptible::ready_future_marker{}, + seastar::stop_iteration::no); + }); + }); +} + +BtreeLBAManager::rewrite_extent_ret +BtreeLBAManager::rewrite_extent( + Transaction &t, + CachedExtentRef extent) +{ + LOG_PREFIX(BtreeLBAManager::rewrite_extent); + if (extent->has_been_invalidated()) { + ERRORT("extent has been invalidated -- {}", t, *extent); + ceph_abort(); + } + assert(!extent->is_logical()); + + if (is_lba_node(*extent)) { + DEBUGT("rewriting lba extent -- {}", t, *extent); + auto c = get_context(t); + return with_btree( + cache, + c, + [c, extent](auto &btree) mutable { + return btree.rewrite_extent(c, extent); + }); + } else { + DEBUGT("skip non lba extent -- {}", t, *extent); + return rewrite_extent_iertr::now(); + } +} + +BtreeLBAManager::update_mapping_ret +BtreeLBAManager::update_mapping( + Transaction& t, + laddr_t laddr, + extent_len_t prev_len, + paddr_t prev_addr, + LogicalChildNode& nextent) +{ + LOG_PREFIX(BtreeLBAManager::update_mapping); + auto addr = nextent.get_paddr(); + auto len = nextent.get_length(); + auto checksum = nextent.get_last_committed_crc(); + TRACET("laddr={}, paddr {}~0x{:x} => {}~0x{:x}, crc=0x{:x}", + t, laddr, prev_addr, prev_len, addr, len, checksum); + assert(laddr == nextent.get_laddr()); + assert(!addr.is_null()); + return _update_mapping( + t, + laddr, + [prev_addr, addr, prev_len, len, checksum] + (const lba_map_val_t &in) { + lba_map_val_t ret = in; + ceph_assert(in.pladdr.is_paddr()); + ceph_assert(in.pladdr.get_paddr() == prev_addr); + ceph_assert(in.len == prev_len); + ret.pladdr = addr; + ret.len = len; + ret.checksum = checksum; + return ret; + }, + &nextent + ).si_then([&t, laddr, prev_addr, prev_len, addr, len, checksum, FNAME](auto res) { + assert(res.is_alive_mapping()); + DEBUGT("laddr={}, paddr {}~0x{:x} => {}~0x{:x}, crc=0x{:x} done -- {}", + t, laddr, prev_addr, prev_len, addr, len, checksum, res.get_cursor()); + return update_mapping_iertr::make_ready_future< + extent_ref_count_t>(res.get_cursor().get_refcount()); + }, + update_mapping_iertr::pass_further{}, + /* ENOENT in particular should be impossible */ + crimson::ct_error::assert_all{ + "Invalid error in BtreeLBAManager::update_mapping" + } + ); +} + +BtreeLBAManager::update_mappings_ret +BtreeLBAManager::update_mappings( + Transaction& t, + const std::list& extents) +{ + return trans_intr::do_for_each(extents, [this, &t](auto &extent) { + LOG_PREFIX(BtreeLBAManager::update_mappings); + auto laddr = extent->get_laddr(); + auto prev_addr = extent->get_prior_paddr_and_reset(); + auto len = extent->get_length(); + auto addr = extent->get_paddr(); + auto checksum = extent->get_last_committed_crc(); + TRACET("laddr={}, paddr {}~0x{:x} => {}, crc=0x{:x}", + t, laddr, prev_addr, len, addr, checksum); + assert(!addr.is_null()); + return _update_mapping( + t, + laddr, + [prev_addr, addr, len, checksum]( + const lba_map_val_t &in) { + lba_map_val_t ret = in; + ceph_assert(in.pladdr.is_paddr()); + ceph_assert(in.pladdr.get_paddr() == prev_addr); + ceph_assert(in.len == len); + ret.pladdr = addr; + ret.checksum = checksum; + return ret; + }, + nullptr // all the extents should have already been + // added to the fixed_kv_btree + ).si_then([&t, laddr, prev_addr, len, addr, checksum, FNAME](auto res) { + DEBUGT("laddr={}, paddr {}~0x{:x} => {}, crc=0x{:x} done -- {}", + t, laddr, prev_addr, len, addr, checksum, res.get_cursor()); + return update_mapping_iertr::make_ready_future(); + }, + update_mapping_iertr::pass_further{}, + /* ENOENT in particular should be impossible */ + crimson::ct_error::assert_all{ + "Invalid error in BtreeLBAManager::update_mappings" + } + ); + }); +} + +BtreeLBAManager::get_physical_extent_if_live_ret +BtreeLBAManager::get_physical_extent_if_live( + Transaction &t, + extent_types_t type, + paddr_t addr, + laddr_t laddr, + extent_len_t len) +{ + LOG_PREFIX(BtreeLBAManager::get_physical_extent_if_live); + DEBUGT("{}, laddr={}, paddr={}, length={}", + t, type, laddr, addr, len); + ceph_assert(is_lba_node(type)); + auto c = get_context(t); + return with_btree_ret( + cache, + c, + [c, type, addr, laddr, len](auto &btree) { + if (type == extent_types_t::LADDR_INTERNAL) { + return btree.get_internal_if_live(c, addr, laddr, len); + } else { + assert(type == extent_types_t::LADDR_LEAF || + type == extent_types_t::DINK_LADDR_LEAF); + return btree.get_leaf_if_live(c, addr, laddr, len); + } + }); +} + +BtreeLBAManager::refresh_lba_mapping_ret +BtreeLBAManager::refresh_lba_mapping(Transaction &t, LBAMapping mapping) +{ + assert(mapping.is_linked_direct()); + if (mapping.is_viewable()) { + return refresh_lba_mapping_iertr::make_ready_future< + LBAMapping>(std::move(mapping)); + } + auto c = get_context(t); + return with_btree_state( + cache, + c, + std::move(mapping), + [c, this](LBABtree &btree, LBAMapping &mapping) mutable + { + return refresh_lba_cursor(c, btree, *mapping.direct_cursor + ).si_then([c, this, &btree, &mapping] { + if (mapping.indirect_cursor) { + return refresh_lba_cursor(c, btree, *mapping.indirect_cursor); + } + return refresh_lba_cursor_iertr::make_ready_future(); +#ifndef NDEBUG + }).si_then([&mapping] { + assert(mapping.is_viewable()); +#endif + }); + }); +} + +BtreeLBAManager::refresh_lba_cursor_ret +BtreeLBAManager::refresh_lba_cursor( + op_context_t c, + LBABtree &btree, + LBACursor &cursor) +{ + LOG_PREFIX(BtreeLBAManager::refresh_lba_cursor); + stats.num_refresh_parent_total++; + + if (!cursor.parent->is_valid()) { + stats.num_refresh_invalid_parent++; + TRACET("cursor {} parent is invalid, re-search from scratch", + c.trans, cursor); + return btree.lower_bound(c, cursor.get_laddr() + ).si_then([&cursor](LBABtree::iterator iter) { + auto leaf = iter.get_leaf_node(); + cursor.parent = leaf; + cursor.modifications = leaf->modifications; + cursor.pos = iter.get_leaf_pos(); + if (!cursor.is_end()) { + ceph_assert(!iter.is_end()); + ceph_assert(iter.get_key() == cursor.get_laddr()); + cursor.val = iter.get_val(); + assert(cursor.is_viewable()); + } + }); + } + + auto [viewable, state] = cursor.parent->is_viewable_by_trans(c.trans); + auto leaf = cursor.parent->cast(); + + TRACET("cursor: {} viewable: {} state: {}", + c.trans, cursor, viewable, state); + + if (!viewable) { + stats.num_refresh_unviewable_parent++; + leaf = leaf->find_pending_version(c.trans, cursor.get_laddr()); + cursor.parent = leaf; + } + + if (!viewable || + leaf->modified_since(cursor.modifications)) { + if (viewable) { + stats.num_refresh_modified_viewable_parent++; + } + + cursor.modifications = leaf->modifications; + if (cursor.is_end()) { + cursor.pos = leaf->get_size(); + assert(!cursor.val); + } else { + auto i = leaf->lower_bound(cursor.get_laddr()); + cursor.pos = i.get_offset(); + cursor.val = i.get_val(); + + auto iter = LBALeafNode::iterator(leaf.get(), cursor.pos); + ceph_assert(iter.get_key() == cursor.key); + ceph_assert(iter.get_val() == cursor.val); + assert(cursor.is_viewable()); + } + } + + return refresh_lba_cursor_iertr::make_ready_future(); +} + +void BtreeLBAManager::register_metrics() +{ + LOG_PREFIX(BtreeLBAManager::register_metrics); + DEBUG("start"); + stats = {}; + namespace sm = seastar::metrics; + metrics.add_group( + "LBA", + { + sm::make_counter( + "alloc_extents", + stats.num_alloc_extents, + sm::description("total number of lba alloc_extent operations") + ), + sm::make_counter( + "alloc_extents_iter_nexts", + stats.num_alloc_extents_iter_nexts, + sm::description("total number of iterator next operations during extent allocation") + ), + sm::make_counter( + "refresh_parent_total", + stats.num_refresh_parent_total, + sm::description("total number of refreshed cursors") + ), + sm::make_counter( + "refresh_invalid_parent", + stats.num_refresh_invalid_parent, + sm::description("total number of refreshed cursors with invalid parents") + ), + sm::make_counter( + "refresh_unviewable_parent", + stats.num_refresh_unviewable_parent, + sm::description("total number of refreshed cursors with unviewable parents") + ), + sm::make_counter( + "refresh_modified_viewable_parent", + stats.num_refresh_modified_viewable_parent, + sm::description("total number of refreshed cursors with viewable but modified parents") + ), + } + ); +} + +BtreeLBAManager::_decref_intermediate_ret +BtreeLBAManager::_decref_intermediate( + Transaction &t, + laddr_t addr, + extent_len_t len) +{ + auto c = get_context(t); + return with_btree( + cache, + c, + [c, addr, len](auto &btree) mutable { + return btree.upper_bound_right( + c, addr + ).si_then([&btree, addr, len, c](auto iter) { + ceph_assert(!iter.is_end()); + laddr_t key = iter.get_key(); + ceph_assert(key <= addr); + auto val = iter.get_val(); + ceph_assert(key + val.len >= addr + len); + ceph_assert(val.pladdr.is_paddr()); + ceph_assert(val.refcount >= 1); + val.refcount -= 1; + + LOG_PREFIX(BtreeLBAManager::_decref_intermediate); + TRACET("decreased refcount of intermediate key {} -- {}", + c.trans, key, val); + + if (val.refcount == 0) { + return btree.remove(c, iter + ).si_then([key, val] { + return ref_iertr::make_ready_future< + update_mapping_ret_bare_t>(key, val); + }); + } else { + return btree.update(c, iter, val + ).si_then([c](auto iter) { + return ref_iertr::make_ready_future< + update_mapping_ret_bare_t>(iter.get_cursor(c)); + }); + } + }); + }); +} + +BtreeLBAManager::remap_ret +BtreeLBAManager::remap_mappings( + Transaction &t, + LBAMapping orig_mapping, + std::vector remaps, + std::vector extents) +{ + LOG_PREFIX(BtreeLBAManager::remap_mappings); + struct state_t { + LBAMapping orig_mapping; + std::vector remaps; + std::vector extents; + std::vector alloc_infos; + std::vector ret; + }; + return seastar::do_with( + state_t(std::move(orig_mapping), std::move(remaps), std::move(extents), {}, {}), + [this, &t, FNAME](state_t &state) + { + return update_refcount( + t, state.orig_mapping.get_key(), -1, false + ).si_then([this, &t, &state, FNAME](auto ret) { + // Remapping the shared direct mapping is prohibited, + // the refcount of indirect mapping should always be 1. + ceph_assert(ret.is_removed_mapping()); + + auto orig_laddr = state.orig_mapping.get_key(); + if (!state.orig_mapping.is_indirect()) { + auto &addr = ret.get_removed_mapping().map_value.pladdr; + ceph_assert(addr.is_paddr() && !addr.get_paddr().is_zero()); + return alloc_extents( + t, + (state.remaps.front().offset + orig_laddr).checked_to_laddr(), + std::move(state.extents), + EXTENT_DEFAULT_REF_COUNT + ).si_then([&state](auto ret) { + state.ret = std::move(ret); + return remap_iertr::make_ready_future(); + }); + } + + extent_len_t orig_len = state.orig_mapping.get_length(); + auto intermediate_key = state.orig_mapping.get_intermediate_key(); + ceph_assert(intermediate_key != L_ADDR_NULL); + DEBUGT("remap indirect mapping {}", t, state.orig_mapping); + for (auto &remap : state.remaps) { + DEBUGT("remap 0x{:x}~0x{:x}", t, remap.offset, remap.len); + ceph_assert(remap.len != 0); + ceph_assert(remap.offset + remap.len <= orig_len); + auto remapped_laddr = (orig_laddr + remap.offset) + .checked_to_laddr(); + auto remapped_intermediate_key = (intermediate_key + remap.offset) + .checked_to_laddr(); + state.alloc_infos.emplace_back( + alloc_mapping_info_t::create_indirect( + remapped_laddr, remap.len, remapped_intermediate_key)); + } + + return alloc_sparse_mappings( + t, state.alloc_infos.front().key, state.alloc_infos, + alloc_policy_t::deterministic + ).si_then([&t, &state, this](std::list cursors) { + return seastar::futurize_invoke([&t, &state, this] { + if (state.remaps.size() > 1) { + auto base = state.orig_mapping.get_intermediate_base(); + return update_refcount( + t, base, state.remaps.size() - 1, false + ).si_then([](update_mapping_ret_bare_t ret) { + return ret.take_cursor(); + }); + } else { + return remap_iertr::make_ready_future< + LBACursorRef>(state.orig_mapping.direct_cursor->duplicate()); + } + }).si_then([&state, cursors=std::move(cursors)](auto direct) mutable { + for (auto &cursor : cursors) { + state.ret.emplace_back(LBAMapping::create_indirect( + direct->duplicate(), std::move(cursor))); + } + return remap_iertr::make_ready_future(); + }); + }); + }).si_then([&state] { + assert(state.ret.size() == state.remaps.size()); +#ifndef NDEBUG + auto mapping_it = state.ret.begin(); + auto remap_it = state.remaps.begin(); + for (;mapping_it != state.ret.end(); mapping_it++, remap_it++) { + auto &mapping = *mapping_it; + auto &remap = *remap_it; + assert(mapping.get_key() == state.orig_mapping.get_key() + remap.offset); + assert(mapping.get_length() == remap.len); + } +#endif + return remap_iertr::make_ready_future< + std::vector>(std::move(state.ret)); + }); + }); +} + +BtreeLBAManager::update_refcount_ret +BtreeLBAManager::update_refcount( + Transaction &t, + laddr_t addr, + int delta, + bool cascade_remove) +{ + LOG_PREFIX(BtreeLBAManager::update_refcount); + TRACET("laddr={}, delta={}", t, addr, delta); + return _update_mapping( + t, + addr, + [delta](const lba_map_val_t &in) { + lba_map_val_t out = in; + ceph_assert((int)out.refcount + delta >= 0); + out.refcount += delta; + return out; + }, + nullptr + ).si_then([&t, addr, delta, FNAME, this, cascade_remove](auto res) { + DEBUGT("laddr={}, delta={} done -- {}", + t, addr, delta, + res.is_alive_mapping() + ? res.get_cursor().val + : res.get_removed_mapping().map_value); + if (res.is_removed_mapping() && cascade_remove && + res.get_removed_mapping().map_value.pladdr.is_laddr()) { + auto &val = res.get_removed_mapping().map_value; + TRACET("decref intermediate {} -> {}", + t, addr, val.pladdr.get_laddr()); + return _decref_intermediate(t, val.pladdr.get_laddr(), val.len + ).handle_error_interruptible( + update_mapping_iertr::pass_further{}, + crimson::ct_error::assert_all{ + "unexpect ENOENT" + } + ); + } + return update_mapping_iertr::make_ready_future< + update_mapping_ret_bare_t>(std::move(res)); + }); +} + +BtreeLBAManager::_update_mapping_ret +BtreeLBAManager::_update_mapping( + Transaction &t, + laddr_t addr, + update_func_t &&f, + LogicalChildNode* nextent) +{ + auto c = get_context(t); + return with_btree( + cache, + c, + [f=std::move(f), c, addr, nextent](auto &btree) mutable { + return btree.lower_bound( + c, addr + ).si_then([&btree, f=std::move(f), c, addr, nextent](auto iter) + -> _update_mapping_ret { + if (iter.is_end() || iter.get_key() != addr) { + LOG_PREFIX(BtreeLBAManager::_update_mapping); + ERRORT("laddr={} doesn't exist", c.trans, addr); + return crimson::ct_error::enoent::make(); + } + + auto ret = f(iter.get_val()); + if (ret.refcount == 0) { + assert(nextent == nullptr); + return btree.remove( + c, + iter + ).si_then([addr, ret] { + return update_mapping_ret_bare_t(addr, ret); + }); + } else { + return btree.update( + c, + iter, + ret + ).si_then([c, nextent](auto iter) { + if (nextent) { + // nextent is provided iff unlinked, + // also see TM::rewrite_logical_extent() + assert(!nextent->has_parent_tracker()); + iter.get_leaf_node()->update_child_ptr( + iter.get_leaf_pos(), nextent); + } + assert(!nextent || + (nextent->has_parent_tracker() && + nextent->get_parent_node().get() == iter.get_leaf_node().get())); + return update_mapping_ret_bare_t(iter.get_cursor(c)); + }); + } + }); + }); +} + +} diff --git a/src/crimson/os/seastore/lba/btree_lba_manager.h b/src/crimson/os/seastore/lba/btree_lba_manager.h new file mode 100644 index 00000000000..aef7325a370 --- /dev/null +++ b/src/crimson/os/seastore/lba/btree_lba_manager.h @@ -0,0 +1,532 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include + +#include +#include +#include + +#include "include/ceph_assert.h" +#include "include/buffer_fwd.h" +#include "include/interval_set.h" +#include "common/interval_map.h" +#include "crimson/osd/exceptions.h" + +#include "crimson/os/seastore/btree/fixed_kv_btree.h" +#include "crimson/os/seastore/seastore_types.h" +#include "crimson/os/seastore/lba_manager.h" +#include "crimson/os/seastore/cache.h" + +#include "crimson/os/seastore/lba/lba_btree_node.h" +#include "crimson/os/seastore/btree/btree_types.h" + +namespace crimson::os::seastore { +class LogicalCachedExtent; +} + +namespace crimson::os::seastore::lba { + +using LBABtree = FixedKVBtree< + laddr_t, lba_map_val_t, LBAInternalNode, + LBALeafNode, LBACursor, LBA_BLOCK_SIZE>; + +/** + * BtreeLBAManager + * + * Uses a wandering btree to track two things: + * 1) lba state including laddr_t -> paddr_t mapping + * 2) reverse paddr_t -> laddr_t mapping for gc (TODO) + * + * Generally, any transaction will involve + * 1) deltas against lba tree nodes + * 2) new lba tree nodes + * - Note, there must necessarily be a delta linking + * these new nodes into the tree -- might be a + * bootstrap_state_t delta if new root + * + * get_mappings, alloc_extent_*, etc populate a Transaction + * which then gets submitted + */ +class BtreeLBAManager : public LBAManager { +public: + BtreeLBAManager(Cache &cache) + : cache(cache) + { + register_metrics(); + } + + mkfs_ret mkfs( + Transaction &t) final; + + get_mappings_ret get_mappings( + Transaction &t, + laddr_t offset, extent_len_t length) final; + + get_mapping_ret get_mapping( + Transaction &t, + laddr_t offset) final; + + alloc_extent_ret reserve_region( + Transaction &t, + laddr_t hint, + extent_len_t len) final + { + std::vector alloc_infos = { + alloc_mapping_info_t::create_zero(len)}; + return seastar::do_with( + std::move(alloc_infos), + [&t, hint, this](auto &alloc_infos) { + return alloc_contiguous_mappings( + t, hint, alloc_infos, alloc_policy_t::linear_search + ).si_then([](auto cursors) { + assert(cursors.size() == 1); + return LBAMapping::create_direct(std::move(cursors.front())); + }); + }); + } + + alloc_extent_ret clone_mapping( + Transaction &t, + laddr_t laddr, + extent_len_t len, + laddr_t intermediate_key, + laddr_t intermediate_base) final + { + std::vector alloc_infos = { + alloc_mapping_info_t::create_indirect( + laddr, len, intermediate_key)}; + return seastar::do_with( + std::move(alloc_infos), + [this, &t, laddr, intermediate_base](auto &infos) { + return alloc_sparse_mappings( + t, laddr, infos, alloc_policy_t::deterministic + ).si_then([this, &t, intermediate_base](auto cursors) { + ceph_assert(cursors.size() == 1); + ceph_assert(cursors.front()->is_indirect()); + return update_refcount(t, intermediate_base, 1, false + ).si_then([cursors=std::move(cursors)](auto p) mutable { + assert(p.is_alive_mapping()); + auto mapping = LBAMapping::create_indirect( + p.take_cursor(), std::move(cursors.front())); + ceph_assert(mapping.is_stable()); + return alloc_extent_iertr::make_ready_future< + LBAMapping>(std::move(mapping)); + }); + }); + }).handle_error_interruptible( + crimson::ct_error::input_output_error::pass_further{}, + crimson::ct_error::assert_all{"unexpect enoent"}); + } + + alloc_extent_ret alloc_extent( + Transaction &t, + laddr_t hint, + LogicalChildNode &ext, + extent_ref_count_t refcount) final + { + // The real checksum will be updated upon transaction commit + assert(ext.get_last_committed_crc() == 0); + assert(!ext.has_laddr()); + std::vector alloc_infos = { + alloc_mapping_info_t::create_direct( + L_ADDR_NULL, + ext.get_length(), + ext.get_paddr(), + refcount, + ext.get_last_committed_crc(), + ext)}; + return seastar::do_with( + std::move(alloc_infos), + [this, &t, hint](auto &alloc_infos) { + return alloc_contiguous_mappings( + t, hint, alloc_infos, alloc_policy_t::linear_search + ).si_then([](auto cursors) { + assert(cursors.size() == 1); + return LBAMapping::create_direct(std::move(cursors.front())); + }); + }); + } + + alloc_extents_ret alloc_extents( + Transaction &t, + laddr_t hint, + std::vector extents, + extent_ref_count_t refcount) final + { + std::vector alloc_infos; + assert(!extents.empty()); + auto has_laddr = extents.front()->has_laddr(); + for (auto &extent : extents) { + assert(extent); + assert(extent->has_laddr() == has_laddr); + alloc_infos.emplace_back( + alloc_mapping_info_t::create_direct( + extent->has_laddr() ? extent->get_laddr() : L_ADDR_NULL, + extent->get_length(), + extent->get_paddr(), + refcount, + extent->get_last_committed_crc(), + *extent)); + } + return seastar::do_with( + std::move(alloc_infos), + [this, &t, hint, has_laddr](auto &alloc_infos) + { + if (has_laddr) { + return alloc_sparse_mappings( + t, hint, alloc_infos, alloc_policy_t::deterministic) +#ifndef NDEBUG + .si_then([&alloc_infos](std::list cursors) { + assert(alloc_infos.size() == cursors.size()); + auto info_p = alloc_infos.begin(); + auto cursor_p = cursors.begin(); + for (; info_p != alloc_infos.end(); info_p++, cursor_p++) { + auto &cursor = *cursor_p; + assert(cursor->get_laddr() == info_p->key); + } + return alloc_extent_iertr::make_ready_future< + std::list>(std::move(cursors)); + }) +#endif + ; + } else { + return alloc_contiguous_mappings( + t, hint, alloc_infos, alloc_policy_t::linear_search); + } + }).si_then([](std::list cursors) { + std::vector ret; + for (auto &cursor : cursors) { + ret.emplace_back(LBAMapping::create_direct(std::move(cursor))); + } + return ret; + }); + } + + ref_ret remove_mapping( + Transaction &t, + laddr_t addr) final { + return update_refcount(t, addr, -1, true + ).si_then([](auto res) { + return ref_update_result_t(res); + }); + } + + remap_ret remap_mappings( + Transaction &t, + LBAMapping orig_mapping, + std::vector remaps, + std::vector extents) final; + + /** + * init_cached_extent + * + * Checks whether e is live (reachable from lba tree) and drops or initializes + * accordingly. + * + * Returns if e is live. + */ + init_cached_extent_ret init_cached_extent( + Transaction &t, + CachedExtentRef e) final; + +#ifdef UNIT_TESTS_BUILT + check_child_trackers_ret check_child_trackers(Transaction &t) final; +#endif + + scan_mappings_ret scan_mappings( + Transaction &t, + laddr_t begin, + laddr_t end, + scan_mappings_func_t &&f) final; + + rewrite_extent_ret rewrite_extent( + Transaction &t, + CachedExtentRef extent) final; + + update_mapping_ret update_mapping( + Transaction& t, + laddr_t laddr, + extent_len_t prev_len, + paddr_t prev_addr, + LogicalChildNode&) final; + + update_mappings_ret update_mappings( + Transaction& t, + const std::list& extents); + + get_physical_extent_if_live_ret get_physical_extent_if_live( + Transaction &t, + extent_types_t type, + paddr_t addr, + laddr_t laddr, + extent_len_t len) final; + + refresh_lba_mapping_ret refresh_lba_mapping( + Transaction &t, + LBAMapping mapping) final; + +private: + Cache &cache; + + struct { + uint64_t num_alloc_extents = 0; + uint64_t num_alloc_extents_iter_nexts = 0; + uint64_t num_refresh_parent_total = 0; + uint64_t num_refresh_invalid_parent = 0; + uint64_t num_refresh_unviewable_parent = 0; + uint64_t num_refresh_modified_viewable_parent = 0; + } stats; + + struct alloc_mapping_info_t { + laddr_t key = L_ADDR_NULL; // once assigned, the allocation to + // key must be exact and successful + lba_map_val_t value; + LogicalChildNode* extent = nullptr; + + static alloc_mapping_info_t create_zero(extent_len_t len) { + return { + L_ADDR_NULL, + { + len, + pladdr_t(P_ADDR_ZERO), + EXTENT_DEFAULT_REF_COUNT, + 0 + }, + static_cast(get_reserved_ptr())}; + } + static alloc_mapping_info_t create_indirect( + laddr_t laddr, + extent_len_t len, + laddr_t intermediate_key) { + return { + laddr, + { + len, + pladdr_t(intermediate_key), + EXTENT_DEFAULT_REF_COUNT, + 0 // crc will only be used and checked with LBA direct mappings + // also see pin_to_extent(_by_type) + }, + static_cast(get_reserved_ptr())}; + } + static alloc_mapping_info_t create_direct( + laddr_t laddr, + extent_len_t len, + paddr_t paddr, + extent_ref_count_t refcount, + checksum_t checksum, + LogicalChildNode& extent) { + return {laddr, {len, pladdr_t(paddr), refcount, checksum}, &extent}; + } + }; + + op_context_t get_context(Transaction &t) { + return op_context_t{cache, t}; + } + + seastar::metrics::metric_group metrics; + void register_metrics(); + + struct update_mapping_ret_bare_t { + update_mapping_ret_bare_t() + : update_mapping_ret_bare_t(LBACursorRef(nullptr)) {} + + update_mapping_ret_bare_t(LBACursorRef cursor) + : ret(std::move(cursor)) {} + + update_mapping_ret_bare_t(laddr_t laddr, lba_map_val_t value) + : ret(removed_mapping_t{laddr, value}) {} + + struct removed_mapping_t { + laddr_t laddr; + lba_map_val_t map_value; + }; + std::variant ret; + + bool is_removed_mapping() const { + return ret.index() == 0; + } + + bool is_alive_mapping() const { + if (ret.index() == 1) { + assert(std::get<1>(ret)); + return true; + } else { + return false; + } + } + + const removed_mapping_t& get_removed_mapping() const { + assert(is_removed_mapping()); + return std::get<0>(ret); + } + + const LBACursor& get_cursor() const { + assert(is_alive_mapping()); + return *std::get<1>(ret); + } + + LBACursorRef take_cursor() { + assert(is_alive_mapping()); + return std::move(std::get<1>(ret)); + } + + explicit operator ref_update_result_t() const { + if (is_removed_mapping()) { + auto v = get_removed_mapping(); + auto &val = v.map_value; + ceph_assert(val.pladdr.is_paddr()); + return {v.laddr, val.refcount, val.pladdr, val.len}; + } else { + assert(is_alive_mapping()); + auto &c = get_cursor(); + assert(c.val); + ceph_assert(!c.is_indirect()); + return {c.get_laddr(), c.val->refcount, c.val->pladdr, c.val->len}; + } + } + }; + + using update_refcount_iertr = ref_iertr; + using update_refcount_ret = update_refcount_iertr::future< + update_mapping_ret_bare_t>; + update_refcount_ret update_refcount( + Transaction &t, + laddr_t addr, + int delta, + bool cascade_remove); + + /** + * _update_mapping + * + * Updates mapping, removes if f returns nullopt + */ + using _update_mapping_iertr = ref_iertr; + using _update_mapping_ret = ref_iertr::future< + update_mapping_ret_bare_t>; + using update_func_t = std::function< + lba_map_val_t(const lba_map_val_t &v) + >; + _update_mapping_ret _update_mapping( + Transaction &t, + laddr_t addr, + update_func_t &&f, + LogicalChildNode*); + + struct insert_position_t { + laddr_t laddr; + LBABtree::iterator insert_iter; + }; + enum class alloc_policy_t { + deterministic, // no conflict + linear_search, + }; + using search_insert_position_iertr = base_iertr; + using search_insert_position_ret = + search_insert_position_iertr::future; + search_insert_position_ret search_insert_position( + op_context_t c, + LBABtree &btree, + laddr_t hint, + extent_len_t length, + alloc_policy_t policy); + + using alloc_mappings_iertr = base_iertr; + using alloc_mappings_ret = + alloc_mappings_iertr::future>; + /** + * alloc_contiguous_mappings + * + * Insert a range of contiguous mappings into the LBA btree. + * + * hint is a non-null laddr hint for allocation. All alloc_infos' key + * should be L_ADDR_NULL, the final laddr is relative to the allocated + * laddr based on preceding mappings' total length. + */ + alloc_mappings_ret alloc_contiguous_mappings( + Transaction &t, + laddr_t hint, + std::vector &alloc_infos, + alloc_policy_t policy); + + /** + * alloc_sparse_mappings + * + * Insert a range of sparse mappings into the LBA btree. + * + * hint is a non-null laddr hint for allocation. All of alloc_infos' key + * are non-null laddr hints and must be incremental, each mapping's final + * laddr maintains same offset to allocated laddr as original to hint. + */ + alloc_mappings_ret alloc_sparse_mappings( + Transaction &t, + laddr_t hint, + std::vector &alloc_infos, + alloc_policy_t policy); + + /** + * insert_mappings + * + * Insert all lba mappings built from alloc_infos into LBA btree before + * iter and return the inserted LBACursors. + * + * NOTE: There is no guarantee that the returned cursors are all valid + * since the successive insertion is possible to invalidate the parent + * extent of predecessively returned LBACursor. + */ + alloc_mappings_ret insert_mappings( + op_context_t c, + LBABtree &btree, + LBABtree::iterator iter, + std::vector &alloc_infos); + + ref_ret _incref_extent( + Transaction &t, + laddr_t addr, + int delta) { + ceph_assert(delta > 0); + return update_refcount(t, addr, delta, false + ).si_then([](auto res) { + return ref_update_result_t(res); + }); + } + + using _get_cursor_ret = get_mapping_iertr::future; + _get_cursor_ret get_cursor( + op_context_t c, + LBABtree& btree, + laddr_t offset); + + using _get_cursors_ret = get_mappings_iertr::future>; + _get_cursors_ret get_cursors( + op_context_t c, + LBABtree& btree, + laddr_t offset, + extent_len_t length); + + using resolve_indirect_cursor_ret = get_mappings_iertr::future; + resolve_indirect_cursor_ret resolve_indirect_cursor( + op_context_t c, + LBABtree& btree, + const LBACursor& indirect_cursor); + + using _decref_intermediate_ret = ref_iertr::future< + update_mapping_ret_bare_t>; + _decref_intermediate_ret _decref_intermediate( + Transaction &t, + laddr_t addr, + extent_len_t len); + + using refresh_lba_cursor_iertr = base_iertr; + using refresh_lba_cursor_ret = refresh_lba_cursor_iertr::future<>; + refresh_lba_cursor_ret refresh_lba_cursor( + op_context_t c, + LBABtree &btree, + LBACursor &cursor); +}; +using BtreeLBAManagerRef = std::unique_ptr; + +} diff --git a/src/crimson/os/seastore/lba/lba_btree_node.cc b/src/crimson/os/seastore/lba/lba_btree_node.cc new file mode 100644 index 00000000000..5ee0db2a138 --- /dev/null +++ b/src/crimson/os/seastore/lba/lba_btree_node.cc @@ -0,0 +1,87 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include +#include + +#include +#include + +#include "include/buffer.h" +#include "include/byteorder.h" + +#include "crimson/os/seastore/lba/btree_lba_manager.h" +#include "crimson/os/seastore/logging.h" +#include "crimson/os/seastore/logical_child_node.h" + +SET_SUBSYS(seastore_lba); + +namespace crimson::os::seastore::lba { + +std::ostream &LBALeafNode::print_detail(std::ostream &out) const +{ + out << ", size=" << this->get_size() + << ", meta=" << this->get_meta() + << ", modifications=" << this->modifications + << ", my_tracker=" << (void*)this->my_tracker; + if (this->my_tracker) { + out << ", my_tracker->parent=" << (void*)this->my_tracker->get_parent().get(); + } + return out << ", root_block=" << (void*)this->parent_of_root.get(); +} + +void LBALeafNode::resolve_relative_addrs(paddr_t base) +{ + LOG_PREFIX(LBALeafNode::resolve_relative_addrs); + for (auto i: *this) { + auto val = i->get_val(); + if (val.pladdr.is_paddr() && + val.pladdr.get_paddr().is_relative()) { + val.pladdr = base.add_relative(val.pladdr.get_paddr()); + TRACE("{} -> {}", i->get_val().pladdr, val.pladdr); + i->set_val(val); + } + } +} + +void LBALeafNode::update( + internal_const_iterator_t iter, + lba_map_val_t val) +{ + LOG_PREFIX(LBALeafNode::update); + SUBTRACE(seastore_fixedkv_tree, "trans.{}, pos {}", + this->pending_for_transaction, + iter.get_offset()); + this->on_modify(); + if (val.pladdr.is_paddr()) { + val.pladdr = maybe_generate_relative(val.pladdr.get_paddr()); + } + return this->journal_update( + iter, + val, + this->maybe_get_delta_buffer()); +} + +LBALeafNode::internal_const_iterator_t LBALeafNode::insert( + internal_const_iterator_t iter, + laddr_t addr, + lba_map_val_t val) +{ + LOG_PREFIX(LBALeafNode::insert); + SUBTRACE(seastore_fixedkv_tree, "trans.{}, pos {}, key {}", + this->pending_for_transaction, + iter.get_offset(), + addr); + this->on_modify(); + if (val.pladdr.is_paddr()) { + val.pladdr = maybe_generate_relative(val.pladdr.get_paddr()); + } + this->journal_insert( + iter, + addr, + val, + this->maybe_get_delta_buffer()); + return iter; +} + +} diff --git a/src/crimson/os/seastore/lba/lba_btree_node.h b/src/crimson/os/seastore/lba/lba_btree_node.h new file mode 100644 index 00000000000..05f26b6b292 --- /dev/null +++ b/src/crimson/os/seastore/lba/lba_btree_node.h @@ -0,0 +1,291 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include +#include +#include + + +#include "include/buffer.h" + +#include "crimson/common/fixed_kv_node_layout.h" +#include "crimson/common/errorator.h" +#include "crimson/os/seastore/seastore_types.h" +#include "crimson/os/seastore/cache.h" +#include "crimson/os/seastore/cached_extent.h" + +#include "crimson/os/seastore/btree/btree_types.h" +#include "crimson/os/seastore/btree/fixed_kv_btree.h" +#include "crimson/os/seastore/btree/fixed_kv_node.h" + +namespace crimson::os::seastore { +class LogicalChildNode; +} + +namespace crimson::os::seastore::lba { + +using base_iertr = Cache::base_iertr; +using LBANode = FixedKVNode; + +class BtreeLBAMapping; + +constexpr size_t LBA_BLOCK_SIZE = 4096; + +using lba_node_meta_t = fixed_kv_node_meta_t; + +using lba_node_meta_le_t = fixed_kv_node_meta_le_t; + +/** + * LBAInternalNode + * + * Abstracts operations on and layout of internal nodes for the + * LBA Tree. + * + * Layout (4KiB): + * checksum : ceph_le32[1] 4B + * size : ceph_le32[1] 4B + * meta : lba_node_meta_le_t[1] 20B + * keys : laddr_le_t[CAPACITY] (254*8)B + * values : paddr_le_t[CAPACITY] (254*8)B + * = 4092B + + * TODO: make the above capacity calculation part of FixedKVNodeLayout + * TODO: the above alignment probably isn't portable without further work + */ +constexpr size_t INTERNAL_NODE_CAPACITY = 254; +struct LBAInternalNode + : FixedKVInternalNode< + INTERNAL_NODE_CAPACITY, + laddr_t, laddr_le_t, + LBA_BLOCK_SIZE, + LBAInternalNode> { + static_assert( + check_capacity(LBA_BLOCK_SIZE), + "INTERNAL_NODE_CAPACITY doesn't fit in LBA_BLOCK_SIZE"); + using Ref = TCachedExtentRef; + using internal_iterator_t = const_iterator; + using key_type = laddr_t; + template + LBAInternalNode(T&&... t) : + FixedKVInternalNode(std::forward(t)...) {} + static constexpr uint32_t CHILD_VEC_UNIT = 0; + + static constexpr extent_types_t TYPE = extent_types_t::LADDR_INTERNAL; + + extent_types_t get_type() const final { + return TYPE; + } +}; +using LBAInternalNodeRef = LBAInternalNode::Ref; + +/** + * LBALeafNode + * + * Abstracts operations on and layout of leaf nodes for the + * LBA Tree. + * + * Layout (4KiB): + * checksum : ceph_le32[1] 4B + * size : ceph_le32[1] 4B + * meta : lba_node_meta_le_t[1] 20B + * keys : laddr_le_t[CAPACITY] (140*8)B + * values : lba_map_val_le_t[CAPACITY] (140*21)B + * = 4088B + * + * TODO: update FixedKVNodeLayout to handle the above calculation + * TODO: the above alignment probably isn't portable without further work + */ +constexpr size_t LEAF_NODE_CAPACITY = 140; + +struct LBALeafNode + : FixedKVLeafNode< + LEAF_NODE_CAPACITY, + laddr_t, laddr_le_t, + lba_map_val_t, lba_map_val_le_t, + LBA_BLOCK_SIZE, + LBAInternalNode, + LBALeafNode>, + ParentNode { + static_assert( + check_capacity(LBA_BLOCK_SIZE), + "LEAF_NODE_CAPACITY doesn't fit in LBA_BLOCK_SIZE"); + using Ref = TCachedExtentRef; + using parent_type_t = FixedKVLeafNode< + LEAF_NODE_CAPACITY, + laddr_t, laddr_le_t, + lba_map_val_t, lba_map_val_le_t, + LBA_BLOCK_SIZE, + LBAInternalNode, + LBALeafNode>; + using internal_const_iterator_t = + typename parent_type_t::node_layout_t::const_iterator; + using internal_iterator_t = + typename parent_type_t::node_layout_t::iterator; + using key_type = laddr_t; + using parent_node_t = ParentNode; + using child_t = LogicalChildNode; + static constexpr uint32_t CHILD_VEC_UNIT = 0; + LBALeafNode(ceph::bufferptr &&ptr) + : parent_type_t(std::move(ptr)), + parent_node_t(LEAF_NODE_CAPACITY) {} + explicit LBALeafNode(extent_len_t length) + : parent_type_t(length), + parent_node_t(LEAF_NODE_CAPACITY) {} + LBALeafNode(const LBALeafNode &rhs) + : parent_type_t(rhs), + parent_node_t(rhs) {} + + static constexpr extent_types_t TYPE = extent_types_t::LADDR_LEAF; + + void update( + internal_const_iterator_t iter, + lba_map_val_t val) final; + + internal_const_iterator_t insert( + internal_const_iterator_t iter, + laddr_t addr, + lba_map_val_t val) final; + + void remove(internal_const_iterator_t iter) final { + LOG_PREFIX(LBALeafNode::remove); + SUBTRACE(seastore_fixedkv_tree, "trans.{}, pos {}, key {}", + this->pending_for_transaction, + iter.get_offset(), + iter.get_key()); + assert(iter != this->end()); + this->on_modify(); + this->remove_child_ptr(iter.get_offset()); + return this->journal_remove( + iter, + this->maybe_get_delta_buffer()); + } + + // See LBAInternalNode, same concept + void resolve_relative_addrs(paddr_t base) final; + void node_resolve_vals( + internal_iterator_t from, + internal_iterator_t to) const final + { + if (this->is_initial_pending()) { + for (auto i = from; i != to; ++i) { + auto val = i->get_val(); + if (val.pladdr.is_paddr() + && val.pladdr.get_paddr().is_relative()) { + assert(val.pladdr.get_paddr().is_block_relative()); + val.pladdr = this->get_paddr().add_relative(val.pladdr.get_paddr()); + i->set_val(val); + } + } + } + } + void node_unresolve_vals( + internal_iterator_t from, + internal_iterator_t to) const final + { + if (this->is_initial_pending()) { + for (auto i = from; i != to; ++i) { + auto val = i->get_val(); + if (val.pladdr.is_paddr() + && val.pladdr.get_paddr().is_relative()) { + assert(val.pladdr.get_paddr().is_record_relative()); + val.pladdr = val.pladdr.get_paddr().block_relative_to(this->get_paddr()); + i->set_val(val); + } + } + } + } + + extent_types_t get_type() const final { + return TYPE; + } + + void do_on_rewrite(Transaction &t, CachedExtent &extent) final { + this->parent_node_t::on_rewrite(t, static_cast(extent)); + } + + void do_on_replace_prior() final { + this->parent_node_t::on_replace_prior(); + } + + void do_prepare_commit() final { + this->parent_node_t::prepare_commit(); + } + + bool is_child_stable( + op_context_t c, + uint16_t pos, + laddr_t key) const { + return parent_node_t::_is_child_stable(c.trans, c.cache, pos, key); + } + bool is_child_data_stable( + op_context_t c, + uint16_t pos, + laddr_t key) const { + return parent_node_t::_is_child_stable(c.trans, c.cache, pos, key, true); + } + + void on_split( + Transaction &t, + LBALeafNode &left, + LBALeafNode &right) final { + this->split_child_ptrs(t, left, right); + } + void adjust_copy_src_dest_on_split( + Transaction &t, + LBALeafNode &left, + LBALeafNode &right) final { + this->parent_node_t::adjust_copy_src_dest_on_split(t, left, right); + } + + void on_merge( + Transaction &t, + LBALeafNode &left, + LBALeafNode &right) final { + this->merge_child_ptrs(t, left, right); + } + void adjust_copy_src_dest_on_merge( + Transaction &t, + LBALeafNode &left, + LBALeafNode &right) final { + this->parent_node_t::adjust_copy_src_dest_on_merge(t, left, right); + } + + void on_balance( + Transaction &t, + LBALeafNode &left, + LBALeafNode &right, + uint32_t pivot_idx, + LBALeafNode &replacement_left, + LBALeafNode &replacement_right) final { + this->balance_child_ptrs( + t, left, right, pivot_idx, replacement_left, replacement_right); + } + void adjust_copy_src_dest_on_balance( + Transaction &t, + LBALeafNode &left, + LBALeafNode &right, + uint32_t pivot_idx, + LBALeafNode &replacement_left, + LBALeafNode &replacement_right) final { + this->parent_node_t::adjust_copy_src_dest_on_balance( + t, left, right, pivot_idx, replacement_left, replacement_right); + } + + CachedExtentRef duplicate_for_write(Transaction&) final { + return CachedExtentRef(new LBALeafNode(*this)); + } + + std::ostream &print_detail(std::ostream &out) const final; +}; +using LBALeafNodeRef = TCachedExtentRef; + +} + +#if FMT_VERSION >= 90000 +template <> struct fmt::formatter : fmt::ostream_formatter {}; +template <> struct fmt::formatter : fmt::ostream_formatter {}; +template <> struct fmt::formatter : fmt::ostream_formatter {}; +template <> struct fmt::formatter : fmt::ostream_formatter {}; +#endif diff --git a/src/crimson/os/seastore/lba_manager.cc b/src/crimson/os/seastore/lba_manager.cc index 50850a7de56..3ebfb1dddf8 100644 --- a/src/crimson/os/seastore/lba_manager.cc +++ b/src/crimson/os/seastore/lba_manager.cc @@ -2,12 +2,12 @@ // vim: ts=8 sw=2 smarttab #include "crimson/os/seastore/lba_manager.h" -#include "crimson/os/seastore/lba_manager/btree/btree_lba_manager.h" +#include "crimson/os/seastore/lba/btree_lba_manager.h" namespace crimson::os::seastore { -LBAManagerRef lba_manager::create_lba_manager(Cache &cache) { - return LBAManagerRef(new btree::BtreeLBAManager(cache)); +LBAManagerRef lba::create_lba_manager(Cache &cache) { + return LBAManagerRef(new lba::BtreeLBAManager(cache)); } } diff --git a/src/crimson/os/seastore/lba_manager.h b/src/crimson/os/seastore/lba_manager.h index 286496f3c71..d146a9932ec 100644 --- a/src/crimson/os/seastore/lba_manager.h +++ b/src/crimson/os/seastore/lba_manager.h @@ -241,7 +241,7 @@ public: using LBAManagerRef = std::unique_ptr; class Cache; -namespace lba_manager { +namespace lba { LBAManagerRef create_lba_manager(Cache &cache); } diff --git a/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.cc b/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.cc deleted file mode 100644 index 3f0700fd701..00000000000 --- a/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.cc +++ /dev/null @@ -1,1102 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab - -#include -#include - -#include - -#include "include/buffer.h" -#include "crimson/os/seastore/lba_manager/btree/btree_lba_manager.h" -#include "crimson/os/seastore/lba_manager/btree/lba_btree_node.h" -#include "crimson/os/seastore/logging.h" - -SET_SUBSYS(seastore_lba); -/* - * levels: - * - INFO: mkfs - * - DEBUG: modification operations - * - TRACE: read operations, DEBUG details - */ - -template <> struct fmt::formatter< - crimson::os::seastore::lba_manager::btree::LBABtree::iterator> - : public fmt::formatter -{ - using Iter = crimson::os::seastore::lba_manager::btree::LBABtree::iterator; - - template - auto format(const Iter &iter, FmtCtx &ctx) const - -> decltype(ctx.out()) { - if (iter.is_end()) { - return fmt::format_to(ctx.out(), "end"); - } - return fmt::format_to(ctx.out(), "{}~{}", iter.get_key(), iter.get_val()); - } -}; - -namespace crimson::os::seastore { - -template -Transaction::tree_stats_t& get_tree_stats(Transaction &t) -{ - return t.get_lba_tree_stats(); -} - -template Transaction::tree_stats_t& -get_tree_stats< - crimson::os::seastore::lba_manager::btree::LBABtree>( - Transaction &t); - -template -phy_tree_root_t& get_phy_tree_root(root_t &r) -{ - return r.lba_root; -} - -template phy_tree_root_t& -get_phy_tree_root< - crimson::os::seastore::lba_manager::btree::LBABtree>(root_t &r); - -template <> -const get_phy_tree_root_node_ret get_phy_tree_root_node< - crimson::os::seastore::lba_manager::btree::LBABtree>( - const RootBlockRef &root_block, op_context_t c) -{ - auto lba_root = root_block->lba_root_node; - if (lba_root) { - ceph_assert(lba_root->is_initial_pending() - == root_block->is_pending()); - return {true, - c.cache.get_extent_viewable_by_trans(c.trans, lba_root)}; - } else if (root_block->is_pending()) { - auto &prior = static_cast(*root_block->get_prior_instance()); - lba_root = prior.lba_root_node; - if (lba_root) { - return {true, - c.cache.get_extent_viewable_by_trans(c.trans, lba_root)}; - } else { - return {false, - Cache::get_extent_iertr::make_ready_future()}; - } - } else { - return {false, - Cache::get_extent_iertr::make_ready_future()}; - } -} - -template -class TreeRootLinker { -public: - static void link_root(RootBlockRef &root_block, RootT* lba_root) { - root_block->lba_root_node = lba_root; - ceph_assert(lba_root != nullptr); - lba_root->parent_of_root = root_block; - } - static void unlink_root(RootBlockRef &root_block) { - root_block->lba_root_node = nullptr; - } -}; - -template class TreeRootLinker; -template class TreeRootLinker; - -} - -namespace crimson::os::seastore::lba_manager::btree { - -BtreeLBAManager::mkfs_ret -BtreeLBAManager::mkfs( - Transaction &t) -{ - LOG_PREFIX(BtreeLBAManager::mkfs); - INFOT("start", t); - return cache.get_root(t).si_then([this, &t](auto croot) { - assert(croot->is_mutation_pending()); - croot->get_root().lba_root = LBABtree::mkfs(croot, get_context(t)); - return mkfs_iertr::now(); - }).handle_error_interruptible( - mkfs_iertr::pass_further{}, - crimson::ct_error::assert_all{ - "Invalid error in BtreeLBAManager::mkfs" - } - ); -} - -BtreeLBAManager::get_mappings_ret -BtreeLBAManager::get_mappings( - Transaction &t, - laddr_t laddr, - extent_len_t length) -{ - LOG_PREFIX(BtreeLBAManager::get_mappings); - TRACET("{}~0x{:x} ...", t, laddr, length); - auto c = get_context(t); - return with_btree_state( - cache, c, - [FNAME, this, c, laddr, length](auto& btree, auto& ret) - { - return get_cursors(c, btree, laddr, length - ).si_then([FNAME, this, c, laddr, length, &btree, &ret](auto cursors) { - return seastar::do_with( - std::move(cursors), - [FNAME, this, c, laddr, length, &btree, &ret](auto& cursors) - { - return trans_intr::do_for_each( - cursors, - [FNAME, this, c, laddr, length, &btree, &ret](auto& cursor) - { - if (!cursor->is_indirect()) { - ret.emplace_back(LBAMapping::create_direct(std::move(cursor))); - TRACET("{}~0x{:x} got {}", - c.trans, laddr, length, ret.back()); - return get_mappings_iertr::now(); - } - assert(cursor->val->refcount == EXTENT_DEFAULT_REF_COUNT); - assert(cursor->val->checksum == 0); - return resolve_indirect_cursor(c, btree, *cursor - ).si_then([FNAME, c, &ret, &cursor, laddr, length](auto direct) { - ret.emplace_back(LBAMapping::create_indirect( - std::move(direct), std::move(cursor))); - TRACET("{}~0x{:x} got {}", - c.trans, laddr, length, ret.back()); - return get_mappings_iertr::now(); - }); - }); - }); - }); - }); -} - -BtreeLBAManager::_get_cursors_ret -BtreeLBAManager::get_cursors( - op_context_t c, - LBABtree& btree, - laddr_t laddr, - extent_len_t length) -{ - LOG_PREFIX(BtreeLBAManager::get_cursors); - TRACET("{}~0x{:x} ...", c.trans, laddr, length); - return seastar::do_with( - std::list(), - [FNAME, c, laddr, length, &btree](auto& ret) - { - return LBABtree::iterate_repeat( - c, - btree.upper_bound_right(c, laddr), - [FNAME, c, laddr, length, &ret](auto& pos) - { - if (pos.is_end() || pos.get_key() >= (laddr + length)) { - TRACET("{}~0x{:x} done with {} results, stop at {}", - c.trans, laddr, length, ret.size(), pos); - return LBABtree::iterate_repeat_ret_inner( - interruptible::ready_future_marker{}, - seastar::stop_iteration::yes); - } - TRACET("{}~0x{:x} got {}, repeat ...", - c.trans, laddr, length, pos); - ceph_assert((pos.get_key() + pos.get_val().len) > laddr); - ret.emplace_back(pos.get_cursor(c)); - return LBABtree::iterate_repeat_ret_inner( - interruptible::ready_future_marker{}, - seastar::stop_iteration::no); - }).si_then([&ret] { - return std::move(ret); - }); - }); -} - -BtreeLBAManager::resolve_indirect_cursor_ret -BtreeLBAManager::resolve_indirect_cursor( - op_context_t c, - LBABtree& btree, - const LBACursor &indirect_cursor) -{ - ceph_assert(indirect_cursor.is_indirect()); - return get_cursors( - c, - btree, - indirect_cursor.get_intermediate_key(), - indirect_cursor.get_length() - ).si_then([&indirect_cursor](auto cursors) { - ceph_assert(cursors.size() == 1); - auto& direct_cursor = cursors.front(); - auto intermediate_key = indirect_cursor.get_intermediate_key(); - assert(!direct_cursor->is_indirect()); - assert(direct_cursor->get_laddr() <= intermediate_key); - assert(direct_cursor->get_laddr() + direct_cursor->get_length() - >= intermediate_key + indirect_cursor.get_length()); - return std::move(direct_cursor); - }); -} - -BtreeLBAManager::get_mapping_ret -BtreeLBAManager::get_mapping( - Transaction &t, - laddr_t laddr) -{ - LOG_PREFIX(BtreeLBAManager::get_mapping); - TRACET("{} ...", t, laddr); - auto c = get_context(t); - return with_btree( - cache, c, - [FNAME, this, c, laddr](auto& btree) - { - return get_cursor(c, btree, laddr - ).si_then([FNAME, this, c, laddr, &btree](LBACursorRef cursor) { - if (!cursor->is_indirect()) { - TRACET("{} got direct cursor {}", - c.trans, laddr, *cursor); - auto mapping = LBAMapping::create_direct(std::move(cursor)); - return get_mapping_iertr::make_ready_future< - LBAMapping>(std::move(mapping)); - } - assert(laddr == cursor->get_laddr()); - assert(cursor->val->refcount == EXTENT_DEFAULT_REF_COUNT); - assert(cursor->val->checksum == 0); - return resolve_indirect_cursor(c, btree, *cursor - ).si_then([FNAME, c, laddr, indirect=std::move(cursor)] - (auto direct) mutable { - auto mapping = LBAMapping::create_indirect( - std::move(direct), std::move(indirect)); - TRACET("{} got indirect mapping {}", - c.trans, laddr, mapping); - return get_mapping_iertr::make_ready_future< - LBAMapping>(std::move(mapping)); - }); - }); - }); -} - -BtreeLBAManager::_get_cursor_ret -BtreeLBAManager::get_cursor( - op_context_t c, - LBABtree& btree, - laddr_t laddr) -{ - LOG_PREFIX(BtreeLBAManager::get_cursor); - TRACET("{} ...", c.trans, laddr); - return btree.lower_bound( - c, laddr - ).si_then([FNAME, c, laddr](auto iter) -> _get_cursor_ret { - if (iter.is_end() || iter.get_key() != laddr) { - ERRORT("{} doesn't exist", c.trans, laddr); - return crimson::ct_error::enoent::make(); - } - TRACET("{} got value {}", c.trans, laddr, iter.get_val()); - return _get_cursor_ret( - interruptible::ready_future_marker{}, - iter.get_cursor(c)); - }); -} - -BtreeLBAManager::search_insert_position_ret -BtreeLBAManager::search_insert_position( - op_context_t c, - LBABtree &btree, - laddr_t hint, - extent_len_t length, - alloc_policy_t policy) -{ - LOG_PREFIX(BtreeLBAManager::search_insert_position); - auto lookup_attempts = stats.num_alloc_extents_iter_nexts; - using OptIter = std::optional; - return seastar::do_with( - hint, OptIter(std::nullopt), - [this, c, &btree, hint, length, lookup_attempts, policy, FNAME] - (laddr_t &last_end, OptIter &insert_iter) - { - return LBABtree::iterate_repeat( - c, - btree.upper_bound_right(c, hint), - [this, c, hint, length, lookup_attempts, policy, - &last_end, &insert_iter, FNAME](auto &iter) - { - ++stats.num_alloc_extents_iter_nexts; - if (iter.is_end() || - iter.get_key() >= (last_end + length)) { - if (policy == alloc_policy_t::deterministic) { - ceph_assert(hint == last_end); - } - DEBUGT("hint: {}~0x{:x}, allocated laddr: {}, insert position: {}, " - "done with {} attempts", - c.trans, hint, length, last_end, iter, - stats.num_alloc_extents_iter_nexts - lookup_attempts); - insert_iter.emplace(iter); - return search_insert_position_iertr::make_ready_future< - seastar::stop_iteration>(seastar::stop_iteration::yes); - } - ceph_assert(policy == alloc_policy_t::linear_search); - last_end = (iter.get_key() + iter.get_val().len).checked_to_laddr(); - TRACET("hint: {}~0x{:x}, current iter: {}, repeat ...", - c.trans, hint, length, iter); - return search_insert_position_iertr::make_ready_future< - seastar::stop_iteration>(seastar::stop_iteration::no); - }).si_then([&last_end, &insert_iter] { - ceph_assert(insert_iter); - return search_insert_position_iertr::make_ready_future< - insert_position_t>(last_end, *std::move(insert_iter)); - }); - }); -} - -BtreeLBAManager::alloc_mappings_ret -BtreeLBAManager::alloc_contiguous_mappings( - Transaction &t, - laddr_t hint, - std::vector &alloc_infos, - alloc_policy_t policy) -{ - ceph_assert(hint != L_ADDR_NULL); - extent_len_t total_len = 0; - for (auto &info : alloc_infos) { - assert(info.key == L_ADDR_NULL); - total_len += info.value.len; - } - - auto c = get_context(t); - return with_btree( - cache, - c, - [this, c, hint, &alloc_infos, total_len, policy](auto &btree) - { - return search_insert_position(c, btree, hint, total_len, policy - ).si_then([this, c, &alloc_infos, &btree](insert_position_t res) { - extent_len_t offset = 0; - for (auto &info : alloc_infos) { - info.key = (res.laddr + offset).checked_to_laddr(); - offset += info.value.len; - } - return insert_mappings( - c, btree, std::move(res.insert_iter), alloc_infos); - }); - }); -} - -BtreeLBAManager::alloc_mappings_ret -BtreeLBAManager::alloc_sparse_mappings( - Transaction &t, - laddr_t hint, - std::vector &alloc_infos, - alloc_policy_t policy) -{ - ceph_assert(hint != L_ADDR_NULL); -#ifndef NDEBUG - assert(alloc_infos.front().key != L_ADDR_NULL); - for (size_t i = 1; i < alloc_infos.size(); i++) { - auto &prev = alloc_infos[i - 1]; - auto &cur = alloc_infos[i]; - assert(cur.key != L_ADDR_NULL); - assert(prev.key + prev.value.len <= cur.key); - } -#endif - auto total_len = hint.get_byte_distance( - alloc_infos.back().key + alloc_infos.back().value.len); - auto c = get_context(t); - return with_btree( - cache, - c, - [this, c, hint, &alloc_infos, total_len, policy](auto &btree) - { - return search_insert_position(c, btree, hint, total_len, policy - ).si_then([this, c, hint, &alloc_infos, &btree, policy](auto res) { - if (policy != alloc_policy_t::deterministic) { - for (auto &info : alloc_infos) { - auto offset = info.key.get_byte_distance(hint); - info.key = (res.laddr + offset).checked_to_laddr(); - } - } // deterministic guarantees hint == res.laddr - return insert_mappings( - c, btree, std::move(res.insert_iter), alloc_infos); - }); - }); -} - -BtreeLBAManager::alloc_mappings_ret -BtreeLBAManager::insert_mappings( - op_context_t c, - LBABtree &btree, - LBABtree::iterator iter, - std::vector &alloc_infos) -{ - return seastar::do_with( - std::move(iter), std::list(), - [c, &btree, &alloc_infos] - (LBABtree::iterator &iter, std::list &ret) - { - return trans_intr::do_for_each( - alloc_infos.begin(), - alloc_infos.end(), - [c, &btree, &iter, &ret](auto &info) - { - assert(info.key != L_ADDR_NULL); - return btree.insert( - c, iter, info.key, info.value - ).si_then([c, &iter, &ret, &info](auto p) { - ceph_assert(p.second); - iter = std::move(p.first); - auto &leaf_node = *iter.get_leaf_node(); - leaf_node.insert_child_ptr( - iter.get_leaf_pos(), - info.extent, - leaf_node.get_size() - 1 /*the size before the insert*/); - if (is_valid_child_ptr(info.extent)) { - ceph_assert(info.value.pladdr.is_paddr()); - assert(info.value.pladdr == iter.get_val().pladdr); - assert(info.value.len == iter.get_val().len); - assert(info.extent->is_logical()); - if (info.extent->has_laddr()) { - // see TM::remap_pin() - assert(info.key == info.extent->get_laddr()); - assert(info.key == iter.get_key()); - } else { - // see TM::alloc_non_data_extent() - // TM::alloc_data_extents() - info.extent->set_laddr(iter.get_key()); - } - } - ret.push_back(iter.get_cursor(c)); - return iter.next(c).si_then([&iter](auto p) { - iter = std::move(p); - }); - }); - }).si_then([&ret] { - return alloc_mappings_iertr::make_ready_future< - std::list>(std::move(ret)); - }); - }); -} - -static bool is_lba_node(const CachedExtent &e) -{ - return is_lba_node(e.get_type()); -} - -BtreeLBAManager::base_iertr::template future<> -_init_cached_extent( - op_context_t c, - const CachedExtentRef &e, - LBABtree &btree, - bool &ret) -{ - if (e->is_logical()) { - auto logn = e->cast(); - return btree.lower_bound( - c, - logn->get_laddr() - ).si_then([e, c, logn, &ret](auto iter) { - LOG_PREFIX(BtreeLBAManager::init_cached_extent); - if (!iter.is_end() && - iter.get_key() == logn->get_laddr() && - iter.get_val().pladdr.is_paddr() && - iter.get_val().pladdr.get_paddr() == logn->get_paddr()) { - assert(!iter.get_leaf_node()->is_pending()); - iter.get_leaf_node()->link_child(logn.get(), iter.get_leaf_pos()); - logn->set_laddr(iter.get_key()); - ceph_assert(iter.get_val().len == e->get_length()); - DEBUGT("logical extent {} live", c.trans, *logn); - ret = true; - } else { - DEBUGT("logical extent {} not live", c.trans, *logn); - ret = false; - } - }); - } else { - return btree.init_cached_extent(c, e - ).si_then([&ret](bool is_alive) { - ret = is_alive; - }); - } -} - -BtreeLBAManager::init_cached_extent_ret -BtreeLBAManager::init_cached_extent( - Transaction &t, - CachedExtentRef e) -{ - LOG_PREFIX(BtreeLBAManager::init_cached_extent); - TRACET("{}", t, *e); - return seastar::do_with(bool(), [this, e, &t](bool &ret) { - auto c = get_context(t); - return with_btree( - cache, c, - [c, e, &ret](auto &btree) -> base_iertr::future<> { - LOG_PREFIX(BtreeLBAManager::init_cached_extent); - DEBUGT("extent {}", c.trans, *e); - return _init_cached_extent(c, e, btree, ret); - } - ).si_then([&ret] { return ret; }); - }); -} - -#ifdef UNIT_TESTS_BUILT -BtreeLBAManager::check_child_trackers_ret -BtreeLBAManager::check_child_trackers( - Transaction &t) { - auto c = get_context(t); - return with_btree( - cache, c, - [c](auto &btree) { - return btree.check_child_trackers(c); - }); -} -#endif - -BtreeLBAManager::scan_mappings_ret -BtreeLBAManager::scan_mappings( - Transaction &t, - laddr_t begin, - laddr_t end, - scan_mappings_func_t &&f) -{ - LOG_PREFIX(BtreeLBAManager::scan_mappings); - DEBUGT("begin: {}, end: {}", t, begin, end); - - auto c = get_context(t); - return with_btree( - cache, - c, - [c, f=std::move(f), begin, end](auto &btree) mutable { - return LBABtree::iterate_repeat( - c, - btree.upper_bound_right(c, begin), - [f=std::move(f), begin, end](auto &pos) { - if (pos.is_end() || pos.get_key() >= end) { - return typename LBABtree::iterate_repeat_ret_inner( - interruptible::ready_future_marker{}, - seastar::stop_iteration::yes); - } - ceph_assert((pos.get_key() + pos.get_val().len) > begin); - if (pos.get_val().pladdr.is_paddr()) { - f(pos.get_key(), pos.get_val().pladdr.get_paddr(), pos.get_val().len); - } - return LBABtree::iterate_repeat_ret_inner( - interruptible::ready_future_marker{}, - seastar::stop_iteration::no); - }); - }); -} - -BtreeLBAManager::rewrite_extent_ret -BtreeLBAManager::rewrite_extent( - Transaction &t, - CachedExtentRef extent) -{ - LOG_PREFIX(BtreeLBAManager::rewrite_extent); - if (extent->has_been_invalidated()) { - ERRORT("extent has been invalidated -- {}", t, *extent); - ceph_abort(); - } - assert(!extent->is_logical()); - - if (is_lba_node(*extent)) { - DEBUGT("rewriting lba extent -- {}", t, *extent); - auto c = get_context(t); - return with_btree( - cache, - c, - [c, extent](auto &btree) mutable { - return btree.rewrite_extent(c, extent); - }); - } else { - DEBUGT("skip non lba extent -- {}", t, *extent); - return rewrite_extent_iertr::now(); - } -} - -BtreeLBAManager::update_mapping_ret -BtreeLBAManager::update_mapping( - Transaction& t, - laddr_t laddr, - extent_len_t prev_len, - paddr_t prev_addr, - LogicalChildNode& nextent) -{ - LOG_PREFIX(BtreeLBAManager::update_mapping); - auto addr = nextent.get_paddr(); - auto len = nextent.get_length(); - auto checksum = nextent.get_last_committed_crc(); - TRACET("laddr={}, paddr {}~0x{:x} => {}~0x{:x}, crc=0x{:x}", - t, laddr, prev_addr, prev_len, addr, len, checksum); - assert(laddr == nextent.get_laddr()); - assert(!addr.is_null()); - return _update_mapping( - t, - laddr, - [prev_addr, addr, prev_len, len, checksum] - (const lba_map_val_t &in) { - lba_map_val_t ret = in; - ceph_assert(in.pladdr.is_paddr()); - ceph_assert(in.pladdr.get_paddr() == prev_addr); - ceph_assert(in.len == prev_len); - ret.pladdr = addr; - ret.len = len; - ret.checksum = checksum; - return ret; - }, - &nextent - ).si_then([&t, laddr, prev_addr, prev_len, addr, len, checksum, FNAME](auto res) { - assert(res.is_alive_mapping()); - DEBUGT("laddr={}, paddr {}~0x{:x} => {}~0x{:x}, crc=0x{:x} done -- {}", - t, laddr, prev_addr, prev_len, addr, len, checksum, res.get_cursor()); - return update_mapping_iertr::make_ready_future< - extent_ref_count_t>(res.get_cursor().get_refcount()); - }, - update_mapping_iertr::pass_further{}, - /* ENOENT in particular should be impossible */ - crimson::ct_error::assert_all{ - "Invalid error in BtreeLBAManager::update_mapping" - } - ); -} - -BtreeLBAManager::update_mappings_ret -BtreeLBAManager::update_mappings( - Transaction& t, - const std::list& extents) -{ - return trans_intr::do_for_each(extents, [this, &t](auto &extent) { - LOG_PREFIX(BtreeLBAManager::update_mappings); - auto laddr = extent->get_laddr(); - auto prev_addr = extent->get_prior_paddr_and_reset(); - auto len = extent->get_length(); - auto addr = extent->get_paddr(); - auto checksum = extent->get_last_committed_crc(); - TRACET("laddr={}, paddr {}~0x{:x} => {}, crc=0x{:x}", - t, laddr, prev_addr, len, addr, checksum); - assert(!addr.is_null()); - return _update_mapping( - t, - laddr, - [prev_addr, addr, len, checksum]( - const lba_map_val_t &in) { - lba_map_val_t ret = in; - ceph_assert(in.pladdr.is_paddr()); - ceph_assert(in.pladdr.get_paddr() == prev_addr); - ceph_assert(in.len == len); - ret.pladdr = addr; - ret.checksum = checksum; - return ret; - }, - nullptr // all the extents should have already been - // added to the fixed_kv_btree - ).si_then([&t, laddr, prev_addr, len, addr, checksum, FNAME](auto res) { - DEBUGT("laddr={}, paddr {}~0x{:x} => {}, crc=0x{:x} done -- {}", - t, laddr, prev_addr, len, addr, checksum, res.get_cursor()); - return update_mapping_iertr::make_ready_future(); - }, - update_mapping_iertr::pass_further{}, - /* ENOENT in particular should be impossible */ - crimson::ct_error::assert_all{ - "Invalid error in BtreeLBAManager::update_mappings" - } - ); - }); -} - -BtreeLBAManager::get_physical_extent_if_live_ret -BtreeLBAManager::get_physical_extent_if_live( - Transaction &t, - extent_types_t type, - paddr_t addr, - laddr_t laddr, - extent_len_t len) -{ - LOG_PREFIX(BtreeLBAManager::get_physical_extent_if_live); - DEBUGT("{}, laddr={}, paddr={}, length={}", - t, type, laddr, addr, len); - ceph_assert(is_lba_node(type)); - auto c = get_context(t); - return with_btree_ret( - cache, - c, - [c, type, addr, laddr, len](auto &btree) { - if (type == extent_types_t::LADDR_INTERNAL) { - return btree.get_internal_if_live(c, addr, laddr, len); - } else { - assert(type == extent_types_t::LADDR_LEAF || - type == extent_types_t::DINK_LADDR_LEAF); - return btree.get_leaf_if_live(c, addr, laddr, len); - } - }); -} - -BtreeLBAManager::refresh_lba_mapping_ret -BtreeLBAManager::refresh_lba_mapping(Transaction &t, LBAMapping mapping) -{ - assert(mapping.is_linked_direct()); - if (mapping.is_viewable()) { - return refresh_lba_mapping_iertr::make_ready_future< - LBAMapping>(std::move(mapping)); - } - auto c = get_context(t); - return with_btree_state( - cache, - c, - std::move(mapping), - [c, this](LBABtree &btree, LBAMapping &mapping) mutable - { - return refresh_lba_cursor(c, btree, *mapping.direct_cursor - ).si_then([c, this, &btree, &mapping] { - if (mapping.indirect_cursor) { - return refresh_lba_cursor(c, btree, *mapping.indirect_cursor); - } - return refresh_lba_cursor_iertr::make_ready_future(); -#ifndef NDEBUG - }).si_then([&mapping] { - assert(mapping.is_viewable()); -#endif - }); - }); -} - -BtreeLBAManager::refresh_lba_cursor_ret -BtreeLBAManager::refresh_lba_cursor( - op_context_t c, - LBABtree &btree, - LBACursor &cursor) -{ - LOG_PREFIX(BtreeLBAManager::refresh_lba_cursor); - stats.num_refresh_parent_total++; - - if (!cursor.parent->is_valid()) { - stats.num_refresh_invalid_parent++; - TRACET("cursor {} parent is invalid, re-search from scratch", - c.trans, cursor); - return btree.lower_bound(c, cursor.get_laddr() - ).si_then([&cursor](LBABtree::iterator iter) { - auto leaf = iter.get_leaf_node(); - cursor.parent = leaf; - cursor.modifications = leaf->modifications; - cursor.pos = iter.get_leaf_pos(); - if (!cursor.is_end()) { - ceph_assert(!iter.is_end()); - ceph_assert(iter.get_key() == cursor.get_laddr()); - cursor.val = iter.get_val(); - assert(cursor.is_viewable()); - } - }); - } - - auto [viewable, state] = cursor.parent->is_viewable_by_trans(c.trans); - auto leaf = cursor.parent->cast(); - - TRACET("cursor: {} viewable: {} state: {}", - c.trans, cursor, viewable, state); - - if (!viewable) { - stats.num_refresh_unviewable_parent++; - leaf = leaf->find_pending_version(c.trans, cursor.get_laddr()); - cursor.parent = leaf; - } - - if (!viewable || - leaf->modified_since(cursor.modifications)) { - if (viewable) { - stats.num_refresh_modified_viewable_parent++; - } - - cursor.modifications = leaf->modifications; - if (cursor.is_end()) { - cursor.pos = leaf->get_size(); - assert(!cursor.val); - } else { - auto i = leaf->lower_bound(cursor.get_laddr()); - cursor.pos = i.get_offset(); - cursor.val = i.get_val(); - - auto iter = LBALeafNode::iterator(leaf.get(), cursor.pos); - ceph_assert(iter.get_key() == cursor.key); - ceph_assert(iter.get_val() == cursor.val); - assert(cursor.is_viewable()); - } - } - - return refresh_lba_cursor_iertr::make_ready_future(); -} - -void BtreeLBAManager::register_metrics() -{ - LOG_PREFIX(BtreeLBAManager::register_metrics); - DEBUG("start"); - stats = {}; - namespace sm = seastar::metrics; - metrics.add_group( - "LBA", - { - sm::make_counter( - "alloc_extents", - stats.num_alloc_extents, - sm::description("total number of lba alloc_extent operations") - ), - sm::make_counter( - "alloc_extents_iter_nexts", - stats.num_alloc_extents_iter_nexts, - sm::description("total number of iterator next operations during extent allocation") - ), - sm::make_counter( - "refresh_parent_total", - stats.num_refresh_parent_total, - sm::description("total number of refreshed cursors") - ), - sm::make_counter( - "refresh_invalid_parent", - stats.num_refresh_invalid_parent, - sm::description("total number of refreshed cursors with invalid parents") - ), - sm::make_counter( - "refresh_unviewable_parent", - stats.num_refresh_unviewable_parent, - sm::description("total number of refreshed cursors with unviewable parents") - ), - sm::make_counter( - "refresh_modified_viewable_parent", - stats.num_refresh_modified_viewable_parent, - sm::description("total number of refreshed cursors with viewable but modified parents") - ), - } - ); -} - -BtreeLBAManager::_decref_intermediate_ret -BtreeLBAManager::_decref_intermediate( - Transaction &t, - laddr_t addr, - extent_len_t len) -{ - auto c = get_context(t); - return with_btree( - cache, - c, - [c, addr, len](auto &btree) mutable { - return btree.upper_bound_right( - c, addr - ).si_then([&btree, addr, len, c](auto iter) { - ceph_assert(!iter.is_end()); - laddr_t key = iter.get_key(); - ceph_assert(key <= addr); - auto val = iter.get_val(); - ceph_assert(key + val.len >= addr + len); - ceph_assert(val.pladdr.is_paddr()); - ceph_assert(val.refcount >= 1); - val.refcount -= 1; - - LOG_PREFIX(BtreeLBAManager::_decref_intermediate); - TRACET("decreased refcount of intermediate key {} -- {}", - c.trans, key, val); - - if (val.refcount == 0) { - return btree.remove(c, iter - ).si_then([key, val] { - return ref_iertr::make_ready_future< - update_mapping_ret_bare_t>(key, val); - }); - } else { - return btree.update(c, iter, val - ).si_then([c](auto iter) { - return ref_iertr::make_ready_future< - update_mapping_ret_bare_t>(iter.get_cursor(c)); - }); - } - }); - }); -} - -BtreeLBAManager::remap_ret -BtreeLBAManager::remap_mappings( - Transaction &t, - LBAMapping orig_mapping, - std::vector remaps, - std::vector extents) -{ - LOG_PREFIX(BtreeLBAManager::remap_mappings); - struct state_t { - LBAMapping orig_mapping; - std::vector remaps; - std::vector extents; - std::vector alloc_infos; - std::vector ret; - }; - return seastar::do_with( - state_t(std::move(orig_mapping), std::move(remaps), std::move(extents), {}, {}), - [this, &t, FNAME](state_t &state) - { - return update_refcount( - t, state.orig_mapping.get_key(), -1, false - ).si_then([this, &t, &state, FNAME](auto ret) { - // Remapping the shared direct mapping is prohibited, - // the refcount of indirect mapping should always be 1. - ceph_assert(ret.is_removed_mapping()); - - auto orig_laddr = state.orig_mapping.get_key(); - if (!state.orig_mapping.is_indirect()) { - auto &addr = ret.get_removed_mapping().map_value.pladdr; - ceph_assert(addr.is_paddr() && !addr.get_paddr().is_zero()); - return alloc_extents( - t, - (state.remaps.front().offset + orig_laddr).checked_to_laddr(), - std::move(state.extents), - EXTENT_DEFAULT_REF_COUNT - ).si_then([&state](auto ret) { - state.ret = std::move(ret); - return remap_iertr::make_ready_future(); - }); - } - - extent_len_t orig_len = state.orig_mapping.get_length(); - auto intermediate_key = state.orig_mapping.get_intermediate_key(); - ceph_assert(intermediate_key != L_ADDR_NULL); - DEBUGT("remap indirect mapping {}", t, state.orig_mapping); - for (auto &remap : state.remaps) { - DEBUGT("remap 0x{:x}~0x{:x}", t, remap.offset, remap.len); - ceph_assert(remap.len != 0); - ceph_assert(remap.offset + remap.len <= orig_len); - auto remapped_laddr = (orig_laddr + remap.offset) - .checked_to_laddr(); - auto remapped_intermediate_key = (intermediate_key + remap.offset) - .checked_to_laddr(); - state.alloc_infos.emplace_back( - alloc_mapping_info_t::create_indirect( - remapped_laddr, remap.len, remapped_intermediate_key)); - } - - return alloc_sparse_mappings( - t, state.alloc_infos.front().key, state.alloc_infos, - alloc_policy_t::deterministic - ).si_then([&t, &state, this](std::list cursors) { - return seastar::futurize_invoke([&t, &state, this] { - if (state.remaps.size() > 1) { - auto base = state.orig_mapping.get_intermediate_base(); - return update_refcount( - t, base, state.remaps.size() - 1, false - ).si_then([](update_mapping_ret_bare_t ret) { - return ret.take_cursor(); - }); - } else { - return remap_iertr::make_ready_future< - LBACursorRef>(state.orig_mapping.direct_cursor->duplicate()); - } - }).si_then([&state, cursors=std::move(cursors)](auto direct) mutable { - for (auto &cursor : cursors) { - state.ret.emplace_back(LBAMapping::create_indirect( - direct->duplicate(), std::move(cursor))); - } - return remap_iertr::make_ready_future(); - }); - }); - }).si_then([&state] { - assert(state.ret.size() == state.remaps.size()); -#ifndef NDEBUG - auto mapping_it = state.ret.begin(); - auto remap_it = state.remaps.begin(); - for (;mapping_it != state.ret.end(); mapping_it++, remap_it++) { - auto &mapping = *mapping_it; - auto &remap = *remap_it; - assert(mapping.get_key() == state.orig_mapping.get_key() + remap.offset); - assert(mapping.get_length() == remap.len); - } -#endif - return remap_iertr::make_ready_future< - std::vector>(std::move(state.ret)); - }); - }); -} - -BtreeLBAManager::update_refcount_ret -BtreeLBAManager::update_refcount( - Transaction &t, - laddr_t addr, - int delta, - bool cascade_remove) -{ - LOG_PREFIX(BtreeLBAManager::update_refcount); - TRACET("laddr={}, delta={}", t, addr, delta); - return _update_mapping( - t, - addr, - [delta](const lba_map_val_t &in) { - lba_map_val_t out = in; - ceph_assert((int)out.refcount + delta >= 0); - out.refcount += delta; - return out; - }, - nullptr - ).si_then([&t, addr, delta, FNAME, this, cascade_remove](auto res) { - DEBUGT("laddr={}, delta={} done -- {}", - t, addr, delta, - res.is_alive_mapping() - ? res.get_cursor().val - : res.get_removed_mapping().map_value); - if (res.is_removed_mapping() && cascade_remove && - res.get_removed_mapping().map_value.pladdr.is_laddr()) { - auto &val = res.get_removed_mapping().map_value; - TRACET("decref intermediate {} -> {}", - t, addr, val.pladdr.get_laddr()); - return _decref_intermediate(t, val.pladdr.get_laddr(), val.len - ).handle_error_interruptible( - update_mapping_iertr::pass_further{}, - crimson::ct_error::assert_all{ - "unexpect ENOENT" - } - ); - } - return update_mapping_iertr::make_ready_future< - update_mapping_ret_bare_t>(std::move(res)); - }); -} - -BtreeLBAManager::_update_mapping_ret -BtreeLBAManager::_update_mapping( - Transaction &t, - laddr_t addr, - update_func_t &&f, - LogicalChildNode* nextent) -{ - auto c = get_context(t); - return with_btree( - cache, - c, - [f=std::move(f), c, addr, nextent](auto &btree) mutable { - return btree.lower_bound( - c, addr - ).si_then([&btree, f=std::move(f), c, addr, nextent](auto iter) - -> _update_mapping_ret { - if (iter.is_end() || iter.get_key() != addr) { - LOG_PREFIX(BtreeLBAManager::_update_mapping); - ERRORT("laddr={} doesn't exist", c.trans, addr); - return crimson::ct_error::enoent::make(); - } - - auto ret = f(iter.get_val()); - if (ret.refcount == 0) { - assert(nextent == nullptr); - return btree.remove( - c, - iter - ).si_then([addr, ret] { - return update_mapping_ret_bare_t(addr, ret); - }); - } else { - return btree.update( - c, - iter, - ret - ).si_then([c, nextent](auto iter) { - if (nextent) { - // nextent is provided iff unlinked, - // also see TM::rewrite_logical_extent() - assert(!nextent->has_parent_tracker()); - iter.get_leaf_node()->update_child_ptr( - iter.get_leaf_pos(), nextent); - } - assert(!nextent || - (nextent->has_parent_tracker() && - nextent->get_parent_node().get() == iter.get_leaf_node().get())); - return update_mapping_ret_bare_t(iter.get_cursor(c)); - }); - } - }); - }); -} - -} diff --git a/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.h b/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.h deleted file mode 100644 index 8a6698072b3..00000000000 --- a/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.h +++ /dev/null @@ -1,532 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab - -#pragma once - -#include - -#include -#include -#include - -#include "include/ceph_assert.h" -#include "include/buffer_fwd.h" -#include "include/interval_set.h" -#include "common/interval_map.h" -#include "crimson/osd/exceptions.h" - -#include "crimson/os/seastore/btree/fixed_kv_btree.h" -#include "crimson/os/seastore/seastore_types.h" -#include "crimson/os/seastore/lba_manager.h" -#include "crimson/os/seastore/cache.h" - -#include "crimson/os/seastore/lba_manager/btree/lba_btree_node.h" -#include "crimson/os/seastore/btree/btree_types.h" - -namespace crimson::os::seastore { -class LogicalCachedExtent; -} - -namespace crimson::os::seastore::lba_manager::btree { - -using LBABtree = FixedKVBtree< - laddr_t, lba_map_val_t, LBAInternalNode, - LBALeafNode, LBACursor, LBA_BLOCK_SIZE>; - -/** - * BtreeLBAManager - * - * Uses a wandering btree to track two things: - * 1) lba state including laddr_t -> paddr_t mapping - * 2) reverse paddr_t -> laddr_t mapping for gc (TODO) - * - * Generally, any transaction will involve - * 1) deltas against lba tree nodes - * 2) new lba tree nodes - * - Note, there must necessarily be a delta linking - * these new nodes into the tree -- might be a - * bootstrap_state_t delta if new root - * - * get_mappings, alloc_extent_*, etc populate a Transaction - * which then gets submitted - */ -class BtreeLBAManager : public LBAManager { -public: - BtreeLBAManager(Cache &cache) - : cache(cache) - { - register_metrics(); - } - - mkfs_ret mkfs( - Transaction &t) final; - - get_mappings_ret get_mappings( - Transaction &t, - laddr_t offset, extent_len_t length) final; - - get_mapping_ret get_mapping( - Transaction &t, - laddr_t offset) final; - - alloc_extent_ret reserve_region( - Transaction &t, - laddr_t hint, - extent_len_t len) final - { - std::vector alloc_infos = { - alloc_mapping_info_t::create_zero(len)}; - return seastar::do_with( - std::move(alloc_infos), - [&t, hint, this](auto &alloc_infos) { - return alloc_contiguous_mappings( - t, hint, alloc_infos, alloc_policy_t::linear_search - ).si_then([](auto cursors) { - assert(cursors.size() == 1); - return LBAMapping::create_direct(std::move(cursors.front())); - }); - }); - } - - alloc_extent_ret clone_mapping( - Transaction &t, - laddr_t laddr, - extent_len_t len, - laddr_t intermediate_key, - laddr_t intermediate_base) final - { - std::vector alloc_infos = { - alloc_mapping_info_t::create_indirect( - laddr, len, intermediate_key)}; - return seastar::do_with( - std::move(alloc_infos), - [this, &t, laddr, intermediate_base](auto &infos) { - return alloc_sparse_mappings( - t, laddr, infos, alloc_policy_t::deterministic - ).si_then([this, &t, intermediate_base](auto cursors) { - ceph_assert(cursors.size() == 1); - ceph_assert(cursors.front()->is_indirect()); - return update_refcount(t, intermediate_base, 1, false - ).si_then([cursors=std::move(cursors)](auto p) mutable { - assert(p.is_alive_mapping()); - auto mapping = LBAMapping::create_indirect( - p.take_cursor(), std::move(cursors.front())); - ceph_assert(mapping.is_stable()); - return alloc_extent_iertr::make_ready_future< - LBAMapping>(std::move(mapping)); - }); - }); - }).handle_error_interruptible( - crimson::ct_error::input_output_error::pass_further{}, - crimson::ct_error::assert_all{"unexpect enoent"}); - } - - alloc_extent_ret alloc_extent( - Transaction &t, - laddr_t hint, - LogicalChildNode &ext, - extent_ref_count_t refcount) final - { - // The real checksum will be updated upon transaction commit - assert(ext.get_last_committed_crc() == 0); - assert(!ext.has_laddr()); - std::vector alloc_infos = { - alloc_mapping_info_t::create_direct( - L_ADDR_NULL, - ext.get_length(), - ext.get_paddr(), - refcount, - ext.get_last_committed_crc(), - ext)}; - return seastar::do_with( - std::move(alloc_infos), - [this, &t, hint](auto &alloc_infos) { - return alloc_contiguous_mappings( - t, hint, alloc_infos, alloc_policy_t::linear_search - ).si_then([](auto cursors) { - assert(cursors.size() == 1); - return LBAMapping::create_direct(std::move(cursors.front())); - }); - }); - } - - alloc_extents_ret alloc_extents( - Transaction &t, - laddr_t hint, - std::vector extents, - extent_ref_count_t refcount) final - { - std::vector alloc_infos; - assert(!extents.empty()); - auto has_laddr = extents.front()->has_laddr(); - for (auto &extent : extents) { - assert(extent); - assert(extent->has_laddr() == has_laddr); - alloc_infos.emplace_back( - alloc_mapping_info_t::create_direct( - extent->has_laddr() ? extent->get_laddr() : L_ADDR_NULL, - extent->get_length(), - extent->get_paddr(), - refcount, - extent->get_last_committed_crc(), - *extent)); - } - return seastar::do_with( - std::move(alloc_infos), - [this, &t, hint, has_laddr](auto &alloc_infos) - { - if (has_laddr) { - return alloc_sparse_mappings( - t, hint, alloc_infos, alloc_policy_t::deterministic) -#ifndef NDEBUG - .si_then([&alloc_infos](std::list cursors) { - assert(alloc_infos.size() == cursors.size()); - auto info_p = alloc_infos.begin(); - auto cursor_p = cursors.begin(); - for (; info_p != alloc_infos.end(); info_p++, cursor_p++) { - auto &cursor = *cursor_p; - assert(cursor->get_laddr() == info_p->key); - } - return alloc_extent_iertr::make_ready_future< - std::list>(std::move(cursors)); - }) -#endif - ; - } else { - return alloc_contiguous_mappings( - t, hint, alloc_infos, alloc_policy_t::linear_search); - } - }).si_then([](std::list cursors) { - std::vector ret; - for (auto &cursor : cursors) { - ret.emplace_back(LBAMapping::create_direct(std::move(cursor))); - } - return ret; - }); - } - - ref_ret remove_mapping( - Transaction &t, - laddr_t addr) final { - return update_refcount(t, addr, -1, true - ).si_then([](auto res) { - return ref_update_result_t(res); - }); - } - - remap_ret remap_mappings( - Transaction &t, - LBAMapping orig_mapping, - std::vector remaps, - std::vector extents) final; - - /** - * init_cached_extent - * - * Checks whether e is live (reachable from lba tree) and drops or initializes - * accordingly. - * - * Returns if e is live. - */ - init_cached_extent_ret init_cached_extent( - Transaction &t, - CachedExtentRef e) final; - -#ifdef UNIT_TESTS_BUILT - check_child_trackers_ret check_child_trackers(Transaction &t) final; -#endif - - scan_mappings_ret scan_mappings( - Transaction &t, - laddr_t begin, - laddr_t end, - scan_mappings_func_t &&f) final; - - rewrite_extent_ret rewrite_extent( - Transaction &t, - CachedExtentRef extent) final; - - update_mapping_ret update_mapping( - Transaction& t, - laddr_t laddr, - extent_len_t prev_len, - paddr_t prev_addr, - LogicalChildNode&) final; - - update_mappings_ret update_mappings( - Transaction& t, - const std::list& extents); - - get_physical_extent_if_live_ret get_physical_extent_if_live( - Transaction &t, - extent_types_t type, - paddr_t addr, - laddr_t laddr, - extent_len_t len) final; - - refresh_lba_mapping_ret refresh_lba_mapping( - Transaction &t, - LBAMapping mapping) final; - -private: - Cache &cache; - - struct { - uint64_t num_alloc_extents = 0; - uint64_t num_alloc_extents_iter_nexts = 0; - uint64_t num_refresh_parent_total = 0; - uint64_t num_refresh_invalid_parent = 0; - uint64_t num_refresh_unviewable_parent = 0; - uint64_t num_refresh_modified_viewable_parent = 0; - } stats; - - struct alloc_mapping_info_t { - laddr_t key = L_ADDR_NULL; // once assigned, the allocation to - // key must be exact and successful - lba_map_val_t value; - LogicalChildNode* extent = nullptr; - - static alloc_mapping_info_t create_zero(extent_len_t len) { - return { - L_ADDR_NULL, - { - len, - pladdr_t(P_ADDR_ZERO), - EXTENT_DEFAULT_REF_COUNT, - 0 - }, - static_cast(get_reserved_ptr())}; - } - static alloc_mapping_info_t create_indirect( - laddr_t laddr, - extent_len_t len, - laddr_t intermediate_key) { - return { - laddr, - { - len, - pladdr_t(intermediate_key), - EXTENT_DEFAULT_REF_COUNT, - 0 // crc will only be used and checked with LBA direct mappings - // also see pin_to_extent(_by_type) - }, - static_cast(get_reserved_ptr())}; - } - static alloc_mapping_info_t create_direct( - laddr_t laddr, - extent_len_t len, - paddr_t paddr, - extent_ref_count_t refcount, - checksum_t checksum, - LogicalChildNode& extent) { - return {laddr, {len, pladdr_t(paddr), refcount, checksum}, &extent}; - } - }; - - op_context_t get_context(Transaction &t) { - return op_context_t{cache, t}; - } - - seastar::metrics::metric_group metrics; - void register_metrics(); - - struct update_mapping_ret_bare_t { - update_mapping_ret_bare_t() - : update_mapping_ret_bare_t(LBACursorRef(nullptr)) {} - - update_mapping_ret_bare_t(LBACursorRef cursor) - : ret(std::move(cursor)) {} - - update_mapping_ret_bare_t(laddr_t laddr, lba_map_val_t value) - : ret(removed_mapping_t{laddr, value}) {} - - struct removed_mapping_t { - laddr_t laddr; - lba_map_val_t map_value; - }; - std::variant ret; - - bool is_removed_mapping() const { - return ret.index() == 0; - } - - bool is_alive_mapping() const { - if (ret.index() == 1) { - assert(std::get<1>(ret)); - return true; - } else { - return false; - } - } - - const removed_mapping_t& get_removed_mapping() const { - assert(is_removed_mapping()); - return std::get<0>(ret); - } - - const LBACursor& get_cursor() const { - assert(is_alive_mapping()); - return *std::get<1>(ret); - } - - LBACursorRef take_cursor() { - assert(is_alive_mapping()); - return std::move(std::get<1>(ret)); - } - - explicit operator ref_update_result_t() const { - if (is_removed_mapping()) { - auto v = get_removed_mapping(); - auto &val = v.map_value; - ceph_assert(val.pladdr.is_paddr()); - return {v.laddr, val.refcount, val.pladdr, val.len}; - } else { - assert(is_alive_mapping()); - auto &c = get_cursor(); - assert(c.val); - ceph_assert(!c.is_indirect()); - return {c.get_laddr(), c.val->refcount, c.val->pladdr, c.val->len}; - } - } - }; - - using update_refcount_iertr = ref_iertr; - using update_refcount_ret = update_refcount_iertr::future< - update_mapping_ret_bare_t>; - update_refcount_ret update_refcount( - Transaction &t, - laddr_t addr, - int delta, - bool cascade_remove); - - /** - * _update_mapping - * - * Updates mapping, removes if f returns nullopt - */ - using _update_mapping_iertr = ref_iertr; - using _update_mapping_ret = ref_iertr::future< - update_mapping_ret_bare_t>; - using update_func_t = std::function< - lba_map_val_t(const lba_map_val_t &v) - >; - _update_mapping_ret _update_mapping( - Transaction &t, - laddr_t addr, - update_func_t &&f, - LogicalChildNode*); - - struct insert_position_t { - laddr_t laddr; - LBABtree::iterator insert_iter; - }; - enum class alloc_policy_t { - deterministic, // no conflict - linear_search, - }; - using search_insert_position_iertr = base_iertr; - using search_insert_position_ret = - search_insert_position_iertr::future; - search_insert_position_ret search_insert_position( - op_context_t c, - LBABtree &btree, - laddr_t hint, - extent_len_t length, - alloc_policy_t policy); - - using alloc_mappings_iertr = base_iertr; - using alloc_mappings_ret = - alloc_mappings_iertr::future>; - /** - * alloc_contiguous_mappings - * - * Insert a range of contiguous mappings into the LBA btree. - * - * hint is a non-null laddr hint for allocation. All alloc_infos' key - * should be L_ADDR_NULL, the final laddr is relative to the allocated - * laddr based on preceding mappings' total length. - */ - alloc_mappings_ret alloc_contiguous_mappings( - Transaction &t, - laddr_t hint, - std::vector &alloc_infos, - alloc_policy_t policy); - - /** - * alloc_sparse_mappings - * - * Insert a range of sparse mappings into the LBA btree. - * - * hint is a non-null laddr hint for allocation. All of alloc_infos' key - * are non-null laddr hints and must be incremental, each mapping's final - * laddr maintains same offset to allocated laddr as original to hint. - */ - alloc_mappings_ret alloc_sparse_mappings( - Transaction &t, - laddr_t hint, - std::vector &alloc_infos, - alloc_policy_t policy); - - /** - * insert_mappings - * - * Insert all lba mappings built from alloc_infos into LBA btree before - * iter and return the inserted LBACursors. - * - * NOTE: There is no guarantee that the returned cursors are all valid - * since the successive insertion is possible to invalidate the parent - * extent of predecessively returned LBACursor. - */ - alloc_mappings_ret insert_mappings( - op_context_t c, - LBABtree &btree, - LBABtree::iterator iter, - std::vector &alloc_infos); - - ref_ret _incref_extent( - Transaction &t, - laddr_t addr, - int delta) { - ceph_assert(delta > 0); - return update_refcount(t, addr, delta, false - ).si_then([](auto res) { - return ref_update_result_t(res); - }); - } - - using _get_cursor_ret = get_mapping_iertr::future; - _get_cursor_ret get_cursor( - op_context_t c, - LBABtree& btree, - laddr_t offset); - - using _get_cursors_ret = get_mappings_iertr::future>; - _get_cursors_ret get_cursors( - op_context_t c, - LBABtree& btree, - laddr_t offset, - extent_len_t length); - - using resolve_indirect_cursor_ret = get_mappings_iertr::future; - resolve_indirect_cursor_ret resolve_indirect_cursor( - op_context_t c, - LBABtree& btree, - const LBACursor& indirect_cursor); - - using _decref_intermediate_ret = ref_iertr::future< - update_mapping_ret_bare_t>; - _decref_intermediate_ret _decref_intermediate( - Transaction &t, - laddr_t addr, - extent_len_t len); - - using refresh_lba_cursor_iertr = base_iertr; - using refresh_lba_cursor_ret = refresh_lba_cursor_iertr::future<>; - refresh_lba_cursor_ret refresh_lba_cursor( - op_context_t c, - LBABtree &btree, - LBACursor &cursor); -}; -using BtreeLBAManagerRef = std::unique_ptr; - -} diff --git a/src/crimson/os/seastore/lba_manager/btree/lba_btree_node.cc b/src/crimson/os/seastore/lba_manager/btree/lba_btree_node.cc deleted file mode 100644 index 9cb62db9a4e..00000000000 --- a/src/crimson/os/seastore/lba_manager/btree/lba_btree_node.cc +++ /dev/null @@ -1,87 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab - -#include -#include - -#include -#include - -#include "include/buffer.h" -#include "include/byteorder.h" - -#include "crimson/os/seastore/lba_manager/btree/btree_lba_manager.h" -#include "crimson/os/seastore/logging.h" -#include "crimson/os/seastore/logical_child_node.h" - -SET_SUBSYS(seastore_lba); - -namespace crimson::os::seastore::lba_manager::btree { - -std::ostream &LBALeafNode::print_detail(std::ostream &out) const -{ - out << ", size=" << this->get_size() - << ", meta=" << this->get_meta() - << ", modifications=" << this->modifications - << ", my_tracker=" << (void*)this->my_tracker; - if (this->my_tracker) { - out << ", my_tracker->parent=" << (void*)this->my_tracker->get_parent().get(); - } - return out << ", root_block=" << (void*)this->parent_of_root.get(); -} - -void LBALeafNode::resolve_relative_addrs(paddr_t base) -{ - LOG_PREFIX(LBALeafNode::resolve_relative_addrs); - for (auto i: *this) { - auto val = i->get_val(); - if (val.pladdr.is_paddr() && - val.pladdr.get_paddr().is_relative()) { - val.pladdr = base.add_relative(val.pladdr.get_paddr()); - TRACE("{} -> {}", i->get_val().pladdr, val.pladdr); - i->set_val(val); - } - } -} - -void LBALeafNode::update( - internal_const_iterator_t iter, - lba_map_val_t val) -{ - LOG_PREFIX(LBALeafNode::update); - SUBTRACE(seastore_fixedkv_tree, "trans.{}, pos {}", - this->pending_for_transaction, - iter.get_offset()); - this->on_modify(); - if (val.pladdr.is_paddr()) { - val.pladdr = maybe_generate_relative(val.pladdr.get_paddr()); - } - return this->journal_update( - iter, - val, - this->maybe_get_delta_buffer()); -} - -LBALeafNode::internal_const_iterator_t LBALeafNode::insert( - internal_const_iterator_t iter, - laddr_t addr, - lba_map_val_t val) -{ - LOG_PREFIX(LBALeafNode::insert); - SUBTRACE(seastore_fixedkv_tree, "trans.{}, pos {}, key {}", - this->pending_for_transaction, - iter.get_offset(), - addr); - this->on_modify(); - if (val.pladdr.is_paddr()) { - val.pladdr = maybe_generate_relative(val.pladdr.get_paddr()); - } - this->journal_insert( - iter, - addr, - val, - this->maybe_get_delta_buffer()); - return iter; -} - -} diff --git a/src/crimson/os/seastore/lba_manager/btree/lba_btree_node.h b/src/crimson/os/seastore/lba_manager/btree/lba_btree_node.h deleted file mode 100644 index 85319466e10..00000000000 --- a/src/crimson/os/seastore/lba_manager/btree/lba_btree_node.h +++ /dev/null @@ -1,291 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab - -#pragma once - -#include -#include -#include - - -#include "include/buffer.h" - -#include "crimson/common/fixed_kv_node_layout.h" -#include "crimson/common/errorator.h" -#include "crimson/os/seastore/seastore_types.h" -#include "crimson/os/seastore/cache.h" -#include "crimson/os/seastore/cached_extent.h" - -#include "crimson/os/seastore/btree/btree_types.h" -#include "crimson/os/seastore/btree/fixed_kv_btree.h" -#include "crimson/os/seastore/btree/fixed_kv_node.h" - -namespace crimson::os::seastore { -class LogicalChildNode; -} - -namespace crimson::os::seastore::lba_manager::btree { - -using base_iertr = Cache::base_iertr; -using LBANode = FixedKVNode; - -class BtreeLBAMapping; - -constexpr size_t LBA_BLOCK_SIZE = 4096; - -using lba_node_meta_t = fixed_kv_node_meta_t; - -using lba_node_meta_le_t = fixed_kv_node_meta_le_t; - -/** - * LBAInternalNode - * - * Abstracts operations on and layout of internal nodes for the - * LBA Tree. - * - * Layout (4KiB): - * checksum : ceph_le32[1] 4B - * size : ceph_le32[1] 4B - * meta : lba_node_meta_le_t[1] 20B - * keys : laddr_le_t[CAPACITY] (254*8)B - * values : paddr_le_t[CAPACITY] (254*8)B - * = 4092B - - * TODO: make the above capacity calculation part of FixedKVNodeLayout - * TODO: the above alignment probably isn't portable without further work - */ -constexpr size_t INTERNAL_NODE_CAPACITY = 254; -struct LBAInternalNode - : FixedKVInternalNode< - INTERNAL_NODE_CAPACITY, - laddr_t, laddr_le_t, - LBA_BLOCK_SIZE, - LBAInternalNode> { - static_assert( - check_capacity(LBA_BLOCK_SIZE), - "INTERNAL_NODE_CAPACITY doesn't fit in LBA_BLOCK_SIZE"); - using Ref = TCachedExtentRef; - using internal_iterator_t = const_iterator; - using key_type = laddr_t; - template - LBAInternalNode(T&&... t) : - FixedKVInternalNode(std::forward(t)...) {} - static constexpr uint32_t CHILD_VEC_UNIT = 0; - - static constexpr extent_types_t TYPE = extent_types_t::LADDR_INTERNAL; - - extent_types_t get_type() const final { - return TYPE; - } -}; -using LBAInternalNodeRef = LBAInternalNode::Ref; - -/** - * LBALeafNode - * - * Abstracts operations on and layout of leaf nodes for the - * LBA Tree. - * - * Layout (4KiB): - * checksum : ceph_le32[1] 4B - * size : ceph_le32[1] 4B - * meta : lba_node_meta_le_t[1] 20B - * keys : laddr_le_t[CAPACITY] (140*8)B - * values : lba_map_val_le_t[CAPACITY] (140*21)B - * = 4088B - * - * TODO: update FixedKVNodeLayout to handle the above calculation - * TODO: the above alignment probably isn't portable without further work - */ -constexpr size_t LEAF_NODE_CAPACITY = 140; - -struct LBALeafNode - : FixedKVLeafNode< - LEAF_NODE_CAPACITY, - laddr_t, laddr_le_t, - lba_map_val_t, lba_map_val_le_t, - LBA_BLOCK_SIZE, - LBAInternalNode, - LBALeafNode>, - ParentNode { - static_assert( - check_capacity(LBA_BLOCK_SIZE), - "LEAF_NODE_CAPACITY doesn't fit in LBA_BLOCK_SIZE"); - using Ref = TCachedExtentRef; - using parent_type_t = FixedKVLeafNode< - LEAF_NODE_CAPACITY, - laddr_t, laddr_le_t, - lba_map_val_t, lba_map_val_le_t, - LBA_BLOCK_SIZE, - LBAInternalNode, - LBALeafNode>; - using internal_const_iterator_t = - typename parent_type_t::node_layout_t::const_iterator; - using internal_iterator_t = - typename parent_type_t::node_layout_t::iterator; - using key_type = laddr_t; - using parent_node_t = ParentNode; - using child_t = LogicalChildNode; - static constexpr uint32_t CHILD_VEC_UNIT = 0; - LBALeafNode(ceph::bufferptr &&ptr) - : parent_type_t(std::move(ptr)), - parent_node_t(LEAF_NODE_CAPACITY) {} - explicit LBALeafNode(extent_len_t length) - : parent_type_t(length), - parent_node_t(LEAF_NODE_CAPACITY) {} - LBALeafNode(const LBALeafNode &rhs) - : parent_type_t(rhs), - parent_node_t(rhs) {} - - static constexpr extent_types_t TYPE = extent_types_t::LADDR_LEAF; - - void update( - internal_const_iterator_t iter, - lba_map_val_t val) final; - - internal_const_iterator_t insert( - internal_const_iterator_t iter, - laddr_t addr, - lba_map_val_t val) final; - - void remove(internal_const_iterator_t iter) final { - LOG_PREFIX(LBALeafNode::remove); - SUBTRACE(seastore_fixedkv_tree, "trans.{}, pos {}, key {}", - this->pending_for_transaction, - iter.get_offset(), - iter.get_key()); - assert(iter != this->end()); - this->on_modify(); - this->remove_child_ptr(iter.get_offset()); - return this->journal_remove( - iter, - this->maybe_get_delta_buffer()); - } - - // See LBAInternalNode, same concept - void resolve_relative_addrs(paddr_t base) final; - void node_resolve_vals( - internal_iterator_t from, - internal_iterator_t to) const final - { - if (this->is_initial_pending()) { - for (auto i = from; i != to; ++i) { - auto val = i->get_val(); - if (val.pladdr.is_paddr() - && val.pladdr.get_paddr().is_relative()) { - assert(val.pladdr.get_paddr().is_block_relative()); - val.pladdr = this->get_paddr().add_relative(val.pladdr.get_paddr()); - i->set_val(val); - } - } - } - } - void node_unresolve_vals( - internal_iterator_t from, - internal_iterator_t to) const final - { - if (this->is_initial_pending()) { - for (auto i = from; i != to; ++i) { - auto val = i->get_val(); - if (val.pladdr.is_paddr() - && val.pladdr.get_paddr().is_relative()) { - assert(val.pladdr.get_paddr().is_record_relative()); - val.pladdr = val.pladdr.get_paddr().block_relative_to(this->get_paddr()); - i->set_val(val); - } - } - } - } - - extent_types_t get_type() const final { - return TYPE; - } - - void do_on_rewrite(Transaction &t, CachedExtent &extent) final { - this->parent_node_t::on_rewrite(t, static_cast(extent)); - } - - void do_on_replace_prior() final { - this->parent_node_t::on_replace_prior(); - } - - void do_prepare_commit() final { - this->parent_node_t::prepare_commit(); - } - - bool is_child_stable( - op_context_t c, - uint16_t pos, - laddr_t key) const { - return parent_node_t::_is_child_stable(c.trans, c.cache, pos, key); - } - bool is_child_data_stable( - op_context_t c, - uint16_t pos, - laddr_t key) const { - return parent_node_t::_is_child_stable(c.trans, c.cache, pos, key, true); - } - - void on_split( - Transaction &t, - LBALeafNode &left, - LBALeafNode &right) final { - this->split_child_ptrs(t, left, right); - } - void adjust_copy_src_dest_on_split( - Transaction &t, - LBALeafNode &left, - LBALeafNode &right) final { - this->parent_node_t::adjust_copy_src_dest_on_split(t, left, right); - } - - void on_merge( - Transaction &t, - LBALeafNode &left, - LBALeafNode &right) final { - this->merge_child_ptrs(t, left, right); - } - void adjust_copy_src_dest_on_merge( - Transaction &t, - LBALeafNode &left, - LBALeafNode &right) final { - this->parent_node_t::adjust_copy_src_dest_on_merge(t, left, right); - } - - void on_balance( - Transaction &t, - LBALeafNode &left, - LBALeafNode &right, - uint32_t pivot_idx, - LBALeafNode &replacement_left, - LBALeafNode &replacement_right) final { - this->balance_child_ptrs( - t, left, right, pivot_idx, replacement_left, replacement_right); - } - void adjust_copy_src_dest_on_balance( - Transaction &t, - LBALeafNode &left, - LBALeafNode &right, - uint32_t pivot_idx, - LBALeafNode &replacement_left, - LBALeafNode &replacement_right) final { - this->parent_node_t::adjust_copy_src_dest_on_balance( - t, left, right, pivot_idx, replacement_left, replacement_right); - } - - CachedExtentRef duplicate_for_write(Transaction&) final { - return CachedExtentRef(new LBALeafNode(*this)); - } - - std::ostream &print_detail(std::ostream &out) const final; -}; -using LBALeafNodeRef = TCachedExtentRef; - -} - -#if FMT_VERSION >= 90000 -template <> struct fmt::formatter : fmt::ostream_formatter {}; -template <> struct fmt::formatter : fmt::ostream_formatter {}; -template <> struct fmt::formatter : fmt::ostream_formatter {}; -template <> struct fmt::formatter : fmt::ostream_formatter {}; -#endif diff --git a/src/crimson/os/seastore/lba_mapping.cc b/src/crimson/os/seastore/lba_mapping.cc index d52233869bb..77c564c2d7e 100644 --- a/src/crimson/os/seastore/lba_mapping.cc +++ b/src/crimson/os/seastore/lba_mapping.cc @@ -31,7 +31,7 @@ std::ostream &operator<<(std::ostream &out, const lba_mapping_list_t &rhs) return out << ']'; } -using lba_manager::btree::LBALeafNode; +using lba::LBALeafNode; get_child_ret_t LBAMapping::get_logical_extent(Transaction &t) diff --git a/src/crimson/os/seastore/lba_mapping.h b/src/crimson/os/seastore/lba_mapping.h index 05f987e8def..1f7e61d7073 100644 --- a/src/crimson/os/seastore/lba_mapping.h +++ b/src/crimson/os/seastore/lba_mapping.h @@ -5,12 +5,12 @@ #include "crimson/os/seastore/cached_extent.h" #include "crimson/os/seastore/btree/btree_types.h" -#include "crimson/os/seastore/lba_manager/btree/lba_btree_node.h" +#include "crimson/os/seastore/lba/lba_btree_node.h" #include "crimson/os/seastore/logical_child_node.h" namespace crimson::os::seastore { -namespace lba_manager::btree { +namespace lba { class BtreeLBAManager; } @@ -94,7 +94,7 @@ public: return direct_cursor->get_laddr(); } - // An lba pin may be indirect, see comments in lba_manager/btree/btree_lba_manager.h + // An lba pin may be indirect, see comments in lba/btree_lba_manager.h laddr_t get_intermediate_key() const { assert(is_indirect()); return indirect_cursor->get_intermediate_key(); @@ -117,7 +117,7 @@ public: extent_len_t>(get_intermediate_key()); } - get_child_ret_t + get_child_ret_t get_logical_extent(Transaction &t); LBAMapping duplicate() const { @@ -132,7 +132,7 @@ public: } private: - friend lba_manager::btree::BtreeLBAManager; + friend lba::BtreeLBAManager; // To support cloning, there are two kinds of lba mappings: // 1. direct lba mapping: the pladdr in the value of which is the paddr of diff --git a/src/crimson/os/seastore/logical_child_node.h b/src/crimson/os/seastore/logical_child_node.h index ab4f2e67262..b17d5c17bc4 100644 --- a/src/crimson/os/seastore/logical_child_node.h +++ b/src/crimson/os/seastore/logical_child_node.h @@ -6,16 +6,16 @@ #include "crimson/os/seastore/cached_extent.h" #include "crimson/os/seastore/linked_tree_node.h" #include "crimson/os/seastore/btree/btree_types.h" -#include "crimson/os/seastore/lba_manager/btree/lba_btree_node.h" +#include "crimson/os/seastore/lba/lba_btree_node.h" namespace crimson::os::seastore { class LogicalChildNode : public LogicalCachedExtent, - public ChildNode { using lba_child_node_t = ChildNode< - lba_manager::btree::LBALeafNode, LogicalChildNode, laddr_t>; + lba::LBALeafNode, LogicalChildNode, laddr_t>; public: template LogicalChildNode(T&&... t) : LogicalCachedExtent(std::forward(t)...) {} diff --git a/src/crimson/os/seastore/root_block.cc b/src/crimson/os/seastore/root_block.cc index c422442f5e0..afc5494a026 100644 --- a/src/crimson/os/seastore/root_block.cc +++ b/src/crimson/os/seastore/root_block.cc @@ -2,7 +2,7 @@ // vim: ts=8 sw=2 smarttab #include "crimson/os/seastore/root_block.h" -#include "crimson/os/seastore/lba_manager/btree/lba_btree_node.h" +#include "crimson/os/seastore/lba/lba_btree_node.h" #include "crimson/os/seastore/backref/backref_tree_node.h" #include "crimson/os/seastore/linked_tree_node.h" @@ -14,17 +14,17 @@ void RootBlock::on_replace_prior() { if (prior.lba_root_node) { RootBlockRef this_ref = this; auto lba_root = static_cast< - lba_manager::btree::LBANode*>(prior.lba_root_node); + lba::LBANode*>(prior.lba_root_node); if (likely(lba_root->range.depth > 1)) { - TreeRootLinker::link_root( + TreeRootLinker::link_root( this_ref, - static_cast(prior.lba_root_node) + static_cast(prior.lba_root_node) ); } else { assert(lba_root->range.depth == 1); - TreeRootLinker::link_root( + TreeRootLinker::link_root( this_ref, - static_cast(prior.lba_root_node) + static_cast(prior.lba_root_node) ); } } diff --git a/src/crimson/os/seastore/transaction_manager.cc b/src/crimson/os/seastore/transaction_manager.cc index 3a2ce9efe6e..65d70ee5e4d 100644 --- a/src/crimson/os/seastore/transaction_manager.cc +++ b/src/crimson/os/seastore/transaction_manager.cc @@ -8,7 +8,7 @@ #include "crimson/os/seastore/transaction_manager.h" #include "crimson/os/seastore/journal.h" #include "crimson/os/seastore/journal/circular_bounded_journal.h" -#include "crimson/os/seastore/lba_manager/btree/lba_btree_node.h" +#include "crimson/os/seastore/lba/lba_btree_node.h" #include "crimson/os/seastore/random_block_manager/rbm_device.h" /* @@ -805,7 +805,7 @@ TransactionManagerRef make_transaction_manager( { auto epm = std::make_unique(); auto cache = std::make_unique(*epm); - auto lba_manager = lba_manager::create_lba_manager(*cache); + auto lba_manager = lba::create_lba_manager(*cache); auto sms = std::make_unique(); auto rbs = std::make_unique(); auto backref_manager = create_backref_manager(*cache); diff --git a/src/crimson/os/seastore/transaction_manager.h b/src/crimson/os/seastore/transaction_manager.h index 9d2c9dda882..6fca3bd2966 100644 --- a/src/crimson/os/seastore/transaction_manager.h +++ b/src/crimson/os/seastore/transaction_manager.h @@ -951,7 +951,7 @@ private: shard_stats_t& shard_stats; - using LBALeafNode = lba_manager::btree::LBALeafNode; + using LBALeafNode = lba::LBALeafNode; struct unlinked_child_t { LBAMapping mapping; child_pos_t child_pos; diff --git a/src/test/crimson/seastore/test_btree_lba_manager.cc b/src/test/crimson/seastore/test_btree_lba_manager.cc index 4f055cbade4..a74186ae25f 100644 --- a/src/test/crimson/seastore/test_btree_lba_manager.cc +++ b/src/test/crimson/seastore/test_btree_lba_manager.cc @@ -8,7 +8,7 @@ #include "crimson/os/seastore/journal.h" #include "crimson/os/seastore/cache.h" #include "crimson/os/seastore/segment_manager/ephemeral.h" -#include "crimson/os/seastore/lba_manager/btree/btree_lba_manager.h" +#include "crimson/os/seastore/lba/btree_lba_manager.h" #include "test/crimson/seastore/test_block.h" @@ -21,8 +21,7 @@ namespace { using namespace crimson; using namespace crimson::os; using namespace crimson::os::seastore; -using namespace crimson::os::seastore::lba_manager; -using namespace crimson::os::seastore::lba_manager::btree; +using namespace crimson::os::seastore::lba; struct btree_test_base : public seastar_test_suite_t, SegmentProvider, JournalTrimmer { diff --git a/src/test/crimson/seastore/test_transaction_manager.cc b/src/test/crimson/seastore/test_transaction_manager.cc index 6244f6e80d9..514edc0407f 100644 --- a/src/test/crimson/seastore/test_transaction_manager.cc +++ b/src/test/crimson/seastore/test_transaction_manager.cc @@ -14,7 +14,7 @@ #include "crimson/os/seastore/segment_manager.h" #include "test/crimson/seastore/test_block.h" -#include "crimson/os/seastore/lba_manager/btree/lba_btree_node.h" +#include "crimson/os/seastore/lba/lba_btree_node.h" using namespace crimson; using namespace crimson::os; @@ -2118,7 +2118,7 @@ TEST_P(tm_single_device_intergrity_check_test_t, remap_lazy_read) TEST_P(tm_single_device_test_t, invalid_lba_mapping_detect) { run_async([this] { - using namespace crimson::os::seastore::lba_manager::btree; + using namespace crimson::os::seastore::lba; { auto t = create_transaction(); for (unsigned i = 0; i < LEAF_NODE_CAPACITY; i++) {