git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
crimson/os/seastore/lba: flatten lba namespace to keep consistent with backref
author: Zhang Song <zhangsong02@qianxin.com>
Fri, 9 May 2025 08:12:16 +0000 (16:12 +0800)
committer: Matan Breizman <mbreizma@redhat.com>
Sun, 8 Jun 2025 07:02:03 +0000 (10:02 +0300)
Signed-off-by: Zhang Song <zhangsong02@qianxin.com>
(cherry picked from commit 4593e5177d20ce22943123ae986c3c86e8d010a1)

23 files changed:
src/crimson/os/seastore/CMakeLists.txt
src/crimson/os/seastore/btree/btree_types.cc
src/crimson/os/seastore/btree/btree_types.h
src/crimson/os/seastore/btree/fixed_kv_btree.h
src/crimson/os/seastore/cache.cc
src/crimson/os/seastore/lba/btree_lba_manager.cc [new file with mode: 0644]
src/crimson/os/seastore/lba/btree_lba_manager.h [new file with mode: 0644]
src/crimson/os/seastore/lba/lba_btree_node.cc [new file with mode: 0644]
src/crimson/os/seastore/lba/lba_btree_node.h [new file with mode: 0644]
src/crimson/os/seastore/lba_manager.cc
src/crimson/os/seastore/lba_manager.h
src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.cc [deleted file]
src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.h [deleted file]
src/crimson/os/seastore/lba_manager/btree/lba_btree_node.cc [deleted file]
src/crimson/os/seastore/lba_manager/btree/lba_btree_node.h [deleted file]
src/crimson/os/seastore/lba_mapping.cc
src/crimson/os/seastore/lba_mapping.h
src/crimson/os/seastore/logical_child_node.h
src/crimson/os/seastore/root_block.cc
src/crimson/os/seastore/transaction_manager.cc
src/crimson/os/seastore/transaction_manager.h
src/test/crimson/seastore/test_btree_lba_manager.cc
src/test/crimson/seastore/test_transaction_manager.cc

diff --git a/src/crimson/os/seastore/CMakeLists.txt b/src/crimson/os/seastore/CMakeLists.txt
index 0a7a11668c8fb7b26c9023911cc40cbc30a897f7..d5ca12d8f0670dd7638bd075e9f63482bb4737cc 100644
@@ -15,8 +15,8 @@ set(crimson_seastore_srcs
   btree/btree_types.cc
   backref_manager.cc
   backref/btree_backref_manager.cc
-  lba_manager/btree/btree_lba_manager.cc
-  lba_manager/btree/lba_btree_node.cc
+  lba/btree_lba_manager.cc
+  lba/lba_btree_node.cc
   omap_manager.cc
   omap_manager/btree/btree_omap_manager.cc
   omap_manager/btree/omap_btree_node_impl.cc
diff --git a/src/crimson/os/seastore/btree/btree_types.cc b/src/crimson/os/seastore/btree/btree_types.cc
index a8d6e883153f6ec9c21a9fefc98088729d9b29b9..7665b4a1b876e63864455654e968c8f54762c0d3 100644
@@ -2,12 +2,12 @@
 // vim: ts=8 sw=2 smarttab
 
 #include "crimson/os/seastore/btree/btree_types.h"
-#include "crimson/os/seastore/lba_manager/btree/lba_btree_node.h"
+#include "crimson/os/seastore/lba/lba_btree_node.h"
 #include "crimson/os/seastore/backref/backref_tree_node.h"
 
 namespace crimson::os::seastore {
 
-namespace lba_manager::btree {
+namespace lba {
 
 std::ostream& operator<<(std::ostream& out, const lba_map_val_t& v)
 {
@@ -19,7 +19,7 @@ std::ostream& operator<<(std::ostream& out, const lba_map_val_t& v)
              << ")";
 }
 
-} // namespace lba_manager::btree
+} // namespace lba
 
 namespace backref {
 
@@ -36,7 +36,7 @@ namespace {
 template <typename key_t, typename T>
 bool modified_since(T &&extent, uint64_t iter_modifications) {
   using backref::BackrefLeafNode;
-  using lba_manager::btree::LBALeafNode;
+  using lba::LBALeafNode;
   if constexpr (std::is_same_v<key_t, laddr_t>) {
     assert(extent->get_type() == extent_types_t::LADDR_LEAF);
     auto leaf = extent->template cast<LBALeafNode>();
@@ -64,7 +64,7 @@ bool BtreeCursor<key_t, val_t>::is_viewable() const {
   return viewable;
 }
 
-template struct BtreeCursor<laddr_t, lba_manager::btree::lba_map_val_t>;
+template struct BtreeCursor<laddr_t, lba::lba_map_val_t>;
 template struct BtreeCursor<paddr_t, backref::backref_map_val_t>;
 
 } // namespace crimson::os::seastore
diff --git a/src/crimson/os/seastore/btree/btree_types.h b/src/crimson/os/seastore/btree/btree_types.h
index cd616ee6e96cac2855e518be5d9c99dff089283a..1a0d45fbc2a9e293a2172e7e5d6e68b43e581555 100644
@@ -100,7 +100,7 @@ struct __attribute__((packed)) fixed_kv_node_meta_le_t {
   }
 };
 
-namespace lba_manager::btree {
+namespace lba {
 
 /**
  * lba_map_val_t
@@ -150,7 +150,7 @@ struct __attribute__((packed)) lba_map_val_le_t {
   }
 };
 
-} // namespace lba_manager::btree
+} // namespace lba
 
 namespace backref {
 
@@ -216,7 +216,7 @@ struct BtreeCursor {
        pos(pos)
   {
     if constexpr (std::is_same_v<key_t, laddr_t>) {
-      static_assert(std::is_same_v<val_t, lba_manager::btree::lba_map_val_t>,
+      static_assert(std::is_same_v<val_t, lba::lba_map_val_t>,
         "the value type of laddr_t for BtreeCursor should be lba_map_val_t");
     } else {
       static_assert(std::is_same_v<key_t, paddr_t>,
@@ -251,8 +251,8 @@ struct BtreeCursor {
   }
 };
 
-struct LBACursor : BtreeCursor<laddr_t, lba_manager::btree::lba_map_val_t> {
-  using Base = BtreeCursor<laddr_t, lba_manager::btree::lba_map_val_t>;
+struct LBACursor : BtreeCursor<laddr_t, lba::lba_map_val_t> {
+  using Base = BtreeCursor<laddr_t, lba::lba_map_val_t>;
   using Base::BtreeCursor;
   bool is_indirect() const {
     assert(!is_end());
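diff --git a/src/crimson/os/seastore/btree/fixed_kv_btree.h b/src/crimson/os/seastore/btree/fixed_kv_btree.h
index f9ca41863290ffe660e085bf3acf20a627d932da..103e24ad151551bcb4ff08986f9fddd0602404fa 100644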
@@ -178,7 +178,7 @@ public:
       assert(!is_end());
       auto ret = leaf.node->iter_idx(leaf.pos).get_val();
       if constexpr (
-        std::is_same_v<crimson::os::seastore::lba_manager::btree::lba_map_val_t,
+        std::is_same_v<crimson::os::seastore::lba::lba_map_val_t,
                        node_val_t>) {
         if (ret.pladdr.is_paddr()) {
           ret.pladdr = ret.pladdr.get_paddr().maybe_relative_to(
diff --git a/src/crimson/os/seastore/cache.cc b/src/crimson/os/seastore/cache.cc
index 7762d1344de3c7ab387776ebf65cc9e7a52d5357..cf83fa93042fd06edbab4383c36a7fec73ebd3e3 100644
@@ -14,7 +14,7 @@
 
 // included for get_extent_by_type
 #include "crimson/os/seastore/collection_manager/collection_flat_node.h"
-#include "crimson/os/seastore/lba_manager/btree/lba_btree_node.h"
+#include "crimson/os/seastore/lba/lba_btree_node.h"
 #include "crimson/os/seastore/omap_manager/btree/omap_btree_node_impl.h"
 #include "crimson/os/seastore/object_data_handler.h"
 #include "crimson/os/seastore/collection_manager/collection_flat_node.h"
@@ -1090,9 +1090,9 @@ CachedExtentRef Cache::alloc_new_non_data_extent_by_type(
     ceph_assert(0 == "ROOT is never directly alloc'd");
     return CachedExtentRef();
   case extent_types_t::LADDR_INTERNAL:
-    return alloc_new_non_data_extent<lba_manager::btree::LBAInternalNode>(t, length, hint, gen);
+    return alloc_new_non_data_extent<lba::LBAInternalNode>(t, length, hint, gen);
   case extent_types_t::LADDR_LEAF:
-    return alloc_new_non_data_extent<lba_manager::btree::LBALeafNode>(
+    return alloc_new_non_data_extent<lba::LBALeafNode>(
       t, length, hint, gen);
   case extent_types_t::ROOT_META:
     return alloc_new_non_data_extent<RootMetaBlock>(
@@ -1449,7 +1449,7 @@ record_t Cache::prepare_record(
     if (i->is_logical()) {
       fresh_laddr = i->cast<LogicalCachedExtent>()->get_laddr();
     } else if (is_lba_node(i->get_type())) {
-      fresh_laddr = i->cast<lba_manager::btree::LBANode>()->get_node_meta().begin;
+      fresh_laddr = i->cast<lba::LBANode>()->get_node_meta().begin;
     } else {
       fresh_laddr = L_ADDR_NULL;
     }
@@ -1468,7 +1468,7 @@ record_t Cache::prepare_record(
       if (i->is_logical()) {
        alloc_laddr = i->cast<LogicalCachedExtent>()->get_laddr();
       } else if (is_lba_node(i->get_type())) {
-       alloc_laddr = i->cast<lba_manager::btree::LBANode>()->get_node_meta().begin;
+       alloc_laddr = i->cast<lba::LBANode>()->get_node_meta().begin;
       } else {
        assert(i->get_type() == extent_types_t::TEST_BLOCK_PHYSICAL);
        alloc_laddr = L_ADDR_MIN;
@@ -1494,7 +1494,7 @@ record_t Cache::prepare_record(
         alloc_laddr = i->cast<LogicalCachedExtent>()->get_laddr();
       } else {
         assert(is_lba_node(i->get_type()));
-        alloc_laddr = i->cast<lba_manager::btree::LBANode>()->get_node_meta().begin;
+        alloc_laddr = i->cast<lba::LBANode>()->get_node_meta().begin;
       }
       alloc_delta.alloc_blk_ranges.emplace_back(
        alloc_blk_t::create_alloc(
@@ -1808,7 +1808,7 @@ void Cache::complete_commit(
       if (i->is_logical()) {
        alloc_laddr = i->cast<LogicalCachedExtent>()->get_laddr();
       } else if (is_lba_node(i->get_type())) {
-       alloc_laddr = i->cast<lba_manager::btree::LBANode>()->get_node_meta().begin;
+       alloc_laddr = i->cast<lba::LBANode>()->get_node_meta().begin;
       } else {
        assert(i->get_type() == extent_types_t::TEST_BLOCK_PHYSICAL);
        alloc_laddr = L_ADDR_MIN;
@@ -2253,13 +2253,13 @@ Cache::do_get_caching_extent_by_type(
        return CachedExtentRef(extent.detach(), false /* add_ref */);
       });
     case extent_types_t::LADDR_INTERNAL:
-      return do_get_caching_extent<lba_manager::btree::LBAInternalNode>(
+      return do_get_caching_extent<lba::LBAInternalNode>(
        offset, length, std::move(extent_init_func), std::move(on_cache), p_src
       ).safe_then([](auto extent) {
        return CachedExtentRef(extent.detach(), false /* add_ref */);
       });
     case extent_types_t::LADDR_LEAF:
-      return do_get_caching_extent<lba_manager::btree::LBALeafNode>(
+      return do_get_caching_extent<lba::LBALeafNode>(
        offset, length, std::move(extent_init_func), std::move(on_cache), p_src
       ).safe_then([](auto extent) {
        return CachedExtentRef(extent.detach(), false /* add_ref */);
diff --git a/src/crimson/os/seastore/lba/btree_lba_manager.cc b/src/crimson/os/seastore/lba/btree_lba_manager.cc
new file mode 100644
index 0000000..3c613db
--- /dev/null
+++ b/src/crimson/os/seastore/lba/btree_lba_manager.cc
@@ -0,0 +1,1102 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <sys/mman.h>
+#include <string.h>
+
+#include <seastar/core/metrics.hh>
+
+#include "include/buffer.h"
+#include "crimson/os/seastore/lba/btree_lba_manager.h"
+#include "crimson/os/seastore/lba/lba_btree_node.h"
+#include "crimson/os/seastore/logging.h"
+
+SET_SUBSYS(seastore_lba);
+/*
+ * levels:
+ * - INFO:  mkfs
+ * - DEBUG: modification operations
+ * - TRACE: read operations, DEBUG details
+ */
+
+template <> struct fmt::formatter<
+  crimson::os::seastore::lba::LBABtree::iterator>
+    : public fmt::formatter<std::string_view>
+{
+  using Iter = crimson::os::seastore::lba::LBABtree::iterator;
+
+  template <typename FmtCtx>
+  auto format(const Iter &iter, FmtCtx &ctx) const
+      -> decltype(ctx.out()) {
+    if (iter.is_end()) {
+      return fmt::format_to(ctx.out(), "end");
+    }
+    return fmt::format_to(ctx.out(), "{}~{}", iter.get_key(), iter.get_val());
+  }
+};
+
+namespace crimson::os::seastore {
+
+template <typename T>
+Transaction::tree_stats_t& get_tree_stats(Transaction &t)
+{
+  return t.get_lba_tree_stats();
+}
+
+template Transaction::tree_stats_t&
+get_tree_stats<
+  crimson::os::seastore::lba::LBABtree>(
+  Transaction &t);
+
+template <typename T>
+phy_tree_root_t& get_phy_tree_root(root_t &r)
+{
+  return r.lba_root;
+}
+
+template phy_tree_root_t&
+get_phy_tree_root<
+  crimson::os::seastore::lba::LBABtree>(root_t &r);
+
+template <>
+const get_phy_tree_root_node_ret get_phy_tree_root_node<
+  crimson::os::seastore::lba::LBABtree>(
+  const RootBlockRef &root_block, op_context_t c)
+{
+  auto lba_root = root_block->lba_root_node;
+  if (lba_root) {
+    ceph_assert(lba_root->is_initial_pending()
+      == root_block->is_pending());
+    return {true,
+            c.cache.get_extent_viewable_by_trans(c.trans, lba_root)};
+  } else if (root_block->is_pending()) {
+    auto &prior = static_cast<RootBlock&>(*root_block->get_prior_instance());
+    lba_root = prior.lba_root_node;
+    if (lba_root) {
+      return {true,
+              c.cache.get_extent_viewable_by_trans(c.trans, lba_root)};
+    } else {
+      return {false,
+              Cache::get_extent_iertr::make_ready_future<CachedExtentRef>()};
+    }
+  } else {
+    return {false,
+            Cache::get_extent_iertr::make_ready_future<CachedExtentRef>()};
+  }
+}
+
+template <typename RootT>
+class TreeRootLinker<RootBlock, RootT> {
+public:
+  static void link_root(RootBlockRef &root_block, RootT* lba_root) {
+    root_block->lba_root_node = lba_root;
+    ceph_assert(lba_root != nullptr);
+    lba_root->parent_of_root = root_block;
+  }
+  static void unlink_root(RootBlockRef &root_block) {
+    root_block->lba_root_node = nullptr;
+  }
+};
+
+template class TreeRootLinker<RootBlock, lba::LBAInternalNode>;
+template class TreeRootLinker<RootBlock, lba::LBALeafNode>;
+
+}
+
+namespace crimson::os::seastore::lba {
+
+BtreeLBAManager::mkfs_ret
+BtreeLBAManager::mkfs(
+  Transaction &t)
+{
+  LOG_PREFIX(BtreeLBAManager::mkfs);
+  INFOT("start", t);
+  return cache.get_root(t).si_then([this, &t](auto croot) {
+    assert(croot->is_mutation_pending());
+    croot->get_root().lba_root = LBABtree::mkfs(croot, get_context(t));
+    return mkfs_iertr::now();
+  }).handle_error_interruptible(
+    mkfs_iertr::pass_further{},
+    crimson::ct_error::assert_all{
+      "Invalid error in BtreeLBAManager::mkfs"
+    }
+  );
+}
+
+BtreeLBAManager::get_mappings_ret
+BtreeLBAManager::get_mappings(
+  Transaction &t,
+  laddr_t laddr,
+  extent_len_t length)
+{
+  LOG_PREFIX(BtreeLBAManager::get_mappings);
+  TRACET("{}~0x{:x} ...", t, laddr, length);
+  auto c = get_context(t);
+  return with_btree_state<LBABtree, lba_mapping_list_t>(
+    cache, c,
+    [FNAME, this, c, laddr, length](auto& btree, auto& ret)
+  {
+    return get_cursors(c, btree, laddr, length
+    ).si_then([FNAME, this, c, laddr, length, &btree, &ret](auto cursors) {
+      return seastar::do_with(
+        std::move(cursors),
+        [FNAME, this, c, laddr, length, &btree, &ret](auto& cursors)
+      {
+        return trans_intr::do_for_each(
+          cursors,
+          [FNAME, this, c, laddr, length, &btree, &ret](auto& cursor)
+        {
+          if (!cursor->is_indirect()) {
+            ret.emplace_back(LBAMapping::create_direct(std::move(cursor)));
+            TRACET("{}~0x{:x} got {}",
+                   c.trans, laddr, length, ret.back());
+            return get_mappings_iertr::now();
+          }
+         assert(cursor->val->refcount == EXTENT_DEFAULT_REF_COUNT);
+         assert(cursor->val->checksum == 0);
+          return resolve_indirect_cursor(c, btree, *cursor
+          ).si_then([FNAME, c, &ret, &cursor, laddr, length](auto direct) {
+            ret.emplace_back(LBAMapping::create_indirect(
+               std::move(direct), std::move(cursor)));
+            TRACET("{}~0x{:x} got {}",
+                   c.trans, laddr, length, ret.back());
+            return get_mappings_iertr::now();
+          });
+        });
+      });
+    });
+  });
+}
+
+BtreeLBAManager::_get_cursors_ret
+BtreeLBAManager::get_cursors(
+  op_context_t c,
+  LBABtree& btree,
+  laddr_t laddr,
+  extent_len_t length)
+{
+  LOG_PREFIX(BtreeLBAManager::get_cursors);
+  TRACET("{}~0x{:x} ...", c.trans, laddr, length);
+  return seastar::do_with(
+    std::list<LBACursorRef>(),
+    [FNAME, c, laddr, length, &btree](auto& ret)
+  {
+    return LBABtree::iterate_repeat(
+      c,
+      btree.upper_bound_right(c, laddr),
+      [FNAME, c, laddr, length, &ret](auto& pos)
+    {
+      if (pos.is_end() || pos.get_key() >= (laddr + length)) {
+        TRACET("{}~0x{:x} done with {} results, stop at {}",
+               c.trans, laddr, length, ret.size(), pos);
+        return LBABtree::iterate_repeat_ret_inner(
+          interruptible::ready_future_marker{},
+          seastar::stop_iteration::yes);
+      }
+      TRACET("{}~0x{:x} got {}, repeat ...",
+             c.trans, laddr, length, pos);
+      ceph_assert((pos.get_key() + pos.get_val().len) > laddr);
+      ret.emplace_back(pos.get_cursor(c));
+      return LBABtree::iterate_repeat_ret_inner(
+        interruptible::ready_future_marker{},
+        seastar::stop_iteration::no);
+    }).si_then([&ret] {
+      return std::move(ret);
+    });
+  });
+}
+
+BtreeLBAManager::resolve_indirect_cursor_ret
+BtreeLBAManager::resolve_indirect_cursor(
+  op_context_t c,
+  LBABtree& btree,
+  const LBACursor &indirect_cursor)
+{
+  ceph_assert(indirect_cursor.is_indirect());
+  return get_cursors(
+    c,
+    btree,
+    indirect_cursor.get_intermediate_key(),
+    indirect_cursor.get_length()
+  ).si_then([&indirect_cursor](auto cursors) {
+    ceph_assert(cursors.size() == 1);
+    auto& direct_cursor = cursors.front();
+    auto intermediate_key = indirect_cursor.get_intermediate_key();
+    assert(!direct_cursor->is_indirect());
+    assert(direct_cursor->get_laddr() <= intermediate_key);
+    assert(direct_cursor->get_laddr() + direct_cursor->get_length()
+          >= intermediate_key + indirect_cursor.get_length());
+    return std::move(direct_cursor);
+  });
+}
+
+BtreeLBAManager::get_mapping_ret
+BtreeLBAManager::get_mapping(
+  Transaction &t,
+  laddr_t laddr)
+{
+  LOG_PREFIX(BtreeLBAManager::get_mapping);
+  TRACET("{} ...", t, laddr);
+  auto c = get_context(t);
+  return with_btree<LBABtree>(
+    cache, c,
+    [FNAME, this, c, laddr](auto& btree)
+  {
+    return get_cursor(c, btree, laddr
+    ).si_then([FNAME, this, c, laddr, &btree](LBACursorRef cursor) {
+      if (!cursor->is_indirect()) {
+        TRACET("{} got direct cursor {}",
+               c.trans, laddr, *cursor);
+       auto mapping = LBAMapping::create_direct(std::move(cursor));
+        return get_mapping_iertr::make_ready_future<
+         LBAMapping>(std::move(mapping));
+      }
+      assert(laddr == cursor->get_laddr());
+      assert(cursor->val->refcount == EXTENT_DEFAULT_REF_COUNT);
+      assert(cursor->val->checksum == 0);
+      return resolve_indirect_cursor(c, btree, *cursor
+      ).si_then([FNAME, c, laddr, indirect=std::move(cursor)]
+               (auto direct) mutable {
+       auto mapping = LBAMapping::create_indirect(
+         std::move(direct), std::move(indirect));
+        TRACET("{} got indirect mapping {}",
+               c.trans, laddr, mapping);
+        return get_mapping_iertr::make_ready_future<
+         LBAMapping>(std::move(mapping));
+      });
+    });
+  });
+}
+
+BtreeLBAManager::_get_cursor_ret
+BtreeLBAManager::get_cursor(
+  op_context_t c,
+  LBABtree& btree,
+  laddr_t laddr)
+{
+  LOG_PREFIX(BtreeLBAManager::get_cursor);
+  TRACET("{} ...", c.trans, laddr);
+  return btree.lower_bound(
+    c, laddr
+  ).si_then([FNAME, c, laddr](auto iter) -> _get_cursor_ret {
+    if (iter.is_end() || iter.get_key() != laddr) {
+      ERRORT("{} doesn't exist", c.trans, laddr);
+      return crimson::ct_error::enoent::make();
+    }
+    TRACET("{} got value {}", c.trans, laddr, iter.get_val());
+    return _get_cursor_ret(
+      interruptible::ready_future_marker{},
+      iter.get_cursor(c));
+  });
+}
+
+BtreeLBAManager::search_insert_position_ret
+BtreeLBAManager::search_insert_position(
+  op_context_t c,
+  LBABtree &btree,
+  laddr_t hint,
+  extent_len_t length,
+  alloc_policy_t policy)
+{
+  LOG_PREFIX(BtreeLBAManager::search_insert_position);
+  auto lookup_attempts = stats.num_alloc_extents_iter_nexts;
+  using OptIter = std::optional<LBABtree::iterator>;
+  return seastar::do_with(
+    hint, OptIter(std::nullopt),
+    [this, c, &btree, hint, length, lookup_attempts, policy, FNAME]
+    (laddr_t &last_end, OptIter &insert_iter)
+  {
+    return LBABtree::iterate_repeat(
+      c,
+      btree.upper_bound_right(c, hint),
+      [this, c, hint, length, lookup_attempts, policy,
+       &last_end, &insert_iter, FNAME](auto &iter)
+    {
+      ++stats.num_alloc_extents_iter_nexts;
+      if (iter.is_end() ||
+         iter.get_key() >= (last_end + length)) {
+       if (policy == alloc_policy_t::deterministic) {
+         ceph_assert(hint == last_end);
+       }
+       DEBUGT("hint: {}~0x{:x}, allocated laddr: {}, insert position: {}, "
+              "done with {} attempts",
+              c.trans, hint, length, last_end, iter,
+              stats.num_alloc_extents_iter_nexts - lookup_attempts);
+       insert_iter.emplace(iter);
+       return search_insert_position_iertr::make_ready_future<
+         seastar::stop_iteration>(seastar::stop_iteration::yes);
+      }
+      ceph_assert(policy == alloc_policy_t::linear_search);
+      last_end = (iter.get_key() + iter.get_val().len).checked_to_laddr();
+      TRACET("hint: {}~0x{:x}, current iter: {}, repeat ...",
+            c.trans, hint, length, iter);
+      return search_insert_position_iertr::make_ready_future<
+       seastar::stop_iteration>(seastar::stop_iteration::no);
+    }).si_then([&last_end, &insert_iter] {
+      ceph_assert(insert_iter);
+      return search_insert_position_iertr::make_ready_future<
+       insert_position_t>(last_end, *std::move(insert_iter));
+    });
+  });
+}
+
+BtreeLBAManager::alloc_mappings_ret
+BtreeLBAManager::alloc_contiguous_mappings(
+  Transaction &t,
+  laddr_t hint,
+  std::vector<alloc_mapping_info_t> &alloc_infos,
+  alloc_policy_t policy)
+{
+  ceph_assert(hint != L_ADDR_NULL);
+  extent_len_t total_len = 0;
+  for (auto &info : alloc_infos) {
+    assert(info.key == L_ADDR_NULL);
+    total_len += info.value.len;
+  }
+
+  auto c = get_context(t);
+  return with_btree<LBABtree>(
+    cache,
+    c,
+    [this, c, hint, &alloc_infos, total_len, policy](auto &btree)
+  {
+    return search_insert_position(c, btree, hint, total_len, policy
+    ).si_then([this, c, &alloc_infos, &btree](insert_position_t res) {
+      extent_len_t offset = 0;
+      for (auto &info : alloc_infos) {
+       info.key = (res.laddr + offset).checked_to_laddr();
+       offset += info.value.len;
+      }
+      return insert_mappings(
+       c, btree, std::move(res.insert_iter), alloc_infos);
+    });
+  });
+}
+
+BtreeLBAManager::alloc_mappings_ret
+BtreeLBAManager::alloc_sparse_mappings(
+  Transaction &t,
+  laddr_t hint,
+  std::vector<alloc_mapping_info_t> &alloc_infos,
+  alloc_policy_t policy)
+{
+  ceph_assert(hint != L_ADDR_NULL);
+#ifndef NDEBUG
+  assert(alloc_infos.front().key != L_ADDR_NULL);
+  for (size_t i = 1; i < alloc_infos.size(); i++) {
+    auto &prev = alloc_infos[i - 1];
+    auto &cur = alloc_infos[i];
+    assert(cur.key != L_ADDR_NULL);
+    assert(prev.key + prev.value.len <= cur.key);
+  }
+#endif
+  auto total_len = hint.get_byte_distance<extent_len_t>(
+    alloc_infos.back().key + alloc_infos.back().value.len);
+  auto c = get_context(t);
+  return with_btree<LBABtree>(
+    cache,
+    c,
+    [this, c, hint, &alloc_infos, total_len, policy](auto &btree)
+  {
+    return search_insert_position(c, btree, hint, total_len, policy
+    ).si_then([this, c, hint, &alloc_infos, &btree, policy](auto res) {
+      if (policy != alloc_policy_t::deterministic) {
+       for (auto &info : alloc_infos) {
+         auto offset = info.key.get_byte_distance<extent_len_t>(hint);
+         info.key = (res.laddr + offset).checked_to_laddr();
+       }
+      } // deterministic guarantees hint == res.laddr
+      return insert_mappings(
+       c, btree, std::move(res.insert_iter), alloc_infos);
+    });
+  });
+}
+
+BtreeLBAManager::alloc_mappings_ret
+BtreeLBAManager::insert_mappings(
+  op_context_t c,
+  LBABtree &btree,
+  LBABtree::iterator iter,
+  std::vector<alloc_mapping_info_t> &alloc_infos)
+{
+  return seastar::do_with(
+    std::move(iter), std::list<LBACursorRef>(),
+    [c, &btree, &alloc_infos]
+    (LBABtree::iterator &iter, std::list<LBACursorRef> &ret)
+  {
+    return trans_intr::do_for_each(
+      alloc_infos.begin(),
+      alloc_infos.end(),
+      [c, &btree, &iter, &ret](auto &info)
+    {
+      assert(info.key != L_ADDR_NULL);
+      return btree.insert(
+       c, iter, info.key, info.value
+      ).si_then([c, &iter, &ret, &info](auto p) {
+       ceph_assert(p.second);
+       iter = std::move(p.first);
+       auto &leaf_node = *iter.get_leaf_node();
+       leaf_node.insert_child_ptr(
+         iter.get_leaf_pos(),
+         info.extent,
+         leaf_node.get_size() - 1 /*the size before the insert*/);
+       if (is_valid_child_ptr(info.extent)) {
+         ceph_assert(info.value.pladdr.is_paddr());
+         assert(info.value.pladdr == iter.get_val().pladdr);
+         assert(info.value.len == iter.get_val().len);
+         assert(info.extent->is_logical());
+         if (info.extent->has_laddr()) {
+           // see TM::remap_pin()
+           assert(info.key == info.extent->get_laddr());
+           assert(info.key == iter.get_key());
+         } else {
+           // see TM::alloc_non_data_extent()
+           //     TM::alloc_data_extents()
+           info.extent->set_laddr(iter.get_key());
+         }
+       }
+       ret.push_back(iter.get_cursor(c));
+       return iter.next(c).si_then([&iter](auto p) {
+         iter = std::move(p);
+       });
+      });
+    }).si_then([&ret] {
+      return alloc_mappings_iertr::make_ready_future<
+       std::list<LBACursorRef>>(std::move(ret));
+    });
+  });
+}
+
+static bool is_lba_node(const CachedExtent &e)
+{
+  return is_lba_node(e.get_type());
+}
+
+BtreeLBAManager::base_iertr::template future<>
+_init_cached_extent(
+  op_context_t c,
+  const CachedExtentRef &e,
+  LBABtree &btree,
+  bool &ret)
+{
+  if (e->is_logical()) {
+    auto logn = e->cast<LogicalChildNode>();
+    return btree.lower_bound(
+      c,
+      logn->get_laddr()
+    ).si_then([e, c, logn, &ret](auto iter) {
+      LOG_PREFIX(BtreeLBAManager::init_cached_extent);
+      if (!iter.is_end() &&
+         iter.get_key() == logn->get_laddr() &&
+         iter.get_val().pladdr.is_paddr() &&
+         iter.get_val().pladdr.get_paddr() == logn->get_paddr()) {
+       assert(!iter.get_leaf_node()->is_pending());
+       iter.get_leaf_node()->link_child(logn.get(), iter.get_leaf_pos());
+       logn->set_laddr(iter.get_key());
+       ceph_assert(iter.get_val().len == e->get_length());
+       DEBUGT("logical extent {} live", c.trans, *logn);
+       ret = true;
+      } else {
+       DEBUGT("logical extent {} not live", c.trans, *logn);
+       ret = false;
+      }
+    });
+  } else {
+    return btree.init_cached_extent(c, e
+    ).si_then([&ret](bool is_alive) {
+      ret = is_alive;
+    });
+  }
+}
+
+BtreeLBAManager::init_cached_extent_ret
+BtreeLBAManager::init_cached_extent(
+  Transaction &t,
+  CachedExtentRef e)
+{
+  LOG_PREFIX(BtreeLBAManager::init_cached_extent);
+  TRACET("{}", t, *e);
+  return seastar::do_with(bool(), [this, e, &t](bool &ret) {
+    auto c = get_context(t);
+    return with_btree<LBABtree>(
+      cache, c,
+      [c, e, &ret](auto &btree) -> base_iertr::future<> {
+       LOG_PREFIX(BtreeLBAManager::init_cached_extent);
+       DEBUGT("extent {}", c.trans, *e);
+       return _init_cached_extent(c, e, btree, ret);
+      }
+    ).si_then([&ret] { return ret; });
+  });
+}
+
+#ifdef UNIT_TESTS_BUILT
+BtreeLBAManager::check_child_trackers_ret
+BtreeLBAManager::check_child_trackers(
+  Transaction &t) {
+  auto c = get_context(t);
+  return with_btree<LBABtree>(
+    cache, c,
+    [c](auto &btree) {
+    return btree.check_child_trackers(c);
+  });
+}
+#endif
+
+BtreeLBAManager::scan_mappings_ret
+BtreeLBAManager::scan_mappings(
+  Transaction &t,
+  laddr_t begin,
+  laddr_t end,
+  scan_mappings_func_t &&f)
+{
+  LOG_PREFIX(BtreeLBAManager::scan_mappings);
+  DEBUGT("begin: {}, end: {}", t, begin, end);
+
+  auto c = get_context(t);
+  return with_btree<LBABtree>(
+    cache,
+    c,
+    [c, f=std::move(f), begin, end](auto &btree) mutable {
+      return LBABtree::iterate_repeat(
+       c,
+       btree.upper_bound_right(c, begin),
+       [f=std::move(f), begin, end](auto &pos) {
+         if (pos.is_end() || pos.get_key() >= end) {
+           return typename LBABtree::iterate_repeat_ret_inner(
+             interruptible::ready_future_marker{},
+             seastar::stop_iteration::yes);
+         }
+         ceph_assert((pos.get_key() + pos.get_val().len) > begin);
+         if (pos.get_val().pladdr.is_paddr()) {
+           f(pos.get_key(), pos.get_val().pladdr.get_paddr(), pos.get_val().len);
+         }
+         return LBABtree::iterate_repeat_ret_inner(
+           interruptible::ready_future_marker{},
+           seastar::stop_iteration::no);
+       });
+    });
+}
+
+BtreeLBAManager::rewrite_extent_ret
+BtreeLBAManager::rewrite_extent(
+  Transaction &t,
+  CachedExtentRef extent)
+{
+  LOG_PREFIX(BtreeLBAManager::rewrite_extent);
+  if (extent->has_been_invalidated()) {
+    ERRORT("extent has been invalidated -- {}", t, *extent);
+    ceph_abort();
+  }
+  assert(!extent->is_logical());
+
+  if (is_lba_node(*extent)) {
+    DEBUGT("rewriting lba extent -- {}", t, *extent);
+    auto c = get_context(t);
+    return with_btree<LBABtree>(
+      cache,
+      c,
+      [c, extent](auto &btree) mutable {
+       return btree.rewrite_extent(c, extent);
+      });
+  } else {
+    DEBUGT("skip non lba extent -- {}", t, *extent);
+    return rewrite_extent_iertr::now();
+  }
+}
+
+BtreeLBAManager::update_mapping_ret
+BtreeLBAManager::update_mapping(
+  Transaction& t,
+  laddr_t laddr,
+  extent_len_t prev_len,
+  paddr_t prev_addr,
+  LogicalChildNode& nextent)
+{
+  LOG_PREFIX(BtreeLBAManager::update_mapping);
+  auto addr = nextent.get_paddr();
+  auto len = nextent.get_length();
+  auto checksum = nextent.get_last_committed_crc();
+  TRACET("laddr={}, paddr {}~0x{:x} => {}~0x{:x}, crc=0x{:x}",
+         t, laddr, prev_addr, prev_len, addr, len, checksum);
+  assert(laddr == nextent.get_laddr());
+  assert(!addr.is_null());
+  return _update_mapping(
+    t,
+    laddr,
+    [prev_addr, addr, prev_len, len, checksum]
+    (const lba_map_val_t &in) {
+      lba_map_val_t ret = in;
+      ceph_assert(in.pladdr.is_paddr());
+      ceph_assert(in.pladdr.get_paddr() == prev_addr);
+      ceph_assert(in.len == prev_len);
+      ret.pladdr = addr;
+      ret.len = len;
+      ret.checksum = checksum;
+      return ret;
+    },
+    &nextent
+  ).si_then([&t, laddr, prev_addr, prev_len, addr, len, checksum, FNAME](auto res) {
+      assert(res.is_alive_mapping());
+      DEBUGT("laddr={}, paddr {}~0x{:x} => {}~0x{:x}, crc=0x{:x} done -- {}",
+             t, laddr, prev_addr, prev_len, addr, len, checksum, res.get_cursor());
+      return update_mapping_iertr::make_ready_future<
+       extent_ref_count_t>(res.get_cursor().get_refcount());
+    },
+    update_mapping_iertr::pass_further{},
+    /* ENOENT in particular should be impossible */
+    crimson::ct_error::assert_all{
+      "Invalid error in BtreeLBAManager::update_mapping"
+    }
+  );
+}
+
+/**
+ * update_mappings
+ *
+ * For every extent in extents, rewrites the extent's existing LBA mapping
+ * (looked up by the extent's laddr) so that it points at the extent's
+ * current paddr and records its last committed crc.
+ *
+ * The existing mapping is asserted to be a direct (paddr) mapping whose
+ * paddr equals the extent's prior paddr and whose length equals the
+ * extent's length. Errors are forwarded through pass_further; anything
+ * else trips assert_all.
+ */
+BtreeLBAManager::update_mappings_ret
+BtreeLBAManager::update_mappings(
+  Transaction& t,
+  const std::list<LogicalChildNodeRef>& extents)
+{
+  return trans_intr::do_for_each(extents, [this, &t](auto &extent) {
+    LOG_PREFIX(BtreeLBAManager::update_mappings);
+    auto laddr = extent->get_laddr();
+    // NOTE(review): judging by its name, this call also clears the prior
+    // paddr stored in the extent -- confirm against LogicalChildNode
+    auto prev_addr = extent->get_prior_paddr_and_reset();
+    auto len = extent->get_length();
+    auto addr = extent->get_paddr();
+    auto checksum = extent->get_last_committed_crc();
+    TRACET("laddr={}, paddr {}~0x{:x} => {}, crc=0x{:x}",
+           t, laddr, prev_addr, len, addr, checksum);
+    assert(!addr.is_null());
+    return _update_mapping(
+      t,
+      laddr,
+      // validate the old value, then swap in the new paddr and crc;
+      // len and refcount are carried over unchanged
+      [prev_addr, addr, len, checksum](
+        const lba_map_val_t &in) {
+        lba_map_val_t ret = in;
+        ceph_assert(in.pladdr.is_paddr());
+        ceph_assert(in.pladdr.get_paddr() == prev_addr);
+        ceph_assert(in.len == len);
+        ret.pladdr = addr;
+        ret.checksum = checksum;
+        return ret;
+      },
+      nullptr   // all the extents should have already been
+                // added to the fixed_kv_btree
+    ).si_then([&t, laddr, prev_addr, len, addr, checksum, FNAME](auto res) {
+        DEBUGT("laddr={}, paddr {}~0x{:x} => {}, crc=0x{:x} done -- {}",
+               t, laddr, prev_addr, len, addr, checksum, res.get_cursor());
+        return update_mapping_iertr::make_ready_future();
+      },
+      update_mapping_iertr::pass_further{},
+      /* ENOENT in particular should be impossible */
+      crimson::ct_error::assert_all{
+        "Invalid error in BtreeLBAManager::update_mappings"
+      }
+    );
+  });
+}
+
+/**
+ * get_physical_extent_if_live
+ *
+ * Asks the LBA btree for the node of the given type described by
+ * (addr, laddr, len). type must be an LBA node type (ceph_assert):
+ * LADDR_INTERNAL is resolved through get_internal_if_live(), the leaf
+ * types through get_leaf_if_live().
+ */
+BtreeLBAManager::get_physical_extent_if_live_ret
+BtreeLBAManager::get_physical_extent_if_live(
+  Transaction &t,
+  extent_types_t type,
+  paddr_t addr,
+  laddr_t laddr,
+  extent_len_t len)
+{
+  LOG_PREFIX(BtreeLBAManager::get_physical_extent_if_live);
+  DEBUGT("{}, laddr={}, paddr={}, length={}",
+         t, type, laddr, addr, len);
+  ceph_assert(is_lba_node(type));
+  auto c = get_context(t);
+  return with_btree_ret<LBABtree, CachedExtentRef>(
+    cache,
+    c,
+    [c, type, addr, laddr, len](auto &btree) {
+      if (type == extent_types_t::LADDR_INTERNAL) {
+       return btree.get_internal_if_live(c, addr, laddr, len);
+      } else {
+       // the only remaining LBA node types are the two leaf flavours
+       assert(type == extent_types_t::LADDR_LEAF ||
+              type == extent_types_t::DINK_LADDR_LEAF);
+       return btree.get_leaf_if_live(c, addr, laddr, len);
+      }
+    });
+}
+
+/**
+ * refresh_lba_mapping
+ *
+ * Returns mapping unchanged if it is already viewable. Otherwise refreshes
+ * its direct cursor and then, if present, its indirect cursor via
+ * refresh_lba_cursor(), and returns the refreshed mapping. The mapping must
+ * be linked to a direct cursor (assert).
+ */
+BtreeLBAManager::refresh_lba_mapping_ret
+BtreeLBAManager::refresh_lba_mapping(Transaction &t, LBAMapping mapping)
+{
+  assert(mapping.is_linked_direct());
+  if (mapping.is_viewable()) {
+    return refresh_lba_mapping_iertr::make_ready_future<
+      LBAMapping>(std::move(mapping));
+  }
+  auto c = get_context(t);
+  return with_btree_state<LBABtree, LBAMapping>(
+    cache,
+    c,
+    std::move(mapping),
+    [c, this](LBABtree &btree, LBAMapping &mapping) mutable
+  {
+    return refresh_lba_cursor(c, btree, *mapping.direct_cursor
+    ).si_then([c, this, &btree, &mapping] {
+      if (mapping.indirect_cursor) {
+       return refresh_lba_cursor(c, btree, *mapping.indirect_cursor);
+      }
+      return refresh_lba_cursor_iertr::make_ready_future();
+// debug builds chain one extra continuation that checks the result
+#ifndef NDEBUG
+    }).si_then([&mapping] {
+      assert(mapping.is_viewable());
+#endif
+    });
+  });
+}
+
+/**
+ * refresh_lba_cursor
+ *
+ * Brings cursor back in sync with the leaf node seen by c.trans.
+ *
+ * - If the cursor's parent is no longer valid, the leaf is looked up again
+ *   with btree.lower_bound() and parent/modifications/pos (and val, unless
+ *   the cursor is an end cursor) are reloaded from the resulting iterator.
+ * - If the parent is valid but not viewable by the transaction, the parent
+ *   is replaced by leaf->find_pending_version() and pos/val are recomputed.
+ * - If the parent is viewable but reports modified_since() the cursor's
+ *   recorded modifications, pos/val are recomputed within the same leaf.
+ * - Otherwise the cursor is left untouched.
+ *
+ * The matching stats counters are bumped along the way.
+ */
+BtreeLBAManager::refresh_lba_cursor_ret
+BtreeLBAManager::refresh_lba_cursor(
+  op_context_t c,
+  LBABtree &btree,
+  LBACursor &cursor)
+{
+  LOG_PREFIX(BtreeLBAManager::refresh_lba_cursor);
+  stats.num_refresh_parent_total++;
+
+  if (!cursor.parent->is_valid()) {
+    stats.num_refresh_invalid_parent++;
+    TRACET("cursor {} parent is invalid, re-search from scratch",
+          c.trans, cursor);
+    return btree.lower_bound(c, cursor.get_laddr()
+    ).si_then([&cursor](LBABtree::iterator iter) {
+      auto leaf = iter.get_leaf_node();
+      cursor.parent = leaf;
+      cursor.modifications = leaf->modifications;
+      cursor.pos = iter.get_leaf_pos();
+      if (!cursor.is_end()) {
+       // a non-end cursor must still find exactly its own key
+       ceph_assert(!iter.is_end());
+       ceph_assert(iter.get_key() == cursor.get_laddr());
+       cursor.val = iter.get_val();
+       assert(cursor.is_viewable());
+      }
+    });
+  }
+
+  auto [viewable, state] = cursor.parent->is_viewable_by_trans(c.trans);
+  auto leaf = cursor.parent->cast<LBALeafNode>();
+
+  TRACET("cursor: {} viewable: {} state: {}",
+        c.trans, cursor, viewable, state);
+
+  if (!viewable) {
+    // switch to the version of the leaf that find_pending_version()
+    // returns for this transaction
+    stats.num_refresh_unviewable_parent++;
+    leaf = leaf->find_pending_version(c.trans, cursor.get_laddr());
+    cursor.parent = leaf;
+  }
+
+  if (!viewable ||
+      leaf->modified_since(cursor.modifications)) {
+    if (viewable) {
+      stats.num_refresh_modified_viewable_parent++;
+    }
+
+    cursor.modifications = leaf->modifications;
+    if (cursor.is_end()) {
+      // an end cursor sits one past the last entry and carries no value
+      cursor.pos = leaf->get_size();
+      assert(!cursor.val);
+    } else {
+      auto i = leaf->lower_bound(cursor.get_laddr());
+      cursor.pos = i.get_offset();
+      cursor.val = i.get_val();
+
+      // sanity check: the recomputed position holds the cursor's key/value
+      auto iter = LBALeafNode::iterator(leaf.get(), cursor.pos);
+      ceph_assert(iter.get_key() == cursor.key);
+      ceph_assert(iter.get_val() == cursor.val);
+      assert(cursor.is_viewable());
+    }
+  }
+
+  return refresh_lba_cursor_iertr::make_ready_future();
+}
+
+/**
+ * register_metrics
+ *
+ * Resets all stats counters to their defaults and registers each of them
+ * as a seastar counter in the "LBA" metric group.
+ */
+void BtreeLBAManager::register_metrics()
+{
+  LOG_PREFIX(BtreeLBAManager::register_metrics);
+  DEBUG("start");
+  stats = {};
+  namespace sm = seastar::metrics;
+  metrics.add_group(
+    "LBA",
+    {
+      sm::make_counter(
+        "alloc_extents",
+        stats.num_alloc_extents,
+        sm::description("total number of lba alloc_extent operations")
+      ),
+      sm::make_counter(
+        "alloc_extents_iter_nexts",
+        stats.num_alloc_extents_iter_nexts,
+        sm::description("total number of iterator next operations during extent allocation")
+      ),
+      sm::make_counter(
+        "refresh_parent_total",
+        stats.num_refresh_parent_total,
+        sm::description("total number of refreshed cursors")
+      ),
+      sm::make_counter(
+        "refresh_invalid_parent",
+        stats.num_refresh_invalid_parent,
+        sm::description("total number of refreshed cursors with invalid parents")
+      ),
+      sm::make_counter(
+        "refresh_unviewable_parent",
+        stats.num_refresh_unviewable_parent,
+        sm::description("total number of refreshed cursors with unviewable parents")
+      ),
+      sm::make_counter(
+        "refresh_modified_viewable_parent",
+        stats.num_refresh_modified_viewable_parent,
+        sm::description("total number of refreshed cursors with viewable but modified parents")
+      ),
+    }
+  );
+}
+
+/**
+ * _decref_intermediate
+ *
+ * Drops one reference from the direct mapping that covers
+ * [addr, addr + len). The mapping is located with upper_bound_right() and
+ * asserted to exist, to be a paddr mapping, to fully contain the range and
+ * to hold at least one reference.
+ *
+ * If the refcount reaches 0 the mapping is removed and the result carries
+ * the removed key/value; otherwise the mapping is updated in place and the
+ * result carries a cursor to it.
+ */
+BtreeLBAManager::_decref_intermediate_ret
+BtreeLBAManager::_decref_intermediate(
+  Transaction &t,
+  laddr_t addr,
+  extent_len_t len)
+{
+  auto c = get_context(t);
+  return with_btree<LBABtree>(
+    cache,
+    c,
+    [c, addr, len](auto &btree) mutable {
+    return btree.upper_bound_right(
+      c, addr
+    ).si_then([&btree, addr, len, c](auto iter) {
+      ceph_assert(!iter.is_end());
+      laddr_t key = iter.get_key();
+      ceph_assert(key <= addr);
+      auto val = iter.get_val();
+      // the found mapping must span the whole requested range
+      ceph_assert(key + val.len >= addr + len);
+      ceph_assert(val.pladdr.is_paddr());
+      ceph_assert(val.refcount >= 1);
+      val.refcount -= 1;
+
+      LOG_PREFIX(BtreeLBAManager::_decref_intermediate);
+      TRACET("decreased refcount of intermediate key {} -- {}",
+            c.trans, key, val);
+
+      if (val.refcount == 0) {
+       return btree.remove(c, iter
+       ).si_then([key, val] {
+         return ref_iertr::make_ready_future<
+           update_mapping_ret_bare_t>(key, val);
+       });
+      } else {
+       return btree.update(c, iter, val
+       ).si_then([c](auto iter) {
+         return ref_iertr::make_ready_future<
+           update_mapping_ret_bare_t>(iter.get_cursor(c));
+       });
+      }
+    });
+  });
+}
+
+/**
+ * remap_mappings
+ *
+ * Replaces orig_mapping with the mappings described by remaps.
+ *
+ * The original mapping is first dropped via update_refcount(-1) without
+ * cascading; this is asserted to remove it. Then:
+ * - direct mapping: extents are inserted via alloc_extents(), with
+ *   orig_laddr + remaps.front().offset as the hint;
+ * - indirect mapping: one indirect mapping is created per remap entry at
+ *   orig_laddr + offset, pointing at intermediate_key + offset, and the
+ *   refcount of the intermediate base is raised by remaps.size() - 1 when
+ *   there is more than one remap.
+ *
+ * Returns one LBAMapping per remap entry; debug builds assert that the
+ * i-th mapping matches the i-th remap's offset and length.
+ */
+BtreeLBAManager::remap_ret
+BtreeLBAManager::remap_mappings(
+  Transaction &t,
+  LBAMapping orig_mapping,
+  std::vector<remap_entry_t> remaps,
+  std::vector<LogicalChildNodeRef> extents)
+{
+  LOG_PREFIX(BtreeLBAManager::remap_mappings);
+  // everything the continuations below share, kept alive by do_with
+  struct state_t {
+    LBAMapping orig_mapping;
+    std::vector<remap_entry_t> remaps;
+    std::vector<LogicalChildNodeRef> extents;
+    std::vector<alloc_mapping_info_t> alloc_infos;
+    std::vector<LBAMapping> ret;
+  };
+  return seastar::do_with(
+    state_t(std::move(orig_mapping), std::move(remaps), std::move(extents), {}, {}),
+    [this, &t, FNAME](state_t &state)
+  {
+    return update_refcount(
+      t, state.orig_mapping.get_key(), -1, false
+    ).si_then([this, &t, &state, FNAME](auto ret) {
+      // Remapping the shared direct mapping is prohibited,
+      // the refcount of indirect mapping should always be 1.
+      ceph_assert(ret.is_removed_mapping());
+
+      auto orig_laddr = state.orig_mapping.get_key();
+      if (!state.orig_mapping.is_indirect()) {
+       auto &addr = ret.get_removed_mapping().map_value.pladdr;
+       ceph_assert(addr.is_paddr() && !addr.get_paddr().is_zero());
+       return alloc_extents(
+         t,
+         (state.remaps.front().offset + orig_laddr).checked_to_laddr(),
+         std::move(state.extents),
+         EXTENT_DEFAULT_REF_COUNT
+       ).si_then([&state](auto ret) {
+         state.ret = std::move(ret);
+         return remap_iertr::make_ready_future();
+       });
+      }
+
+      // indirect case: build one indirect alloc info per remap entry
+      extent_len_t orig_len = state.orig_mapping.get_length();
+      auto intermediate_key = state.orig_mapping.get_intermediate_key();
+      ceph_assert(intermediate_key != L_ADDR_NULL);
+      DEBUGT("remap indirect mapping {}", t, state.orig_mapping);
+      for (auto &remap : state.remaps) {
+       DEBUGT("remap 0x{:x}~0x{:x}", t, remap.offset, remap.len);
+       ceph_assert(remap.len != 0);
+       ceph_assert(remap.offset + remap.len <= orig_len);
+       auto remapped_laddr = (orig_laddr + remap.offset)
+           .checked_to_laddr();
+       auto remapped_intermediate_key = (intermediate_key + remap.offset)
+           .checked_to_laddr();
+       state.alloc_infos.emplace_back(
+         alloc_mapping_info_t::create_indirect(
+           remapped_laddr, remap.len, remapped_intermediate_key));
+      }
+
+      return alloc_sparse_mappings(
+       t, state.alloc_infos.front().key, state.alloc_infos,
+       alloc_policy_t::deterministic
+      ).si_then([&t, &state, this](std::list<LBACursorRef> cursors) {
+       return seastar::futurize_invoke([&t, &state, this] {
+         if (state.remaps.size() > 1) {
+           // the removed mapping held one reference on the base; n new
+           // mappings need n, hence n - 1 additional references
+           auto base = state.orig_mapping.get_intermediate_base();
+           return update_refcount(
+             t, base, state.remaps.size() - 1, false
+           ).si_then([](update_mapping_ret_bare_t ret) {
+             return ret.take_cursor();
+           });
+         } else {
+           return remap_iertr::make_ready_future<
+             LBACursorRef>(state.orig_mapping.direct_cursor->duplicate());
+         }
+       }).si_then([&state, cursors=std::move(cursors)](auto direct) mutable {
+         // every new indirect mapping gets its own copy of the direct cursor
+         for (auto &cursor : cursors) {
+           state.ret.emplace_back(LBAMapping::create_indirect(
+             direct->duplicate(), std::move(cursor)));
+         }
+         return remap_iertr::make_ready_future();
+       });
+      });
+    }).si_then([&state] {
+      assert(state.ret.size() == state.remaps.size());
+#ifndef NDEBUG
+      auto mapping_it = state.ret.begin();
+      auto remap_it = state.remaps.begin();
+      for (;mapping_it != state.ret.end(); mapping_it++, remap_it++) {
+       auto &mapping = *mapping_it;
+       auto &remap = *remap_it;
+       assert(mapping.get_key() == state.orig_mapping.get_key() + remap.offset);
+       assert(mapping.get_length() == remap.len);
+      }
+#endif
+      return remap_iertr::make_ready_future<
+       std::vector<LBAMapping>>(std::move(state.ret));
+    });
+  });
+}
+
+/**
+ * update_refcount
+ *
+ * Adds delta to the refcount of the mapping at addr via _update_mapping()
+ * (which removes the mapping when the refcount drops to 0). The resulting
+ * refcount is asserted to be non-negative.
+ *
+ * If the mapping was removed, cascade_remove is set and the removed mapping
+ * was indirect (its pladdr is an laddr), the intermediate mapping it
+ * pointed to is dereferenced as well, and the result of that
+ * _decref_intermediate() call is returned instead.
+ */
+BtreeLBAManager::update_refcount_ret
+BtreeLBAManager::update_refcount(
+  Transaction &t,
+  laddr_t addr,
+  int delta,
+  bool cascade_remove)
+{
+  LOG_PREFIX(BtreeLBAManager::update_refcount);
+  TRACET("laddr={}, delta={}", t, addr, delta);
+  return _update_mapping(
+    t,
+    addr,
+    [delta](const lba_map_val_t &in) {
+      lba_map_val_t out = in;
+      ceph_assert((int)out.refcount + delta >= 0);
+      out.refcount += delta;
+      return out;
+    },
+    nullptr
+  ).si_then([&t, addr, delta, FNAME, this, cascade_remove](auto res) {
+    DEBUGT("laddr={}, delta={} done -- {}",
+          t, addr, delta,
+          res.is_alive_mapping()
+            ? res.get_cursor().val
+            : res.get_removed_mapping().map_value);
+    if (res.is_removed_mapping() && cascade_remove &&
+       res.get_removed_mapping().map_value.pladdr.is_laddr()) {
+      auto &val = res.get_removed_mapping().map_value;
+      TRACET("decref intermediate {} -> {}",
+            t, addr, val.pladdr.get_laddr());
+      return _decref_intermediate(t, val.pladdr.get_laddr(), val.len
+      ).handle_error_interruptible(
+       update_mapping_iertr::pass_further{},
+       crimson::ct_error::assert_all{
+         "unexpect ENOENT"
+       }
+      );
+    }
+    return update_mapping_iertr::make_ready_future<
+      update_mapping_ret_bare_t>(std::move(res));
+  });
+}
+
+/**
+ * _update_mapping
+ *
+ * Looks up the mapping whose key is exactly addr and replaces its value
+ * with f(old value). Fails with enoent if no such mapping exists.
+ *
+ * If the new value's refcount is 0 the mapping is removed and the result
+ * carries the removed key/value (nextent must be null in that case).
+ * Otherwise the mapping is updated, nextent (if given) is installed as the
+ * child pointer of the leaf entry, and the result carries a cursor to the
+ * updated entry.
+ */
+BtreeLBAManager::_update_mapping_ret
+BtreeLBAManager::_update_mapping(
+  Transaction &t,
+  laddr_t addr,
+  update_func_t &&f,
+  LogicalChildNode* nextent)
+{
+  auto c = get_context(t);
+  return with_btree<LBABtree>(
+    cache,
+    c,
+    [f=std::move(f), c, addr, nextent](auto &btree) mutable {
+      return btree.lower_bound(
+       c, addr
+      ).si_then([&btree, f=std::move(f), c, addr, nextent](auto iter)
+               -> _update_mapping_ret {
+       // lower_bound may land on a later key; only an exact match counts
+       if (iter.is_end() || iter.get_key() != addr) {
+         LOG_PREFIX(BtreeLBAManager::_update_mapping);
+         ERRORT("laddr={} doesn't exist", c.trans, addr);
+         return crimson::ct_error::enoent::make();
+       }
+
+       auto ret = f(iter.get_val());
+       if (ret.refcount == 0) {
+         assert(nextent == nullptr);
+         return btree.remove(
+           c,
+           iter
+         ).si_then([addr, ret] {
+           return update_mapping_ret_bare_t(addr, ret);
+         });
+       } else {
+         return btree.update(
+           c,
+           iter,
+           ret
+         ).si_then([c, nextent](auto iter) {
+           if (nextent) {
+             // nextent is provided iff unlinked,
+              // also see TM::rewrite_logical_extent()
+             assert(!nextent->has_parent_tracker());
+             iter.get_leaf_node()->update_child_ptr(
+               iter.get_leaf_pos(), nextent);
+           }
+           // after linking, nextent must be tracked by the updated leaf
+           assert(!nextent || 
+                  (nextent->has_parent_tracker() &&
+                   nextent->get_parent_node().get() == iter.get_leaf_node().get()));
+           return update_mapping_ret_bare_t(iter.get_cursor(c));
+         });
+       }
+      });
+    });
+}
+
+}
diff --git a/src/crimson/os/seastore/lba/btree_lba_manager.h b/src/crimson/os/seastore/lba/btree_lba_manager.h
new file mode 100644 (file)
index 0000000..aef7325
--- /dev/null
@@ -0,0 +1,532 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include <iostream>
+
+#include <boost/intrusive_ptr.hpp>
+#include <boost/smart_ptr/intrusive_ref_counter.hpp>
+#include <seastar/core/future.hh>
+
+#include "include/ceph_assert.h"
+#include "include/buffer_fwd.h"
+#include "include/interval_set.h"
+#include "common/interval_map.h"
+#include "crimson/osd/exceptions.h"
+
+#include "crimson/os/seastore/btree/fixed_kv_btree.h"
+#include "crimson/os/seastore/seastore_types.h"
+#include "crimson/os/seastore/lba_manager.h"
+#include "crimson/os/seastore/cache.h"
+
+#include "crimson/os/seastore/lba/lba_btree_node.h"
+#include "crimson/os/seastore/btree/btree_types.h"
+
+namespace crimson::os::seastore {
+class LogicalCachedExtent;
+}
+
+namespace crimson::os::seastore::lba {
+
+/// The LBA btree type: a FixedKVBtree keyed by laddr_t with lba_map_val_t
+/// values, using LBAInternalNode/LBALeafNode as node types and LBACursor as
+/// cursor type, parameterized with LBA_BLOCK_SIZE.
+using LBABtree = FixedKVBtree<
+  laddr_t, lba_map_val_t, LBAInternalNode,
+  LBALeafNode, LBACursor, LBA_BLOCK_SIZE>;
+
+/**
+ * BtreeLBAManager
+ *
+ * Uses a wandering btree to track two things:
+ * 1) lba state including laddr_t -> paddr_t mapping
+ * 2) reverse paddr_t -> laddr_t mapping for gc (TODO)
+ *
+ * Generally, any transaction will involve
+ * 1) deltas against lba tree nodes
+ * 2) new lba tree nodes
+ *    - Note, there must necessarily be a delta linking
+ *      these new nodes into the tree -- might be a
+ *      bootstrap_state_t delta if new root
+ *
+ * get_mappings, alloc_extent_*, etc populate a Transaction
+ * which then gets submitted
+ */
+class BtreeLBAManager : public LBAManager {
+public:
+  /// Binds the manager to cache and registers the LBA metrics.
+  /// explicit: a Cache must never convert implicitly into a manager.
+  explicit BtreeLBAManager(Cache &cache)
+    : cache(cache)
+  {
+    register_metrics();
+  }
+
+  mkfs_ret mkfs(
+    Transaction &t) final;
+
+  get_mappings_ret get_mappings(
+    Transaction &t,
+    laddr_t offset, extent_len_t length) final;
+
+  get_mapping_ret get_mapping(
+    Transaction &t,
+    laddr_t offset) final;
+
+  /**
+   * reserve_region
+   *
+   * Allocates a single mapping of length len built by
+   * alloc_mapping_info_t::create_zero() (i.e. pointing at P_ADDR_ZERO),
+   * searching linearly from hint, and returns it as a direct LBAMapping.
+   */
+  alloc_extent_ret reserve_region(
+    Transaction &t,
+    laddr_t hint,
+    extent_len_t len) final
+  {
+    std::vector<alloc_mapping_info_t> alloc_infos = {
+      alloc_mapping_info_t::create_zero(len)};
+    // do_with keeps alloc_infos alive until the allocation completes
+    return seastar::do_with(
+      std::move(alloc_infos),
+      [&t, hint, this](auto &alloc_infos) {
+      return alloc_contiguous_mappings(
+       t, hint, alloc_infos, alloc_policy_t::linear_search
+      ).si_then([](auto cursors) {
+       assert(cursors.size() == 1);
+       return LBAMapping::create_direct(std::move(cursors.front()));
+      });
+    });
+  }
+
+  /**
+   * clone_mapping
+   *
+   * Creates an indirect mapping at exactly laddr with length len that
+   * points at intermediate_key, then increments the refcount of the
+   * mapping at intermediate_base by one. Returns the indirect LBAMapping
+   * built from both cursors.
+   *
+   * input_output_error is passed further; any other error is asserted
+   * against.
+   */
+  alloc_extent_ret clone_mapping(
+    Transaction &t,
+    laddr_t laddr,
+    extent_len_t len,
+    laddr_t intermediate_key,
+    laddr_t intermediate_base) final
+  {
+    std::vector<alloc_mapping_info_t> alloc_infos = {
+      alloc_mapping_info_t::create_indirect(
+       laddr, len, intermediate_key)};
+    return seastar::do_with(
+      std::move(alloc_infos),
+      [this, &t, laddr, intermediate_base](auto &infos) {
+       return alloc_sparse_mappings(
+         t, laddr, infos, alloc_policy_t::deterministic
+       ).si_then([this, &t, intermediate_base](auto cursors) {
+         ceph_assert(cursors.size() == 1);
+         ceph_assert(cursors.front()->is_indirect());
+         // the new indirect mapping takes one reference on the base
+         return update_refcount(t, intermediate_base, 1, false
+         ).si_then([cursors=std::move(cursors)](auto p) mutable {
+           assert(p.is_alive_mapping());
+           auto mapping = LBAMapping::create_indirect(
+             p.take_cursor(), std::move(cursors.front()));
+           ceph_assert(mapping.is_stable());
+           return alloc_extent_iertr::make_ready_future<
+             LBAMapping>(std::move(mapping));
+         });
+       });
+      }).handle_error_interruptible(
+       crimson::ct_error::input_output_error::pass_further{},
+       crimson::ct_error::assert_all{"unexpect enoent"});
+  }
+
+  /**
+   * alloc_extent
+   *
+   * Allocates a direct mapping for ext (its length and paddr, the given
+   * refcount), searching linearly from hint, and returns it as a direct
+   * LBAMapping. ext must not have an laddr yet and its last committed crc
+   * must still be 0 (asserts).
+   */
+  alloc_extent_ret alloc_extent(
+    Transaction &t,
+    laddr_t hint,
+    LogicalChildNode &ext,
+    extent_ref_count_t refcount) final
+  {
+    // The real checksum will be updated upon transaction commit
+    assert(ext.get_last_committed_crc() == 0);
+    assert(!ext.has_laddr());
+    std::vector<alloc_mapping_info_t> alloc_infos = {
+      alloc_mapping_info_t::create_direct(
+       L_ADDR_NULL,
+       ext.get_length(),
+       ext.get_paddr(),
+       refcount,
+       ext.get_last_committed_crc(),
+       ext)};
+    return seastar::do_with(
+      std::move(alloc_infos),
+      [this, &t, hint](auto &alloc_infos) {
+      return alloc_contiguous_mappings(
+       t, hint, alloc_infos, alloc_policy_t::linear_search
+      ).si_then([](auto cursors) {
+       assert(cursors.size() == 1);
+       return LBAMapping::create_direct(std::move(cursors.front()));
+      });
+    });
+  }
+
+  /**
+   * alloc_extents
+   *
+   * Allocates one direct mapping per extent and returns the resulting
+   * LBAMappings in the same order.
+   *
+   * extents must be non-empty and either all of them or none of them may
+   * already have an laddr (asserts). If they have laddrs, the mappings are
+   * inserted at exactly those laddrs (alloc_sparse_mappings, deterministic
+   * policy); otherwise they are laid out contiguously, searching linearly
+   * from hint (alloc_contiguous_mappings).
+   */
+  alloc_extents_ret alloc_extents(
+    Transaction &t,
+    laddr_t hint,
+    std::vector<LogicalChildNodeRef> extents,
+    extent_ref_count_t refcount) final
+  {
+    std::vector<alloc_mapping_info_t> alloc_infos;
+    assert(!extents.empty());
+    // exactly one info per extent: size the vector once up front
+    alloc_infos.reserve(extents.size());
+    auto has_laddr = extents.front()->has_laddr();
+    for (auto &extent : extents) {
+      assert(extent);
+      assert(extent->has_laddr() == has_laddr);
+      alloc_infos.emplace_back(
+       alloc_mapping_info_t::create_direct(
+         extent->has_laddr() ? extent->get_laddr() : L_ADDR_NULL,
+         extent->get_length(),
+         extent->get_paddr(),
+         refcount,
+         extent->get_last_committed_crc(),
+         *extent));
+    }
+    return seastar::do_with(
+      std::move(alloc_infos),
+      [this, &t, hint, has_laddr](auto &alloc_infos)
+    {
+      if (has_laddr) {
+       return alloc_sparse_mappings(
+         t, hint, alloc_infos, alloc_policy_t::deterministic)
+// debug builds verify that every cursor landed on the requested laddr
+#ifndef NDEBUG
+       .si_then([&alloc_infos](std::list<LBACursorRef> cursors) {
+         assert(alloc_infos.size() == cursors.size());
+         auto info_p = alloc_infos.begin();
+         auto cursor_p = cursors.begin();
+         for (; info_p != alloc_infos.end(); info_p++, cursor_p++) {
+           auto &cursor = *cursor_p;
+           assert(cursor->get_laddr() == info_p->key);
+         }
+         return alloc_extent_iertr::make_ready_future<
+           std::list<LBACursorRef>>(std::move(cursors));
+       })
+#endif
+         ;
+      } else {
+       return alloc_contiguous_mappings(
+         t, hint, alloc_infos, alloc_policy_t::linear_search);
+      }
+    }).si_then([](std::list<LBACursorRef> cursors) {
+      std::vector<LBAMapping> ret;
+      // one mapping per cursor: avoid reallocations while filling ret
+      ret.reserve(cursors.size());
+      for (auto &cursor : cursors) {
+       ret.emplace_back(LBAMapping::create_direct(std::move(cursor)));
+      }
+      return ret;
+    });
+  }
+
+  /// Drops one reference from the mapping at addr, cascading into the
+  /// intermediate mapping if an indirect mapping gets removed, and converts
+  /// the outcome into a ref_update_result_t.
+  ref_ret remove_mapping(
+    Transaction &t,
+    laddr_t addr) final
+  {
+    constexpr int delta = -1;
+    constexpr bool cascade_remove = true;
+    auto to_result = [](auto updated) {
+      return static_cast<ref_update_result_t>(updated);
+    };
+    return update_refcount(
+      t, addr, delta, cascade_remove
+    ).si_then(std::move(to_result));
+  }
+
+  remap_ret remap_mappings(
+    Transaction &t,
+    LBAMapping orig_mapping,
+    std::vector<remap_entry_t> remaps,
+    std::vector<LogicalChildNodeRef> extents) final;
+
+  /**
+   * init_cached_extent
+   *
+   * Checks whether e is live (reachable from lba tree) and drops or initializes
+   * accordingly.
+   *
+   * Returns whether e is live.
+   */
+  init_cached_extent_ret init_cached_extent(
+    Transaction &t,
+    CachedExtentRef e) final;
+
+#ifdef UNIT_TESTS_BUILT
+  check_child_trackers_ret check_child_trackers(Transaction &t) final;
+#endif
+
+  scan_mappings_ret scan_mappings(
+    Transaction &t,
+    laddr_t begin,
+    laddr_t end,
+    scan_mappings_func_t &&f) final;
+
+  rewrite_extent_ret rewrite_extent(
+    Transaction &t,
+    CachedExtentRef extent) final;
+
+  update_mapping_ret update_mapping(
+    Transaction& t,
+    laddr_t laddr,
+    extent_len_t prev_len,
+    paddr_t prev_addr,
+    LogicalChildNode&) final;
+
+  update_mappings_ret update_mappings(
+    Transaction& t,
+    const std::list<LogicalChildNodeRef>& extents);
+
+  get_physical_extent_if_live_ret get_physical_extent_if_live(
+    Transaction &t,
+    extent_types_t type,
+    paddr_t addr,
+    laddr_t laddr,
+    extent_len_t len) final;
+
+  refresh_lba_mapping_ret refresh_lba_mapping(
+    Transaction &t,
+    LBAMapping mapping) final;
+
+private:
+  Cache &cache;
+
+  // Counters exported through register_metrics() in the "LBA" metric group.
+  struct {
+    // total number of lba alloc_extent operations
+    uint64_t num_alloc_extents = 0;
+    // total number of iterator next operations during extent allocation
+    uint64_t num_alloc_extents_iter_nexts = 0;
+    // total number of refreshed cursors
+    uint64_t num_refresh_parent_total = 0;
+    // refreshed cursors whose parent was invalid
+    uint64_t num_refresh_invalid_parent = 0;
+    // refreshed cursors whose parent was not viewable by the transaction
+    uint64_t num_refresh_unviewable_parent = 0;
+    // refreshed cursors whose parent was viewable but modified
+    uint64_t num_refresh_modified_viewable_parent = 0;
+  } stats;
+
+  /**
+   * alloc_mapping_info_t
+   *
+   * Describes one mapping to be inserted into the LBA btree: the key (or
+   * L_ADDR_NULL if it is to be chosen by the allocator), the value to
+   * store, and the extent to link to the new entry.
+   */
+  struct alloc_mapping_info_t {
+    laddr_t key = L_ADDR_NULL; // once assigned, the allocation to
+                              // key must be exact and successful
+    lba_map_val_t value;
+    LogicalChildNode* extent = nullptr;
+
+    // A mapping of len bytes to P_ADDR_ZERO with no preassigned key.
+    // NOTE(review): extent is set to the pointer from get_reserved_ptr(),
+    // presumably a placeholder rather than a real extent -- confirm
+    static alloc_mapping_info_t create_zero(extent_len_t len) {
+      return {
+       L_ADDR_NULL,
+       {
+         len,
+         pladdr_t(P_ADDR_ZERO),
+         EXTENT_DEFAULT_REF_COUNT,
+         0
+       },
+       static_cast<LogicalChildNode*>(get_reserved_ptr<LBALeafNode, laddr_t>())};
+    }
+    // An indirect mapping at laddr of len bytes whose value points at
+    // intermediate_key; extent is the get_reserved_ptr() pointer as above.
+    static alloc_mapping_info_t create_indirect(
+      laddr_t laddr,
+      extent_len_t len,
+      laddr_t intermediate_key) {
+      return {
+       laddr,
+       {
+         len,
+         pladdr_t(intermediate_key),
+         EXTENT_DEFAULT_REF_COUNT,
+         0     // crc will only be used and checked with LBA direct mappings
+               // also see pin_to_extent(_by_type)
+       },
+       static_cast<LogicalChildNode*>(get_reserved_ptr<LBALeafNode, laddr_t>())};
+    }
+    // A direct mapping at laddr (may be L_ADDR_NULL) to paddr, linked to
+    // extent.
+    static alloc_mapping_info_t create_direct(
+      laddr_t laddr,
+      extent_len_t len,
+      paddr_t paddr,
+      extent_ref_count_t refcount,
+      checksum_t checksum,
+      LogicalChildNode& extent) {
+      return {laddr, {len, pladdr_t(paddr), refcount, checksum}, &extent};
+    }
+  };
+
+  /// Builds the btree operation context from this manager's cache and t.
+  op_context_t get_context(Transaction &t) {
+    return op_context_t{cache, t};
+  }
+
+  seastar::metrics::metric_group metrics;
+  void register_metrics();
+
+  /**
+   * update_mapping_ret_bare_t
+   *
+   * Outcome of updating a mapping: either the key/value of a mapping that
+   * was removed, or a cursor to the mapping that is still alive.
+   */
+  struct update_mapping_ret_bare_t {
+    // default state: "alive" alternative holding a null cursor
+    update_mapping_ret_bare_t()
+       : update_mapping_ret_bare_t(LBACursorRef(nullptr)) {}
+
+    // the mapping is still alive and reachable through cursor
+    update_mapping_ret_bare_t(LBACursorRef cursor)
+       : ret(std::move(cursor)) {}
+
+    // the mapping laddr -> value was removed
+    update_mapping_ret_bare_t(laddr_t laddr, lba_map_val_t value)
+       : ret(removed_mapping_t{laddr, value}) {}
+
+    struct removed_mapping_t {
+      laddr_t laddr;
+      lba_map_val_t map_value;
+    };
+    // index 0: removed mapping, index 1: cursor to the alive mapping
+    std::variant<removed_mapping_t, LBACursorRef> ret;
+
+    bool is_removed_mapping() const {
+      return ret.index() == 0;
+    }
+
+    bool is_alive_mapping() const {
+      if (ret.index() == 1) {
+       assert(std::get<1>(ret));
+       return true;
+      } else {
+       return false;
+      }
+    }
+
+    const removed_mapping_t& get_removed_mapping() const {
+      assert(is_removed_mapping());
+      return std::get<0>(ret);
+    }
+
+    const LBACursor& get_cursor() const {
+      assert(is_alive_mapping());
+      return *std::get<1>(ret);
+    }
+
+    // hands the cursor over to the caller; the stored one is moved from
+    LBACursorRef take_cursor() {
+      assert(is_alive_mapping());
+      return std::move(std::get<1>(ret));
+    }
+
+    // Converts to {laddr, refcount, pladdr, len}; only direct (paddr)
+    // mappings may be converted (ceph_assert).
+    explicit operator ref_update_result_t() const {
+      if (is_removed_mapping()) {
+       // bind by reference: no need to copy the removed mapping
+       const auto &v = get_removed_mapping();
+       auto &val = v.map_value;
+       ceph_assert(val.pladdr.is_paddr());
+       return {v.laddr, val.refcount, val.pladdr, val.len};
+      } else {
+       assert(is_alive_mapping());
+       auto &c = get_cursor();
+       assert(c.val);
+       ceph_assert(!c.is_indirect());
+       return {c.get_laddr(), c.val->refcount, c.val->pladdr, c.val->len};
+      }
+    }
+  };
+
+  using update_refcount_iertr = ref_iertr;
+  using update_refcount_ret = update_refcount_iertr::future<
+    update_mapping_ret_bare_t>;
+  update_refcount_ret update_refcount(
+    Transaction &t,
+    laddr_t addr,
+    int delta,
+    bool cascade_remove);
+
+  /**
+   * _update_mapping
+   *
+   * Updates the mapping at addr with the value returned by f; removes the
+   * mapping instead if the returned value's refcount is 0
+   */
+  using _update_mapping_iertr = ref_iertr;
+  using _update_mapping_ret = ref_iertr::future<
+    update_mapping_ret_bare_t>;
+  using update_func_t = std::function<
+    lba_map_val_t(const lba_map_val_t &v)
+    >;
+  _update_mapping_ret _update_mapping(
+    Transaction &t,
+    laddr_t addr,
+    update_func_t &&f,
+    LogicalChildNode*);
+
+  struct insert_position_t {
+    laddr_t laddr;
+    LBABtree::iterator insert_iter;
+  };
+  enum class alloc_policy_t {
+    deterministic, // no conflict
+    linear_search,
+  };
+  using search_insert_position_iertr = base_iertr;
+  using search_insert_position_ret =
+      search_insert_position_iertr::future<insert_position_t>;
+  search_insert_position_ret search_insert_position(
+    op_context_t c,
+    LBABtree &btree,
+    laddr_t hint,
+    extent_len_t length,
+    alloc_policy_t policy);
+
+  using alloc_mappings_iertr = base_iertr;
+  using alloc_mappings_ret =
+      alloc_mappings_iertr::future<std::list<LBACursorRef>>;
+  /**
+   * alloc_contiguous_mappings
+   *
+   * Insert a range of contiguous mappings into the LBA btree.
+   *
+   * hint is a non-null laddr hint for allocation. The key of every entry in
+   * alloc_infos should be L_ADDR_NULL; each mapping's final laddr is the
+   * allocated laddr plus the total length of the preceding mappings.
+   */
+  alloc_mappings_ret alloc_contiguous_mappings(
+    Transaction &t,
+    laddr_t hint,
+    std::vector<alloc_mapping_info_t> &alloc_infos,
+    alloc_policy_t policy);
+
+  /**
+   * alloc_sparse_mappings
+   *
+   * Insert a range of sparse mappings into the LBA btree.
+   *
+   * hint is a non-null laddr hint for allocation. The keys in alloc_infos
+   * are non-null laddr hints and must be increasing; each mapping's final
+   * laddr keeps the same offset to the allocated laddr as its key has to hint.
+   */
+  alloc_mappings_ret alloc_sparse_mappings(
+    Transaction &t,
+    laddr_t hint,
+    std::vector<alloc_mapping_info_t> &alloc_infos,
+    alloc_policy_t policy);
+
+  /**
+   * insert_mappings
+   *
+   * Insert all lba mappings built from alloc_infos into LBA btree before
+   * iter and return the inserted LBACursors.
+   *
+   * NOTE: There is no guarantee that the returned cursors are all valid,
+   * since a later insertion may invalidate the parent extent of a
+   * previously returned LBACursor.
+   */
+  alloc_mappings_ret insert_mappings(
+    op_context_t c,
+    LBABtree &btree,
+    LBABtree::iterator iter,
+    std::vector<alloc_mapping_info_t> &alloc_infos);
+
+  /// Adds delta (> 0) references to the mapping at addr, without cascading,
+  /// and converts the outcome into a ref_update_result_t.
+  ref_ret _incref_extent(
+    Transaction &t,
+    laddr_t addr,
+    int delta)
+  {
+    ceph_assert(delta > 0);
+    constexpr bool cascade_remove = false;
+    auto to_result = [](auto updated) {
+      return static_cast<ref_update_result_t>(updated);
+    };
+    return update_refcount(
+      t, addr, delta, cascade_remove
+    ).si_then(std::move(to_result));
+  }
+
+  using _get_cursor_ret = get_mapping_iertr::future<LBACursorRef>;
+  _get_cursor_ret get_cursor(
+    op_context_t c,
+    LBABtree& btree,
+    laddr_t offset);
+
+  using _get_cursors_ret = get_mappings_iertr::future<std::list<LBACursorRef>>;
+  _get_cursors_ret get_cursors(
+    op_context_t c,
+    LBABtree& btree,
+    laddr_t offset,
+    extent_len_t length);
+
+  using resolve_indirect_cursor_ret = get_mappings_iertr::future<LBACursorRef>;
+  resolve_indirect_cursor_ret resolve_indirect_cursor(
+    op_context_t c,
+    LBABtree& btree,
+    const LBACursor& indirect_cursor);
+
+  using _decref_intermediate_ret = ref_iertr::future<
+    update_mapping_ret_bare_t>;
+  _decref_intermediate_ret _decref_intermediate(
+    Transaction &t,
+    laddr_t addr,
+    extent_len_t len);
+
+  using refresh_lba_cursor_iertr = base_iertr;
+  using refresh_lba_cursor_ret = refresh_lba_cursor_iertr::future<>;
+  refresh_lba_cursor_ret refresh_lba_cursor(
+    op_context_t c,
+    LBABtree &btree,
+    LBACursor &cursor);
+};
+using BtreeLBAManagerRef = std::unique_ptr<BtreeLBAManager>;
+
+}
diff --git a/src/crimson/os/seastore/lba/lba_btree_node.cc b/src/crimson/os/seastore/lba/lba_btree_node.cc
new file mode 100644 (file)
index 0000000..5ee0db2
--- /dev/null
@@ -0,0 +1,87 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <sys/mman.h>
+#include <string.h>
+
+#include <memory>
+#include <string.h>
+
+#include "include/buffer.h"
+#include "include/byteorder.h"
+
+#include "crimson/os/seastore/lba/btree_lba_manager.h"
+#include "crimson/os/seastore/logging.h"
+#include "crimson/os/seastore/logical_child_node.h"
+
+SET_SUBSYS(seastore_lba);
+
+namespace crimson::os::seastore::lba {
+
+/// Appends this leaf's size, meta, modification counter, tracker pointers
+/// and root block pointer to out and returns out.
+std::ostream &LBALeafNode::print_detail(std::ostream &out) const
+{
+  out << ", size=" << this->get_size();
+  out << ", meta=" << this->get_meta();
+  out << ", modifications=" << this->modifications;
+  out << ", my_tracker=" << (void*)this->my_tracker;
+  // the tracker's parent can only be printed when a tracker exists
+  if (this->my_tracker != nullptr) {
+    out << ", my_tracker->parent=" << (void*)this->my_tracker->get_parent().get();
+  }
+  out << ", root_block=" << (void*)this->parent_of_root.get();
+  return out;
+}
+
+/// Rewrites every entry whose value holds a relative paddr so that it holds
+/// base.add_relative(that paddr) instead; all other entries are untouched.
+void LBALeafNode::resolve_relative_addrs(paddr_t base)
+{
+  LOG_PREFIX(LBALeafNode::resolve_relative_addrs);
+  for (auto entry : *this) {
+    auto mapped = entry->get_val();
+    const bool holds_relative_paddr =
+      mapped.pladdr.is_paddr() && mapped.pladdr.get_paddr().is_relative();
+    if (!holds_relative_paddr) {
+      continue;
+    }
+    mapped.pladdr = base.add_relative(mapped.pladdr.get_paddr());
+    TRACE("{} -> {}", entry->get_val().pladdr, mapped.pladdr);
+    entry->set_val(mapped);
+  }
+}
+
+/// Journals an update of the entry at iter to val. A paddr value is first
+/// passed through maybe_generate_relative().
+void LBALeafNode::update(
+  internal_const_iterator_t iter,
+  lba_map_val_t val)
+{
+  LOG_PREFIX(LBALeafNode::update);
+  SUBTRACE(seastore_fixedkv_tree, "trans.{}, pos {}",
+    this->pending_for_transaction, iter.get_offset());
+  this->on_modify();
+  const bool maps_to_paddr = val.pladdr.is_paddr();
+  if (maps_to_paddr) {
+    val.pladdr = maybe_generate_relative(val.pladdr.get_paddr());
+  }
+  this->journal_update(iter, val, this->maybe_get_delta_buffer());
+}
+
+/// Journals the insertion of addr -> val at iter and returns iter. A paddr
+/// value is first passed through maybe_generate_relative().
+LBALeafNode::internal_const_iterator_t LBALeafNode::insert(
+  internal_const_iterator_t iter,
+  laddr_t addr,
+  lba_map_val_t val)
+{
+  LOG_PREFIX(LBALeafNode::insert);
+  SUBTRACE(seastore_fixedkv_tree, "trans.{}, pos {}, key {}",
+    this->pending_for_transaction, iter.get_offset(), addr);
+  this->on_modify();
+  const bool maps_to_paddr = val.pladdr.is_paddr();
+  if (maps_to_paddr) {
+    val.pladdr = maybe_generate_relative(val.pladdr.get_paddr());
+  }
+  this->journal_insert(iter, addr, val, this->maybe_get_delta_buffer());
+  return iter;
+}
+
+}
diff --git a/src/crimson/os/seastore/lba/lba_btree_node.h b/src/crimson/os/seastore/lba/lba_btree_node.h
new file mode 100644 (file)
index 0000000..05f26b6
--- /dev/null
@@ -0,0 +1,291 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include <sys/mman.h>
+#include <memory>
+#include <string.h>
+
+
+#include "include/buffer.h"
+
+#include "crimson/common/fixed_kv_node_layout.h"
+#include "crimson/common/errorator.h"
+#include "crimson/os/seastore/seastore_types.h"
+#include "crimson/os/seastore/cache.h"
+#include "crimson/os/seastore/cached_extent.h"
+
+#include "crimson/os/seastore/btree/btree_types.h"
+#include "crimson/os/seastore/btree/fixed_kv_btree.h"
+#include "crimson/os/seastore/btree/fixed_kv_node.h"
+
+namespace crimson::os::seastore {
+class LogicalChildNode; // forward declaration only; LBALeafNode names it as its child_t
+}
+
+namespace crimson::os::seastore::lba {
+
+using base_iertr = Cache::base_iertr; // alias of Cache's base error type
+using LBANode = FixedKVNode<laddr_t>; // FixedKVNode keyed by logical address
+
+class BtreeLBAMapping;
+
+constexpr size_t LBA_BLOCK_SIZE = 4096; // node size in bytes; handed to both node templates and checked by their static_asserts
+
+using lba_node_meta_t = fixed_kv_node_meta_t<laddr_t>;
+
+using lba_node_meta_le_t = fixed_kv_node_meta_le_t<laddr_le_t>; // presumably the little-endian on-disk form of lba_node_meta_t -- confirm in btree_types.h
+
+/**
+ * LBAInternalNode
+ *
+ * Abstracts operations on and layout of internal nodes for the
+ * LBA Tree.
+ *
+ * Layout (4KiB):
+ *   checksum   : ceph_le32[1]               4B
+ *   size       : ceph_le32[1]               4B
+ *   meta       : lba_node_meta_le_t[1]      20B
+ *   keys       : laddr_le_t[CAPACITY]       (254*8)B
+ *   values     : paddr_le_t[CAPACITY]       (254*8)B
+ *                                           = 4092B
+ *
+ * The total (4 + 4 + 20 + 254*8 + 254*8 = 4092B) has to fit in
+ * LBA_BLOCK_SIZE; the static_assert in the struct rejects a capacity
+ * that does not fit.
+ *
+ * TODO: make the above capacity calculation part of FixedKVNodeLayout
+ * TODO: the above alignment probably isn't portable without further work
+ */
+constexpr size_t INTERNAL_NODE_CAPACITY = 254; // max entries per internal node, see the layout above
+struct LBAInternalNode
+  : FixedKVInternalNode<
+      INTERNAL_NODE_CAPACITY,
+      laddr_t, laddr_le_t,
+      LBA_BLOCK_SIZE,
+      LBAInternalNode> {
+  static_assert(
+    check_capacity(LBA_BLOCK_SIZE),
+    "INTERNAL_NODE_CAPACITY doesn't fit in LBA_BLOCK_SIZE");
+  using Ref = TCachedExtentRef<LBAInternalNode>;
+  using internal_iterator_t = const_iterator;
+  using key_type = laddr_t;
+  template <typename... T>
+  LBAInternalNode(T&&... t) : // forwards every constructor argument to the FixedKVInternalNode base
+    FixedKVInternalNode(std::forward<T>(t)...) {}
+  static constexpr uint32_t CHILD_VEC_UNIT = 0; // NOTE(review): the meaning of 0 is defined by whoever reads CHILD_VEC_UNIT, not visible here
+
+  static constexpr extent_types_t TYPE = extent_types_t::LADDR_INTERNAL;
+
+  extent_types_t get_type() const final { // reports the static TYPE tag
+    return TYPE;
+  }
+};
+using LBAInternalNodeRef = LBAInternalNode::Ref;
+
+/**
+ * LBALeafNode
+ *
+ * Abstracts operations on and layout of leaf nodes for the
+ * LBA Tree.
+ *
+ * Layout (4KiB):
+ *   checksum   : ceph_le32[1]                4B
+ *   size       : ceph_le32[1]                4B
+ *   meta       : lba_node_meta_le_t[1]       20B
+ *   keys       : laddr_le_t[CAPACITY]        (140*8)B
+ *   values     : lba_map_val_le_t[CAPACITY]  (140*21)B
+ *                                            = 4088B
+ *
+ * TODO: update FixedKVNodeLayout to handle the above calculation
+ * TODO: the above alignment probably isn't portable without further work
+ */
+constexpr size_t LEAF_NODE_CAPACITY = 140;
+
+struct LBALeafNode
+  : FixedKVLeafNode<
+      LEAF_NODE_CAPACITY,
+      laddr_t, laddr_le_t,
+      lba_map_val_t, lba_map_val_le_t,
+      LBA_BLOCK_SIZE,
+      LBAInternalNode,
+      LBALeafNode>,
+    ParentNode<LBALeafNode, laddr_t> {
+  static_assert(
+    check_capacity(LBA_BLOCK_SIZE),
+    "LEAF_NODE_CAPACITY doesn't fit in LBA_BLOCK_SIZE");
+  using Ref = TCachedExtentRef<LBALeafNode>;
+  using parent_type_t = FixedKVLeafNode<
+                         LEAF_NODE_CAPACITY,
+                         laddr_t, laddr_le_t,
+                         lba_map_val_t, lba_map_val_le_t,
+                         LBA_BLOCK_SIZE,
+                         LBAInternalNode,
+                         LBALeafNode>;
+  using internal_const_iterator_t =
+    typename parent_type_t::node_layout_t::const_iterator;
+  using internal_iterator_t =
+    typename parent_type_t::node_layout_t::iterator;
+  using key_type = laddr_t;
+  using parent_node_t = ParentNode<LBALeafNode, laddr_t>;
+  using child_t = LogicalChildNode;
+  static constexpr uint32_t CHILD_VEC_UNIT = 0;
+  LBALeafNode(ceph::bufferptr &&ptr)
+    : parent_type_t(std::move(ptr)),
+      parent_node_t(LEAF_NODE_CAPACITY) {}
+  explicit LBALeafNode(extent_len_t length)
+    : parent_type_t(length),
+      parent_node_t(LEAF_NODE_CAPACITY) {}
+  LBALeafNode(const LBALeafNode &rhs)
+    : parent_type_t(rhs),
+      parent_node_t(rhs) {}
+
+  static constexpr extent_types_t TYPE = extent_types_t::LADDR_LEAF;
+
+  void update(
+    internal_const_iterator_t iter,
+    lba_map_val_t val) final;
+
+  internal_const_iterator_t insert(
+    internal_const_iterator_t iter,
+    laddr_t addr,
+    lba_map_val_t val) final;
+
+  void remove(internal_const_iterator_t iter) final {
+    LOG_PREFIX(LBALeafNode::remove);
+    SUBTRACE(seastore_fixedkv_tree, "trans.{}, pos {}, key {}",
+      this->pending_for_transaction,
+      iter.get_offset(),
+      iter.get_key());
+    assert(iter != this->end());
+    this->on_modify();
+    this->remove_child_ptr(iter.get_offset());
+    return this->journal_remove(
+      iter,
+      this->maybe_get_delta_buffer());
+  }
+
+  // See LBAInternalNode, same concept
+  void resolve_relative_addrs(paddr_t base) final;
+  void node_resolve_vals(
+    internal_iterator_t from,
+    internal_iterator_t to) const final
+  {
+    if (this->is_initial_pending()) {
+      for (auto i = from; i != to; ++i) {
+       auto val = i->get_val();
+       if (val.pladdr.is_paddr()
+           && val.pladdr.get_paddr().is_relative()) {
+         assert(val.pladdr.get_paddr().is_block_relative());
+         val.pladdr = this->get_paddr().add_relative(val.pladdr.get_paddr());
+         i->set_val(val);
+       }
+      }
+    }
+  }
+  void node_unresolve_vals(
+    internal_iterator_t from,
+    internal_iterator_t to) const final
+  {
+    if (this->is_initial_pending()) {
+      for (auto i = from; i != to; ++i) {
+       auto val = i->get_val();
+       if (val.pladdr.is_paddr()
+           && val.pladdr.get_paddr().is_relative()) {
+         assert(val.pladdr.get_paddr().is_record_relative());
+         val.pladdr = val.pladdr.get_paddr().block_relative_to(this->get_paddr());
+         i->set_val(val);
+       }
+      }
+    }
+  }
+
+  extent_types_t get_type() const final {
+    return TYPE;
+  }
+
+  void do_on_rewrite(Transaction &t, CachedExtent &extent) final {
+    this->parent_node_t::on_rewrite(t, static_cast<LBALeafNode&>(extent));
+  }
+
+  void do_on_replace_prior() final {
+    this->parent_node_t::on_replace_prior();
+  }
+
+  void do_prepare_commit() final {
+    this->parent_node_t::prepare_commit();
+  }
+
+  bool is_child_stable(
+    op_context_t c,
+    uint16_t pos,
+    laddr_t key) const {
+    return parent_node_t::_is_child_stable(c.trans, c.cache, pos, key);
+  }
+  bool is_child_data_stable(
+    op_context_t c,
+    uint16_t pos,
+    laddr_t key) const {
+    return parent_node_t::_is_child_stable(c.trans, c.cache, pos, key, true);
+  }
+
+  void on_split(
+    Transaction &t,
+    LBALeafNode &left,
+    LBALeafNode &right) final {
+    this->split_child_ptrs(t, left, right);
+  }
+  void adjust_copy_src_dest_on_split(
+    Transaction &t,
+    LBALeafNode &left,
+    LBALeafNode &right) final {
+    this->parent_node_t::adjust_copy_src_dest_on_split(t, left, right);
+  }
+
+  void on_merge(
+    Transaction &t,
+    LBALeafNode &left,
+    LBALeafNode &right) final {
+    this->merge_child_ptrs(t, left, right);
+  }
+  void adjust_copy_src_dest_on_merge(
+    Transaction &t,
+    LBALeafNode &left,
+    LBALeafNode &right) final {
+    this->parent_node_t::adjust_copy_src_dest_on_merge(t, left, right);
+  }
+
+  void on_balance(
+    Transaction &t,
+    LBALeafNode &left,
+    LBALeafNode &right,
+    uint32_t pivot_idx,
+    LBALeafNode &replacement_left,
+    LBALeafNode &replacement_right) final {
+    this->balance_child_ptrs(
+      t, left, right, pivot_idx, replacement_left, replacement_right);
+  }
+  void adjust_copy_src_dest_on_balance(
+    Transaction &t,
+    LBALeafNode &left,
+    LBALeafNode &right,
+    uint32_t pivot_idx,
+    LBALeafNode &replacement_left,
+    LBALeafNode &replacement_right) final {
+    this->parent_node_t::adjust_copy_src_dest_on_balance(
+      t, left, right, pivot_idx, replacement_left, replacement_right);
+  }
+
+  CachedExtentRef duplicate_for_write(Transaction&) final {
+    return CachedExtentRef(new LBALeafNode(*this));
+  }
+
+  std::ostream &print_detail(std::ostream &out) const final;
+};
+using LBALeafNodeRef = TCachedExtentRef<LBALeafNode>;
+
+}
+
+#if FMT_VERSION >= 90000 // fmt 9+ needs an explicit opt-in to format a type through its operator<<
+template <> struct fmt::formatter<crimson::os::seastore::lba::lba_node_meta_t> : fmt::ostream_formatter {};
+template <> struct fmt::formatter<crimson::os::seastore::lba::lba_map_val_t> : fmt::ostream_formatter {};
+template <> struct fmt::formatter<crimson::os::seastore::lba::LBAInternalNode> : fmt::ostream_formatter {};
+template <> struct fmt::formatter<crimson::os::seastore::lba::LBALeafNode> : fmt::ostream_formatter {};
+#endif // FMT_VERSION >= 90000
index 50850a7de56d1a00011ce5d1d275c3026639d9eb..3ebfb1dddf83c7d7620aa4ce37046b12c49afd74 100644 (file)
@@ -2,12 +2,12 @@
 // vim: ts=8 sw=2 smarttab
 
 #include "crimson/os/seastore/lba_manager.h"
-#include "crimson/os/seastore/lba_manager/btree/btree_lba_manager.h"
+#include "crimson/os/seastore/lba/btree_lba_manager.h"
 
 namespace crimson::os::seastore {
 
-LBAManagerRef lba_manager::create_lba_manager(Cache &cache) {
-  return LBAManagerRef(new btree::BtreeLBAManager(cache));
+LBAManagerRef lba::create_lba_manager(Cache &cache) { // factory: always builds the btree-backed BtreeLBAManager on top of cache
+  return LBAManagerRef(new lba::BtreeLBAManager(cache));
 }
 
 }
index 286496f3c71e99f7aed8e37baf9f988da2a9d5bf..d146a9932ece9465521915749cc00c943db89917 100644 (file)
@@ -241,7 +241,7 @@ public:
 using LBAManagerRef = std::unique_ptr<LBAManager>;
 
 class Cache;
-namespace lba_manager {
+namespace lba {
 LBAManagerRef create_lba_manager(Cache &cache);
 }
 
diff --git a/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.cc b/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.cc
deleted file mode 100644 (file)
index 3f0700f..0000000
+++ /dev/null
@@ -1,1102 +0,0 @@
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab
-
-#include <sys/mman.h>
-#include <string.h>
-
-#include <seastar/core/metrics.hh>
-
-#include "include/buffer.h"
-#include "crimson/os/seastore/lba_manager/btree/btree_lba_manager.h"
-#include "crimson/os/seastore/lba_manager/btree/lba_btree_node.h"
-#include "crimson/os/seastore/logging.h"
-
-SET_SUBSYS(seastore_lba);
-/*
- * levels:
- * - INFO:  mkfs
- * - DEBUG: modification operations
- * - TRACE: read operations, DEBUG details
- */
-
-template <> struct fmt::formatter< // lets LBABtree iterators be passed to the logging macros in this file
-  crimson::os::seastore::lba_manager::btree::LBABtree::iterator>
-    : public fmt::formatter<std::string_view>
-{
-  using Iter = crimson::os::seastore::lba_manager::btree::LBABtree::iterator;
-
-  template <typename FmtCtx>
-  auto format(const Iter &iter, FmtCtx &ctx) const
-      -> decltype(ctx.out()) {
-    if (iter.is_end()) { // an end iterator has no key/value to read
-      return fmt::format_to(ctx.out(), "end");
-    }
-    return fmt::format_to(ctx.out(), "{}~{}", iter.get_key(), iter.get_val()); // rendered as key~value
-  }
-};
-
-namespace crimson::os::seastore {
-
-template <typename T>
-Transaction::tree_stats_t& get_tree_stats(Transaction &t) // T is unused in the body: every instantiation returns the LBA tree stats of t
-{
-  return t.get_lba_tree_stats();
-}
-
-template Transaction::tree_stats_t& // explicit instantiation for the LBA btree type
-get_tree_stats<
-  crimson::os::seastore::lba_manager::btree::LBABtree>(
-  Transaction &t);
-
-template <typename T>
-phy_tree_root_t& get_phy_tree_root(root_t &r) // T is unused in the body: every instantiation returns r.lba_root
-{
-  return r.lba_root;
-}
-
-template phy_tree_root_t& // explicit instantiation for the LBA btree type
-get_phy_tree_root<
-  crimson::os::seastore::lba_manager::btree::LBABtree>(root_t &r);
-
-template <>
-const get_phy_tree_root_node_ret get_phy_tree_root_node< // returns {found, future of the root node extent as viewable by c.trans}
-  crimson::os::seastore::lba_manager::btree::LBABtree>(
-  const RootBlockRef &root_block, op_context_t c)
-{
-  auto lba_root = root_block->lba_root_node;
-  if (lba_root) {
-    ceph_assert(lba_root->is_initial_pending()
-      == root_block->is_pending());
-    return {true,
-            c.cache.get_extent_viewable_by_trans(c.trans, lba_root)};
-  } else if (root_block->is_pending()) { // no node linked on a pending root block: fall back to its prior instance
-    auto &prior = static_cast<RootBlock&>(*root_block->get_prior_instance());
-    lba_root = prior.lba_root_node;
-    if (lba_root) {
-      return {true,
-              c.cache.get_extent_viewable_by_trans(c.trans, lba_root)};
-    } else {
-      return {false, // nothing linked on either instance
-              Cache::get_extent_iertr::make_ready_future<CachedExtentRef>()};
-    }
-  } else {
-    return {false, // non-pending root block without a linked root node
-            Cache::get_extent_iertr::make_ready_future<CachedExtentRef>()};
-  }
-}
-
-template <typename RootT>
-class TreeRootLinker<RootBlock, RootT> { // partial specialization: keeps RootBlock::lba_root_node and the node's parent_of_root in sync
-public:
-  static void link_root(RootBlockRef &root_block, RootT* lba_root) {
-    root_block->lba_root_node = lba_root;
-    ceph_assert(lba_root != nullptr); // checked after the store above but before lba_root is dereferenced below
-    lba_root->parent_of_root = root_block;
-  }
-  static void unlink_root(RootBlockRef &root_block) { // only clears the root block side of the link
-    root_block->lba_root_node = nullptr;
-  }
-};
-
-template class TreeRootLinker<RootBlock, lba_manager::btree::LBAInternalNode>; // explicit instantiations for both LBA node kinds
-template class TreeRootLinker<RootBlock, lba_manager::btree::LBALeafNode>;
-
-}
-
-namespace crimson::os::seastore::lba_manager::btree {
-
-BtreeLBAManager::mkfs_ret // creates the LBA tree via LBABtree::mkfs and records its root in the root block of t
-BtreeLBAManager::mkfs(
-  Transaction &t)
-{
-  LOG_PREFIX(BtreeLBAManager::mkfs);
-  INFOT("start", t);
-  return cache.get_root(t).si_then([this, &t](auto croot) {
-    assert(croot->is_mutation_pending());
-    croot->get_root().lba_root = LBABtree::mkfs(croot, get_context(t)); // store the new tree root
-    return mkfs_iertr::now();
-  }).handle_error_interruptible(
-    mkfs_iertr::pass_further{}, // mkfs_iertr errors are passed on to the caller
-    crimson::ct_error::assert_all{ // any other error is treated as a bug
-      "Invalid error in BtreeLBAManager::mkfs"
-    }
-  );
-}
-
-BtreeLBAManager::get_mappings_ret // builds one LBAMapping per cursor that get_cursors() returns for laddr~length
-BtreeLBAManager::get_mappings(
-  Transaction &t,
-  laddr_t laddr,
-  extent_len_t length)
-{
-  LOG_PREFIX(BtreeLBAManager::get_mappings);
-  TRACET("{}~0x{:x} ...", t, laddr, length);
-  auto c = get_context(t);
-  return with_btree_state<LBABtree, lba_mapping_list_t>(
-    cache, c,
-    [FNAME, this, c, laddr, length](auto& btree, auto& ret)
-  {
-    return get_cursors(c, btree, laddr, length
-    ).si_then([FNAME, this, c, laddr, length, &btree, &ret](auto cursors) {
-      return seastar::do_with(
-        std::move(cursors),
-        [FNAME, this, c, laddr, length, &btree, &ret](auto& cursors)
-      {
-        return trans_intr::do_for_each(
-          cursors,
-          [FNAME, this, c, laddr, length, &btree, &ret](auto& cursor)
-        {
-          if (!cursor->is_indirect()) { // direct cursor: wrap it as-is
-            ret.emplace_back(LBAMapping::create_direct(std::move(cursor)));
-            TRACET("{}~0x{:x} got {}",
-                   c.trans, laddr, length, ret.back());
-            return get_mappings_iertr::now();
-          }
-         assert(cursor->val->refcount == EXTENT_DEFAULT_REF_COUNT);
-         assert(cursor->val->checksum == 0);
-          return resolve_indirect_cursor(c, btree, *cursor // indirect cursor: look up the direct cursor it refers to first
-          ).si_then([FNAME, c, &ret, &cursor, laddr, length](auto direct) {
-            ret.emplace_back(LBAMapping::create_indirect(
-               std::move(direct), std::move(cursor)));
-            TRACET("{}~0x{:x} got {}",
-                   c.trans, laddr, length, ret.back());
-            return get_mappings_iertr::now();
-          });
-        });
-      });
-    });
-  });
-}
-
-BtreeLBAManager::_get_cursors_ret // collects a cursor for each entry visited from upper_bound_right(laddr) up to laddr + length
-BtreeLBAManager::get_cursors(
-  op_context_t c,
-  LBABtree& btree,
-  laddr_t laddr,
-  extent_len_t length)
-{
-  LOG_PREFIX(BtreeLBAManager::get_cursors);
-  TRACET("{}~0x{:x} ...", c.trans, laddr, length);
-  return seastar::do_with(
-    std::list<LBACursorRef>(),
-    [FNAME, c, laddr, length, &btree](auto& ret)
-  {
-    return LBABtree::iterate_repeat(
-      c,
-      btree.upper_bound_right(c, laddr),
-      [FNAME, c, laddr, length, &ret](auto& pos)
-    {
-      if (pos.is_end() || pos.get_key() >= (laddr + length)) { // stop at the end of the tree or at the first key at/after the range end
-        TRACET("{}~0x{:x} done with {} results, stop at {}",
-               c.trans, laddr, length, ret.size(), pos);
-        return LBABtree::iterate_repeat_ret_inner(
-          interruptible::ready_future_marker{},
-          seastar::stop_iteration::yes);
-      }
-      TRACET("{}~0x{:x} got {}, repeat ...",
-             c.trans, laddr, length, pos);
-      ceph_assert((pos.get_key() + pos.get_val().len) > laddr); // every visited entry must extend past laddr
-      ret.emplace_back(pos.get_cursor(c));
-      return LBABtree::iterate_repeat_ret_inner(
-        interruptible::ready_future_marker{},
-        seastar::stop_iteration::no);
-    }).si_then([&ret] {
-      return std::move(ret);
-    });
-  });
-}
-
-BtreeLBAManager::resolve_indirect_cursor_ret // returns the single direct cursor found at the indirect cursor's intermediate key
-BtreeLBAManager::resolve_indirect_cursor(
-  op_context_t c,
-  LBABtree& btree,
-  const LBACursor &indirect_cursor)
-{
-  ceph_assert(indirect_cursor.is_indirect());
-  return get_cursors(
-    c,
-    btree,
-    indirect_cursor.get_intermediate_key(),
-    indirect_cursor.get_length()
-  ).si_then([&indirect_cursor](auto cursors) {
-    ceph_assert(cursors.size() == 1); // exactly one entry is expected for the looked-up range
-    auto& direct_cursor = cursors.front();
-    auto intermediate_key = indirect_cursor.get_intermediate_key();
-    assert(!direct_cursor->is_indirect());
-    assert(direct_cursor->get_laddr() <= intermediate_key); // the direct mapping must start at or before the intermediate key ...
-    assert(direct_cursor->get_laddr() + direct_cursor->get_length()
-          >= intermediate_key + indirect_cursor.get_length()); // ... and end at or after the end of the indirect range
-    return std::move(direct_cursor);
-  });
-}
-
-BtreeLBAManager::get_mapping_ret // builds the LBAMapping for the entry found by get_cursor() at laddr
-BtreeLBAManager::get_mapping(
-  Transaction &t,
-  laddr_t laddr)
-{
-  LOG_PREFIX(BtreeLBAManager::get_mapping);
-  TRACET("{} ...", t, laddr);
-  auto c = get_context(t);
-  return with_btree<LBABtree>(
-    cache, c,
-    [FNAME, this, c, laddr](auto& btree)
-  {
-    return get_cursor(c, btree, laddr
-    ).si_then([FNAME, this, c, laddr, &btree](LBACursorRef cursor) {
-      if (!cursor->is_indirect()) { // direct cursor: wrap it as-is
-        TRACET("{} got direct cursor {}",
-               c.trans, laddr, *cursor);
-       auto mapping = LBAMapping::create_direct(std::move(cursor));
-        return get_mapping_iertr::make_ready_future<
-         LBAMapping>(std::move(mapping));
-      }
-      assert(laddr == cursor->get_laddr());
-      assert(cursor->val->refcount == EXTENT_DEFAULT_REF_COUNT);
-      assert(cursor->val->checksum == 0);
-      return resolve_indirect_cursor(c, btree, *cursor // indirect cursor: look up the direct cursor it refers to first
-      ).si_then([FNAME, c, laddr, indirect=std::move(cursor)]
-               (auto direct) mutable {
-       auto mapping = LBAMapping::create_indirect(
-         std::move(direct), std::move(indirect));
-        TRACET("{} got indirect mapping {}",
-               c.trans, laddr, mapping);
-        return get_mapping_iertr::make_ready_future<
-         LBAMapping>(std::move(mapping));
-      });
-    });
-  });
-}
-
-BtreeLBAManager::_get_cursor_ret // exact-key lookup: yields a cursor for laddr, or enoent when no entry has that key
-BtreeLBAManager::get_cursor(
-  op_context_t c,
-  LBABtree& btree,
-  laddr_t laddr)
-{
-  LOG_PREFIX(BtreeLBAManager::get_cursor);
-  TRACET("{} ...", c.trans, laddr);
-  return btree.lower_bound(
-    c, laddr
-  ).si_then([FNAME, c, laddr](auto iter) -> _get_cursor_ret {
-    if (iter.is_end() || iter.get_key() != laddr) { // lower_bound may land on a different key, so insist on an exact match
-      ERRORT("{} doesn't exist", c.trans, laddr);
-      return crimson::ct_error::enoent::make();
-    }
-    TRACET("{} got value {}", c.trans, laddr, iter.get_val());
-    return _get_cursor_ret(
-      interruptible::ready_future_marker{},
-      iter.get_cursor(c));
-  });
-}
-
-BtreeLBAManager::search_insert_position_ret // finds a free laddr range of `length` at or after hint; yields the chosen laddr and the iterator to insert before
-BtreeLBAManager::search_insert_position(
-  op_context_t c,
-  LBABtree &btree,
-  laddr_t hint,
-  extent_len_t length,
-  alloc_policy_t policy)
-{
-  LOG_PREFIX(BtreeLBAManager::search_insert_position);
-  auto lookup_attempts = stats.num_alloc_extents_iter_nexts; // counter snapshot, used below to log how many steps this search took
-  using OptIter = std::optional<LBABtree::iterator>;
-  return seastar::do_with(
-    hint, OptIter(std::nullopt),
-    [this, c, &btree, hint, length, lookup_attempts, policy, FNAME]
-    (laddr_t &last_end, OptIter &insert_iter) // last_end starts at hint and tracks the candidate start address
-  {
-    return LBABtree::iterate_repeat(
-      c,
-      btree.upper_bound_right(c, hint),
-      [this, c, hint, length, lookup_attempts, policy,
-       &last_end, &insert_iter, FNAME](auto &iter)
-    {
-      ++stats.num_alloc_extents_iter_nexts;
-      if (iter.is_end() ||
-         iter.get_key() >= (last_end + length)) { // the gap before iter is large enough (or the tree ended)
-       if (policy == alloc_policy_t::deterministic) {
-         ceph_assert(hint == last_end); // deterministic allocation must land exactly on hint
-       }
-       DEBUGT("hint: {}~0x{:x}, allocated laddr: {}, insert position: {}, "
-              "done with {} attempts",
-              c.trans, hint, length, last_end, iter,
-              stats.num_alloc_extents_iter_nexts - lookup_attempts);
-       insert_iter.emplace(iter);
-       return search_insert_position_iertr::make_ready_future<
-         seastar::stop_iteration>(seastar::stop_iteration::yes);
-      }
-      ceph_assert(policy == alloc_policy_t::linear_search); // only linear_search may skip past an occupied range
-      last_end = (iter.get_key() + iter.get_val().len).checked_to_laddr(); // next candidate: right after the current mapping
-      TRACET("hint: {}~0x{:x}, current iter: {}, repeat ...",
-            c.trans, hint, length, iter);
-      return search_insert_position_iertr::make_ready_future<
-       seastar::stop_iteration>(seastar::stop_iteration::no);
-    }).si_then([&last_end, &insert_iter] {
-      ceph_assert(insert_iter);
-      return search_insert_position_iertr::make_ready_future<
-       insert_position_t>(last_end, *std::move(insert_iter));
-    });
-  });
-}
-
-BtreeLBAManager::alloc_mappings_ret // allocates one laddr range for all alloc_infos and lays the mappings out back to back inside it
-BtreeLBAManager::alloc_contiguous_mappings(
-  Transaction &t,
-  laddr_t hint,
-  std::vector<alloc_mapping_info_t> &alloc_infos,
-  alloc_policy_t policy)
-{
-  ceph_assert(hint != L_ADDR_NULL);
-  extent_len_t total_len = 0;
-  for (auto &info : alloc_infos) {
-    assert(info.key == L_ADDR_NULL); // keys are assigned below, callers must not preset them
-    total_len += info.value.len;
-  }
-
-  auto c = get_context(t);
-  return with_btree<LBABtree>(
-    cache,
-    c,
-    [this, c, hint, &alloc_infos, total_len, policy](auto &btree)
-  {
-    return search_insert_position(c, btree, hint, total_len, policy
-    ).si_then([this, c, &alloc_infos, &btree](insert_position_t res) {
-      extent_len_t offset = 0;
-      for (auto &info : alloc_infos) {
-       info.key = (res.laddr + offset).checked_to_laddr(); // each mapping starts where the previous one ended
-       offset += info.value.len;
-      }
-      return insert_mappings(
-       c, btree, std::move(res.insert_iter), alloc_infos);
-    });
-  });
-}
-
-BtreeLBAManager::alloc_mappings_ret // allocates a range covering hint .. end of the last info; infos carry preset, ascending, non-overlapping keys
-BtreeLBAManager::alloc_sparse_mappings(
-  Transaction &t,
-  laddr_t hint,
-  std::vector<alloc_mapping_info_t> &alloc_infos,
-  alloc_policy_t policy)
-{
-  ceph_assert(hint != L_ADDR_NULL);
-#ifndef NDEBUG
-  assert(alloc_infos.front().key != L_ADDR_NULL);
-  for (size_t i = 1; i < alloc_infos.size(); i++) {
-    auto &prev = alloc_infos[i - 1];
-    auto &cur = alloc_infos[i];
-    assert(cur.key != L_ADDR_NULL);
-    assert(prev.key + prev.value.len <= cur.key);
-  }
-#endif
-  auto total_len = hint.get_byte_distance<extent_len_t>( // NOTE(review): back() assumes alloc_infos is non-empty -- confirm all callers guarantee it
-    alloc_infos.back().key + alloc_infos.back().value.len);
-  auto c = get_context(t);
-  return with_btree<LBABtree>(
-    cache,
-    c,
-    [this, c, hint, &alloc_infos, total_len, policy](auto &btree)
-  {
-    return search_insert_position(c, btree, hint, total_len, policy
-    ).si_then([this, c, hint, &alloc_infos, &btree, policy](auto res) {
-      if (policy != alloc_policy_t::deterministic) {
-       for (auto &info : alloc_infos) {
-         auto offset = info.key.get_byte_distance<extent_len_t>(hint); // keep each key's distance from hint ...
-         info.key = (res.laddr + offset).checked_to_laddr(); // ... but rebase it on the allocated start address
-       }
-      } // deterministic guarantees hint == res.laddr
-      return insert_mappings(
-       c, btree, std::move(res.insert_iter), alloc_infos);
-    });
-  });
-}
-
-BtreeLBAManager::alloc_mappings_ret // inserts every alloc_info in order starting at iter and returns one cursor per inserted mapping
-BtreeLBAManager::insert_mappings(
-  op_context_t c,
-  LBABtree &btree,
-  LBABtree::iterator iter,
-  std::vector<alloc_mapping_info_t> &alloc_infos)
-{
-  return seastar::do_with(
-    std::move(iter), std::list<LBACursorRef>(),
-    [c, &btree, &alloc_infos]
-    (LBABtree::iterator &iter, std::list<LBACursorRef> &ret)
-  {
-    return trans_intr::do_for_each(
-      alloc_infos.begin(),
-      alloc_infos.end(),
-      [c, &btree, &iter, &ret](auto &info)
-    {
-      assert(info.key != L_ADDR_NULL);
-      return btree.insert(
-       c, iter, info.key, info.value
-      ).si_then([c, &iter, &ret, &info](auto p) {
-       ceph_assert(p.second); // the insert is required to have succeeded
-       iter = std::move(p.first);
-       auto &leaf_node = *iter.get_leaf_node();
-       leaf_node.insert_child_ptr( // record info.extent in the leaf's child slot for this position
-         iter.get_leaf_pos(),
-         info.extent,
-         leaf_node.get_size() - 1 /*the size before the insert*/);
-       if (is_valid_child_ptr(info.extent)) {
-         ceph_assert(info.value.pladdr.is_paddr());
-         assert(info.value.pladdr == iter.get_val().pladdr);
-         assert(info.value.len == iter.get_val().len);
-         assert(info.extent->is_logical());
-         if (info.extent->has_laddr()) {
-           // see TM::remap_pin()
-           assert(info.key == info.extent->get_laddr());
-           assert(info.key == iter.get_key());
-         } else {
-           // see TM::alloc_non_data_extent()
-           //     TM::alloc_data_extents()
-           info.extent->set_laddr(iter.get_key());
-         }
-       }
-       ret.push_back(iter.get_cursor(c));
-       return iter.next(c).si_then([&iter](auto p) { // advance so the next info is inserted after this one
-         iter = std::move(p);
-       });
-      });
-    }).si_then([&ret] {
-      return alloc_mappings_iertr::make_ready_future<
-       std::list<LBACursorRef>>(std::move(ret));
-    });
-  });
-}
-
-static bool is_lba_node(const CachedExtent &e) // file-local overload: forwards to the extent-type overload of is_lba_node
-{
-  return is_lba_node(e.get_type());
-}
-
-BtreeLBAManager::base_iertr::template future<> // sets ret to whether extent e is still referenced by the tree
-_init_cached_extent(
-  op_context_t c,
-  const CachedExtentRef &e,
-  LBABtree &btree,
-  bool &ret)
-{
-  if (e->is_logical()) {
-    auto logn = e->cast<LogicalChildNode>();
-    return btree.lower_bound(
-      c,
-      logn->get_laddr()
-    ).si_then([e, c, logn, &ret](auto iter) {
-      LOG_PREFIX(BtreeLBAManager::init_cached_extent);
-      if (!iter.is_end() && // live only if an entry with the same laddr maps to this extent's paddr
-         iter.get_key() == logn->get_laddr() &&
-         iter.get_val().pladdr.is_paddr() &&
-         iter.get_val().pladdr.get_paddr() == logn->get_paddr()) {
-       assert(!iter.get_leaf_node()->is_pending());
-       iter.get_leaf_node()->link_child(logn.get(), iter.get_leaf_pos()); // attach the extent to its leaf slot
-       logn->set_laddr(iter.get_key());
-       ceph_assert(iter.get_val().len == e->get_length());
-       DEBUGT("logical extent {} live", c.trans, *logn);
-       ret = true;
-      } else {
-       DEBUGT("logical extent {} not live", c.trans, *logn);
-       ret = false;
-      }
-    });
-  } else {
-    return btree.init_cached_extent(c, e // non-logical extents are checked by the btree itself
-    ).si_then([&ret](bool is_alive) {
-      ret = is_alive;
-    });
-  }
-}
-
-BtreeLBAManager::init_cached_extent_ret // resolves to the liveness flag computed by _init_cached_extent() for e
-BtreeLBAManager::init_cached_extent(
-  Transaction &t,
-  CachedExtentRef e)
-{
-  LOG_PREFIX(BtreeLBAManager::init_cached_extent);
-  TRACET("{}", t, *e);
-  return seastar::do_with(bool(), [this, e, &t](bool &ret) { // ret is the out-parameter filled in by _init_cached_extent()
-    auto c = get_context(t);
-    return with_btree<LBABtree>(
-      cache, c,
-      [c, e, &ret](auto &btree) -> base_iertr::future<> {
-       LOG_PREFIX(BtreeLBAManager::init_cached_extent);
-       DEBUGT("extent {}", c.trans, *e);
-       return _init_cached_extent(c, e, btree, ret);
-      }
-    ).si_then([&ret] { return ret; });
-  });
-}
-
-#ifdef UNIT_TESTS_BUILT
-BtreeLBAManager::check_child_trackers_ret // only compiled when UNIT_TESTS_BUILT is defined; forwards to LBABtree::check_child_trackers
-BtreeLBAManager::check_child_trackers(
-  Transaction &t) {
-  auto c = get_context(t);
-  return with_btree<LBABtree>(
-    cache, c,
-    [c](auto &btree) {
-    return btree.check_child_trackers(c);
-  });
-}
-#endif // UNIT_TESTS_BUILT
-
-BtreeLBAManager::scan_mappings_ret // calls f(laddr, paddr, len) for each paddr-backed mapping visited from upper_bound_right(begin) up to end
-BtreeLBAManager::scan_mappings(
-  Transaction &t,
-  laddr_t begin,
-  laddr_t end,
-  scan_mappings_func_t &&f)
-{
-  LOG_PREFIX(BtreeLBAManager::scan_mappings);
-  DEBUGT("begin: {}, end: {}", t, begin, end);
-
-  auto c = get_context(t);
-  return with_btree<LBABtree>(
-    cache,
-    c,
-    [c, f=std::move(f), begin, end](auto &btree) mutable {
-      return LBABtree::iterate_repeat(
-       c,
-       btree.upper_bound_right(c, begin),
-       [f=std::move(f), begin, end](auto &pos) {
-         if (pos.is_end() || pos.get_key() >= end) { // stop at the end of the tree or at the first key at/after end
-           return typename LBABtree::iterate_repeat_ret_inner(
-             interruptible::ready_future_marker{},
-             seastar::stop_iteration::yes);
-         }
-         ceph_assert((pos.get_key() + pos.get_val().len) > begin);
-         if (pos.get_val().pladdr.is_paddr()) { // mappings whose pladdr is not a paddr are skipped
-           f(pos.get_key(), pos.get_val().pladdr.get_paddr(), pos.get_val().len);
-         }
-         return LBABtree::iterate_repeat_ret_inner(
-           interruptible::ready_future_marker{},
-           seastar::stop_iteration::no);
-       });
-    });
-}
-
-BtreeLBAManager::rewrite_extent_ret // rewrites extent through the btree when it is an LBA node; any other extent is a no-op
-BtreeLBAManager::rewrite_extent(
-  Transaction &t,
-  CachedExtentRef extent)
-{
-  LOG_PREFIX(BtreeLBAManager::rewrite_extent);
-  if (extent->has_been_invalidated()) { // an invalidated extent here is treated as fatal
-    ERRORT("extent has been invalidated -- {}", t, *extent);
-    ceph_abort();
-  }
-  assert(!extent->is_logical());
-
-  if (is_lba_node(*extent)) {
-    DEBUGT("rewriting lba extent -- {}", t, *extent);
-    auto c = get_context(t);
-    return with_btree<LBABtree>(
-      cache,
-      c,
-      [c, extent](auto &btree) mutable {
-       return btree.rewrite_extent(c, extent);
-      });
-  } else {
-    DEBUGT("skip non lba extent -- {}", t, *extent);
-    return rewrite_extent_iertr::now();
-  }
-}
-
-BtreeLBAManager::update_mapping_ret // repoints laddr's mapping from prev_addr~prev_len to nextent's paddr/length/crc; resolves to the mapping's refcount
-BtreeLBAManager::update_mapping(
-  Transaction& t,
-  laddr_t laddr,
-  extent_len_t prev_len,
-  paddr_t prev_addr,
-  LogicalChildNode& nextent)
-{
-  LOG_PREFIX(BtreeLBAManager::update_mapping);
-  auto addr = nextent.get_paddr();
-  auto len = nextent.get_length();
-  auto checksum = nextent.get_last_committed_crc();
-  TRACET("laddr={}, paddr {}~0x{:x} => {}~0x{:x}, crc=0x{:x}",
-         t, laddr, prev_addr, prev_len, addr, len, checksum);
-  assert(laddr == nextent.get_laddr());
-  assert(!addr.is_null());
-  return _update_mapping(
-    t,
-    laddr,
-    [prev_addr, addr, prev_len, len, checksum]
-    (const lba_map_val_t &in) { // maps the stored value to its replacement
-      lba_map_val_t ret = in;
-      ceph_assert(in.pladdr.is_paddr());
-      ceph_assert(in.pladdr.get_paddr() == prev_addr); // the stored mapping must still describe the previous location
-      ceph_assert(in.len == prev_len);
-      ret.pladdr = addr;
-      ret.len = len;
-      ret.checksum = checksum;
-      return ret;
-    },
-    &nextent
-  ).si_then([&t, laddr, prev_addr, prev_len, addr, len, checksum, FNAME](auto res) {
-      assert(res.is_alive_mapping());
-      DEBUGT("laddr={}, paddr {}~0x{:x} => {}~0x{:x}, crc=0x{:x} done -- {}",
-             t, laddr, prev_addr, prev_len, addr, len, checksum, res.get_cursor());
-      return update_mapping_iertr::make_ready_future<
-       extent_ref_count_t>(res.get_cursor().get_refcount());
-    },
-    update_mapping_iertr::pass_further{}, // update_mapping_iertr errors are passed on to the caller
-    /* ENOENT in particular should be impossible */
-    crimson::ct_error::assert_all{
-      "Invalid error in BtreeLBAManager::update_mapping"
-    }
-  );
-}
-
-BtreeLBAManager::update_mappings_ret // for each extent, repoints its mapping from the prior paddr to the current paddr and crc (length unchanged)
-BtreeLBAManager::update_mappings(
-  Transaction& t,
-  const std::list<LogicalChildNodeRef>& extents)
-{
-  return trans_intr::do_for_each(extents, [this, &t](auto &extent) {
-    LOG_PREFIX(BtreeLBAManager::update_mappings);
-    auto laddr = extent->get_laddr();
-    auto prev_addr = extent->get_prior_paddr_and_reset(); // NOTE(review): by its name this also resets the stored prior paddr -- confirm
-    auto len = extent->get_length();
-    auto addr = extent->get_paddr();
-    auto checksum = extent->get_last_committed_crc();
-    TRACET("laddr={}, paddr {}~0x{:x} => {}, crc=0x{:x}",
-           t, laddr, prev_addr, len, addr, checksum);
-    assert(!addr.is_null());
-    return _update_mapping(
-      t,
-      laddr,
-      [prev_addr, addr, len, checksum](
-        const lba_map_val_t &in) { // maps the stored value to its replacement
-        lba_map_val_t ret = in;
-        ceph_assert(in.pladdr.is_paddr());
-        ceph_assert(in.pladdr.get_paddr() == prev_addr); // the stored mapping must still describe the previous location
-        ceph_assert(in.len == len);
-        ret.pladdr = addr;
-        ret.checksum = checksum;
-        return ret;
-      },
-      nullptr   // all the extents should have already been
-                // added to the fixed_kv_btree
-    ).si_then([&t, laddr, prev_addr, len, addr, checksum, FNAME](auto res) {
-        DEBUGT("laddr={}, paddr {}~0x{:x} => {}, crc=0x{:x} done -- {}",
-               t, laddr, prev_addr, len, addr, checksum, res.get_cursor());
-        return update_mapping_iertr::make_ready_future();
-      },
-      update_mapping_iertr::pass_further{},
-      /* ENOENT in particular should be impossible */
-      crimson::ct_error::assert_all{
-        "Invalid error in BtreeLBAManager::update_mappings"
-      }
-    );
-  });
-}
-
-BtreeLBAManager::get_physical_extent_if_live_ret
-BtreeLBAManager::get_physical_extent_if_live(
-  Transaction &t,
-  extent_types_t type,
-  paddr_t addr,
-  laddr_t laddr,
-  extent_len_t len)
-{
-  LOG_PREFIX(BtreeLBAManager::get_physical_extent_if_live);
-  DEBUGT("{}, laddr={}, paddr={}, length={}",
-         t, type, laddr, addr, len);
-  ceph_assert(is_lba_node(type));
-  auto c = get_context(t);
-  return with_btree_ret<LBABtree, CachedExtentRef>(
-    cache,
-    c,
-    [c, type, addr, laddr, len](auto &btree) {
-      if (type == extent_types_t::LADDR_INTERNAL) {
-       return btree.get_internal_if_live(c, addr, laddr, len);
-      } else {
-       assert(type == extent_types_t::LADDR_LEAF ||
-              type == extent_types_t::DINK_LADDR_LEAF);
-       return btree.get_leaf_if_live(c, addr, laddr, len);
-      }
-    });
-}
-/**
- * refresh_lba_mapping
- *
- * Hands the mapping back untouched when mapping.is_viewable() already
- * holds. Otherwise refreshes its direct cursor and then, if one is set,
- * its indirect cursor through refresh_lba_cursor(). Builds without NDEBUG
- * chain one more continuation that asserts the mapping is viewable after
- * both refreshes.
- */
-BtreeLBAManager::refresh_lba_mapping_ret
-BtreeLBAManager::refresh_lba_mapping(Transaction &t, LBAMapping mapping)
-{
-  assert(mapping.is_linked_direct());
-  if (mapping.is_viewable()) {  // nothing to refresh
-    return refresh_lba_mapping_iertr::make_ready_future<
-      LBAMapping>(std::move(mapping));
-  }
-  auto c = get_context(t);
-  return with_btree_state<LBABtree, LBAMapping>(
-    cache,
-    c,
-    std::move(mapping),  // the lambda below works on this moved-in state
-    [c, this](LBABtree &btree, LBAMapping &mapping) mutable
-  {
-    return refresh_lba_cursor(c, btree, *mapping.direct_cursor
-    ).si_then([c, this, &btree, &mapping] {
-      if (mapping.indirect_cursor) {
-       return refresh_lba_cursor(c, btree, *mapping.indirect_cursor);
-      }
-      return refresh_lba_cursor_iertr::make_ready_future();
-#ifndef NDEBUG
-    }).si_then([&mapping] {  // extra continuation, only without NDEBUG
-      assert(mapping.is_viewable());
-#endif
-    });
-  });
-}
-/**
- * refresh_lba_cursor
- *
- * Brings cursor back in sync with the leaf node that c.trans should see.
- *
- * - parent is no longer valid: the laddr is searched again with
- *   btree.lower_bound() and parent/modifications/pos are refilled (val as
- *   well, unless the cursor is the end cursor).
- * - parent is valid but not viewable by c.trans: parent is switched to the
- *   node returned by find_pending_version() and pos/val are recomputed.
- * - parent is viewable but was modified since cursor.modifications was
- *   recorded: pos/val are recomputed from the same leaf.
- * - otherwise the cursor is left as it is.
- *
- * Every path bumps the matching counter in stats.
- */
-BtreeLBAManager::refresh_lba_cursor_ret
-BtreeLBAManager::refresh_lba_cursor(
-  op_context_t c,
-  LBABtree &btree,
-  LBACursor &cursor)
-{
-  LOG_PREFIX(BtreeLBAManager::refresh_lba_cursor);
-  stats.num_refresh_parent_total++;  // counted for every call
-
-  if (!cursor.parent->is_valid()) {
-    stats.num_refresh_invalid_parent++;
-    TRACET("cursor {} parent is invalid, re-search from scratch",
-          c.trans, cursor);
-    return btree.lower_bound(c, cursor.get_laddr()
-    ).si_then([&cursor](LBABtree::iterator iter) {
-      auto leaf = iter.get_leaf_node();
-      cursor.parent = leaf;
-      cursor.modifications = leaf->modifications;
-      cursor.pos = iter.get_leaf_pos();
-      if (!cursor.is_end()) {
-       ceph_assert(!iter.is_end());
-       ceph_assert(iter.get_key() == cursor.get_laddr());  // key must still exist
-       cursor.val = iter.get_val();
-       assert(cursor.is_viewable());
-      }
-    });
-  }
-
-  auto [viewable, state] = cursor.parent->is_viewable_by_trans(c.trans);
-  auto leaf = cursor.parent->cast<LBALeafNode>();
-
-  TRACET("cursor: {} viewable: {} state: {}",
-        c.trans, cursor, viewable, state);
-
-  if (!viewable) {
-    stats.num_refresh_unviewable_parent++;
-    leaf = leaf->find_pending_version(c.trans, cursor.get_laddr());
-    cursor.parent = leaf;
-  }
-
-  if (!viewable ||
-      leaf->modified_since(cursor.modifications)) {
-    if (viewable) {  // i.e. this branch was entered because of modified_since()
-      stats.num_refresh_modified_viewable_parent++;
-    }
-
-    cursor.modifications = leaf->modifications;
-    if (cursor.is_end()) {
-      cursor.pos = leaf->get_size();  // end cursor sits one past the last slot
-      assert(!cursor.val);
-    } else {
-      auto i = leaf->lower_bound(cursor.get_laddr());
-      cursor.pos = i.get_offset();
-      cursor.val = i.get_val();
-
-      // cross-check the refreshed pos/val against the leaf content
-      auto iter = LBALeafNode::iterator(leaf.get(), cursor.pos);
-      ceph_assert(iter.get_key() == cursor.key);
-      ceph_assert(iter.get_val() == cursor.val);
-      assert(cursor.is_viewable());
-    }
-  }
-
-  return refresh_lba_cursor_iertr::make_ready_future();
-}
-
-void BtreeLBAManager::register_metrics()
-{
-  LOG_PREFIX(BtreeLBAManager::register_metrics);
-  DEBUG("start");
-  stats = {};
-  namespace sm = seastar::metrics;
-  metrics.add_group(
-    "LBA",
-    {
-      sm::make_counter(
-        "alloc_extents",
-        stats.num_alloc_extents,
-        sm::description("total number of lba alloc_extent operations")
-      ),
-      sm::make_counter(
-        "alloc_extents_iter_nexts",
-        stats.num_alloc_extents_iter_nexts,
-        sm::description("total number of iterator next operations during extent allocation")
-      ),
-      sm::make_counter(
-        "refresh_parent_total",
-        stats.num_refresh_parent_total,
-        sm::description("total number of refreshed cursors")
-      ),
-      sm::make_counter(
-        "refresh_invalid_parent",
-        stats.num_refresh_invalid_parent,
-        sm::description("total number of refreshed cursors with invalid parents")
-      ),
-      sm::make_counter(
-        "refresh_unviewable_parent",
-        stats.num_refresh_unviewable_parent,
-        sm::description("total number of refreshed cursors with unviewable parents")
-      ),
-      sm::make_counter(
-        "refresh_modified_viewable_parent",
-        stats.num_refresh_modified_viewable_parent,
-        sm::description("total number of refreshed cursors with viewable but modified parents")
-      ),
-    }
-  );
-}
-/**
- * _decref_intermediate
- *
- * Drops one reference from the mapping found by
- * btree.upper_bound_right(c, addr). That mapping is asserted to be a
- * direct (paddr) mapping that covers addr~len and holds at least one
- * reference. If the refcount reaches 0 the mapping is removed and the
- * removed key/value is returned, otherwise the decremented value is
- * written back and a cursor to the updated mapping is returned.
- */
-BtreeLBAManager::_decref_intermediate_ret
-BtreeLBAManager::_decref_intermediate(
-  Transaction &t,
-  laddr_t addr,
-  extent_len_t len)
-{
-  auto c = get_context(t);
-  return with_btree<LBABtree>(
-    cache,
-    c,
-    [c, addr, len](auto &btree) mutable {
-    return btree.upper_bound_right(
-      c, addr
-    ).si_then([&btree, addr, len, c](auto iter) {
-      ceph_assert(!iter.is_end());
-      laddr_t key = iter.get_key();
-      ceph_assert(key <= addr);
-      auto val = iter.get_val();
-      ceph_assert(key + val.len >= addr + len);  // mapping covers addr~len
-      ceph_assert(val.pladdr.is_paddr());
-      ceph_assert(val.refcount >= 1);
-      val.refcount -= 1;  // local value only; stored by update() below
-
-      LOG_PREFIX(BtreeLBAManager::_decref_intermediate);
-      TRACET("decreased refcount of intermediate key {} -- {}",
-            c.trans, key, val);
-
-      if (val.refcount == 0) {
-       return btree.remove(c, iter
-       ).si_then([key, val] {
-         return ref_iertr::make_ready_future<
-           update_mapping_ret_bare_t>(key, val);
-       });
-      } else {
-       return btree.update(c, iter, val
-       ).si_then([c](auto iter) {
-         return ref_iertr::make_ready_future<
-           update_mapping_ret_bare_t>(iter.get_cursor(c));
-       });
-      }
-    });
-  });
-}
-/**
- * remap_mappings
- *
- * Replaces orig_mapping with one new mapping per entry of remaps.
- *
- * 1) One reference is dropped from orig_mapping, which must remove it.
- * 2) Direct orig_mapping: the given extents are inserted with
- *    alloc_extents(), starting at the laddr of the first remap entry.
- * 3) Indirect orig_mapping: one indirect mapping is inserted per remap
- *    entry, laddr and intermediate key both shifted by the entry's
- *    offset. With more than one entry, remaps.size() - 1 references are
- *    added on the intermediate base; with a single entry the direct
- *    cursor of orig_mapping is duplicated instead.
- * 4) The result must contain exactly one mapping per remap entry; debug
- *    builds also check each key and length.
- */
-BtreeLBAManager::remap_ret
-BtreeLBAManager::remap_mappings(
-  Transaction &t,
-  LBAMapping orig_mapping,
-  std::vector<remap_entry_t> remaps,
-  std::vector<LogicalChildNodeRef> extents)
-{
-  LOG_PREFIX(BtreeLBAManager::remap_mappings);
-  struct state_t {  // everything the continuations below refer to
-    LBAMapping orig_mapping;
-    std::vector<remap_entry_t> remaps;
-    std::vector<LogicalChildNodeRef> extents;
-    std::vector<alloc_mapping_info_t> alloc_infos;
-    std::vector<LBAMapping> ret;
-  };
-  return seastar::do_with(
-    state_t(std::move(orig_mapping), std::move(remaps), std::move(extents), {}, {}),
-    [this, &t, FNAME](state_t &state)
-  {
-    return update_refcount(
-      t, state.orig_mapping.get_key(), -1, false
-    ).si_then([this, &t, &state, FNAME](auto ret) {
-      // Remapping the shared direct mapping is prohibited,
-      // the refcount of indirect mapping should always be 1.
-      ceph_assert(ret.is_removed_mapping());
-
-      auto orig_laddr = state.orig_mapping.get_key();
-      if (!state.orig_mapping.is_indirect()) {
-       auto &addr = ret.get_removed_mapping().map_value.pladdr;
-       ceph_assert(addr.is_paddr() && !addr.get_paddr().is_zero());
-       return alloc_extents(
-         t,
-         (state.remaps.front().offset + orig_laddr).checked_to_laddr(),
-         std::move(state.extents),
-         EXTENT_DEFAULT_REF_COUNT
-       ).si_then([&state](auto ret) {
-         state.ret = std::move(ret);
-         return remap_iertr::make_ready_future();
-       });
-      }
-
-      // indirect mapping from here on
-      extent_len_t orig_len = state.orig_mapping.get_length();
-      auto intermediate_key = state.orig_mapping.get_intermediate_key();
-      ceph_assert(intermediate_key != L_ADDR_NULL);
-      DEBUGT("remap indirect mapping {}", t, state.orig_mapping);
-      for (auto &remap : state.remaps) {
-       DEBUGT("remap 0x{:x}~0x{:x}", t, remap.offset, remap.len);
-       ceph_assert(remap.len != 0);
-       ceph_assert(remap.offset + remap.len <= orig_len);  // stays inside orig
-       auto remapped_laddr = (orig_laddr + remap.offset)
-           .checked_to_laddr();
-       auto remapped_intermediate_key = (intermediate_key + remap.offset)
-           .checked_to_laddr();
-       state.alloc_infos.emplace_back(
-         alloc_mapping_info_t::create_indirect(
-           remapped_laddr, remap.len, remapped_intermediate_key));
-      }
-
-      return alloc_sparse_mappings(
-       t, state.alloc_infos.front().key, state.alloc_infos,
-       alloc_policy_t::deterministic
-      ).si_then([&t, &state, this](std::list<LBACursorRef> cursors) {
-       return seastar::futurize_invoke([&t, &state, this] {
-         if (state.remaps.size() > 1) {
-           auto base = state.orig_mapping.get_intermediate_base();
-           return update_refcount(
-             t, base, state.remaps.size() - 1, false
-           ).si_then([](update_mapping_ret_bare_t ret) {
-             return ret.take_cursor();
-           });
-         } else {
-           return remap_iertr::make_ready_future<
-             LBACursorRef>(state.orig_mapping.direct_cursor->duplicate());
-         }
-       }).si_then([&state, cursors=std::move(cursors)](auto direct) mutable {
-         for (auto &cursor : cursors) {
-           state.ret.emplace_back(LBAMapping::create_indirect(
-             direct->duplicate(), std::move(cursor)));
-         }
-         return remap_iertr::make_ready_future();
-       });
-      });
-    }).si_then([&state] {
-      assert(state.ret.size() == state.remaps.size());
-#ifndef NDEBUG
-      auto mapping_it = state.ret.begin();
-      auto remap_it = state.remaps.begin();
-      for (;mapping_it != state.ret.end(); mapping_it++, remap_it++) {
-       auto &mapping = *mapping_it;
-       auto &remap = *remap_it;
-       assert(mapping.get_key() == state.orig_mapping.get_key() + remap.offset);
-       assert(mapping.get_length() == remap.len);
-      }
-#endif
-      return remap_iertr::make_ready_future<
-       std::vector<LBAMapping>>(std::move(state.ret));
-    });
-  });
-}
-
-BtreeLBAManager::update_refcount_ret
-BtreeLBAManager::update_refcount(
-  Transaction &t,
-  laddr_t addr,
-  int delta,
-  bool cascade_remove)
-{
-  LOG_PREFIX(BtreeLBAManager::update_refcount);
-  TRACET("laddr={}, delta={}", t, addr, delta);
-  return _update_mapping(
-    t,
-    addr,
-    [delta](const lba_map_val_t &in) {
-      lba_map_val_t out = in;
-      ceph_assert((int)out.refcount + delta >= 0);
-      out.refcount += delta;
-      return out;
-    },
-    nullptr
-  ).si_then([&t, addr, delta, FNAME, this, cascade_remove](auto res) {
-    DEBUGT("laddr={}, delta={} done -- {}",
-          t, addr, delta,
-          res.is_alive_mapping()
-            ? res.get_cursor().val
-            : res.get_removed_mapping().map_value);
-    if (res.is_removed_mapping() && cascade_remove &&
-       res.get_removed_mapping().map_value.pladdr.is_laddr()) {
-      auto &val = res.get_removed_mapping().map_value;
-      TRACET("decref intermediate {} -> {}",
-            t, addr, val.pladdr.get_laddr());
-      return _decref_intermediate(t, val.pladdr.get_laddr(), val.len
-      ).handle_error_interruptible(
-       update_mapping_iertr::pass_further{},
-       crimson::ct_error::assert_all{
-         "unexpect ENOENT"
-       }
-      );
-    }
-    return update_mapping_iertr::make_ready_future<
-      update_mapping_ret_bare_t>(std::move(res));
-  });
-}
-/**
- * _update_mapping
- *
- * Looks addr up with btree.lower_bound() and fails with enoent unless a
- * mapping with exactly that key exists. f is applied to the current
- * value: a result with refcount 0 removes the mapping (nextent has to be
- * null then), any other result is written back with btree.update(). A
- * non-null nextent is installed as the child pointer of the updated leaf
- * slot.
- */
-BtreeLBAManager::_update_mapping_ret
-BtreeLBAManager::_update_mapping(
-  Transaction &t,
-  laddr_t addr,
-  update_func_t &&f,
-  LogicalChildNode* nextent)
-{
-  auto c = get_context(t);
-  return with_btree<LBABtree>(
-    cache,
-    c,
-    [f=std::move(f), c, addr, nextent](auto &btree) mutable {
-      return btree.lower_bound(
-       c, addr
-      ).si_then([&btree, f=std::move(f), c, addr, nextent](auto iter)
-               -> _update_mapping_ret {
-       if (iter.is_end() || iter.get_key() != addr) {  // no exact match
-         LOG_PREFIX(BtreeLBAManager::_update_mapping);
-         ERRORT("laddr={} doesn't exist", c.trans, addr);
-         return crimson::ct_error::enoent::make();
-       }
-
-       auto ret = f(iter.get_val());
-       if (ret.refcount == 0) {  // last reference dropped: remove
-         assert(nextent == nullptr);
-         return btree.remove(
-           c,
-           iter
-         ).si_then([addr, ret] {
-           return update_mapping_ret_bare_t(addr, ret);
-         });
-       } else {
-         return btree.update(
-           c,
-           iter,
-           ret
-         ).si_then([c, nextent](auto iter) {
-           if (nextent) {
-             // nextent is provided iff unlinked,
-              // also see TM::rewrite_logical_extent()
-             assert(!nextent->has_parent_tracker());
-             iter.get_leaf_node()->update_child_ptr(
-               iter.get_leaf_pos(), nextent);
-           }
-           assert(!nextent || 
-                  (nextent->has_parent_tracker() &&
-                   nextent->get_parent_node().get() == iter.get_leaf_node().get()));
-           return update_mapping_ret_bare_t(iter.get_cursor(c));
-         });
-       }
-      });
-    });
-}
-
-}
diff --git a/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.h b/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.h
deleted file mode 100644 (file)
index 8a66980..0000000
+++ /dev/null
@@ -1,532 +0,0 @@
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab
-
-#pragma once
-
-#include <iostream>
-
-#include <boost/intrusive_ptr.hpp>
-#include <boost/smart_ptr/intrusive_ref_counter.hpp>
-#include <seastar/core/future.hh>
-
-#include "include/ceph_assert.h"
-#include "include/buffer_fwd.h"
-#include "include/interval_set.h"
-#include "common/interval_map.h"
-#include "crimson/osd/exceptions.h"
-
-#include "crimson/os/seastore/btree/fixed_kv_btree.h"
-#include "crimson/os/seastore/seastore_types.h"
-#include "crimson/os/seastore/lba_manager.h"
-#include "crimson/os/seastore/cache.h"
-
-#include "crimson/os/seastore/lba_manager/btree/lba_btree_node.h"
-#include "crimson/os/seastore/btree/btree_types.h"
-
-namespace crimson::os::seastore {
-class LogicalCachedExtent;
-}
-
-namespace crimson::os::seastore::lba_manager::btree {
-
-using LBABtree = FixedKVBtree<
-  laddr_t, lba_map_val_t, LBAInternalNode,
-  LBALeafNode, LBACursor, LBA_BLOCK_SIZE>;
-
-/**
- * BtreeLBAManager
- *
- * LBAManager implementation backed by an LBABtree.
- *
- * Uses a wandering btree to track two things:
- * 1) lba state including laddr_t -> paddr_t mapping
- * 2) reverse paddr_t -> laddr_t mapping for gc (TODO)
- *
- * Generally, any transaction will involve
- * 1) deltas against lba tree nodes
- * 2) new lba tree nodes
- *    - Note, there must necessarily be a delta linking
- *      these new nodes into the tree -- might be a
- *      bootstrap_state_t delta if new root
- *
- * get_mappings, alloc_extent_*, etc populate a Transaction
- * which then gets submitted
- *
- * The methods defined inline below are thin wrappers around
- * alloc_contiguous_mappings(), alloc_sparse_mappings() and
- * update_refcount().
- */
-class BtreeLBAManager : public LBAManager {
-public:
-  BtreeLBAManager(Cache &cache)
-    : cache(cache)
-  {
-    register_metrics();
-  }
-
-  mkfs_ret mkfs(
-    Transaction &t) final;
-
-  get_mappings_ret get_mappings(
-    Transaction &t,
-    laddr_t offset, extent_len_t length) final;
-
-  get_mapping_ret get_mapping(
-    Transaction &t,
-    laddr_t offset) final;
-  /**
-   * reserve_region
-   *
-   * Inserts a single mapping built by create_zero(len) (its pladdr is
-   * P_ADDR_ZERO), searching linearly from hint, and returns it as a direct
-   * LBAMapping.
-   */
-  alloc_extent_ret reserve_region(
-    Transaction &t,
-    laddr_t hint,
-    extent_len_t len) final
-  {
-    std::vector<alloc_mapping_info_t> alloc_infos = {
-      alloc_mapping_info_t::create_zero(len)};
-    return seastar::do_with(
-      std::move(alloc_infos),
-      [&t, hint, this](auto &alloc_infos) {
-      return alloc_contiguous_mappings(
-       t, hint, alloc_infos, alloc_policy_t::linear_search
-      ).si_then([](auto cursors) {
-       assert(cursors.size() == 1);
-       return LBAMapping::create_direct(std::move(cursors.front()));
-      });
-    });
-  }
-  /**
-   * clone_mapping
-   *
-   * Inserts the indirect mapping laddr~len -> intermediate_key at exactly
-   * laddr and then adds one reference on the mapping at intermediate_base.
-   * The two resulting cursors are combined into an indirect LBAMapping.
-   * Errors other than input_output_error are asserted to be impossible.
-   */
-  alloc_extent_ret clone_mapping(
-    Transaction &t,
-    laddr_t laddr,
-    extent_len_t len,
-    laddr_t intermediate_key,
-    laddr_t intermediate_base) final
-  {
-    std::vector<alloc_mapping_info_t> alloc_infos = {
-      alloc_mapping_info_t::create_indirect(
-       laddr, len, intermediate_key)};
-    return seastar::do_with(
-      std::move(alloc_infos),
-      [this, &t, laddr, intermediate_base](auto &infos) {
-       return alloc_sparse_mappings(
-         t, laddr, infos, alloc_policy_t::deterministic
-       ).si_then([this, &t, intermediate_base](auto cursors) {
-         ceph_assert(cursors.size() == 1);
-         ceph_assert(cursors.front()->is_indirect());
-         return update_refcount(t, intermediate_base, 1, false
-         ).si_then([cursors=std::move(cursors)](auto p) mutable {
-           assert(p.is_alive_mapping());
-           auto mapping = LBAMapping::create_indirect(
-             p.take_cursor(), std::move(cursors.front()));
-           ceph_assert(mapping.is_stable());
-           return alloc_extent_iertr::make_ready_future<
-             LBAMapping>(std::move(mapping));
-         });
-       });
-      }).handle_error_interruptible(
-       crimson::ct_error::input_output_error::pass_further{},
-       crimson::ct_error::assert_all{"unexpect enoent"});
-  }
-  /**
-   * alloc_extent
-   *
-   * Inserts one direct mapping for ext, searching linearly from hint. ext
-   * must not have an laddr yet and its last committed crc must still be 0.
-   */
-  alloc_extent_ret alloc_extent(
-    Transaction &t,
-    laddr_t hint,
-    LogicalChildNode &ext,
-    extent_ref_count_t refcount) final
-  {
-    // The real checksum will be updated upon transaction commit
-    assert(ext.get_last_committed_crc() == 0);
-    assert(!ext.has_laddr());
-    std::vector<alloc_mapping_info_t> alloc_infos = {
-      alloc_mapping_info_t::create_direct(
-       L_ADDR_NULL,
-       ext.get_length(),
-       ext.get_paddr(),
-       refcount,
-       ext.get_last_committed_crc(),
-       ext)};
-    return seastar::do_with(
-      std::move(alloc_infos),
-      [this, &t, hint](auto &alloc_infos) {
-      return alloc_contiguous_mappings(
-       t, hint, alloc_infos, alloc_policy_t::linear_search
-      ).si_then([](auto cursors) {
-       assert(cursors.size() == 1);
-       return LBAMapping::create_direct(std::move(cursors.front()));
-      });
-    });
-  }
-  /**
-   * alloc_extents
-   *
-   * Inserts one direct mapping per extent. Either all extents already have
-   * an laddr or none of them has. In the first case every mapping is
-   * inserted at its extent's laddr (alloc_sparse_mappings, deterministic),
-   * in the second the mappings are laid out contiguously, searching
-   * linearly from hint.
-   */
-  alloc_extents_ret alloc_extents(
-    Transaction &t,
-    laddr_t hint,
-    std::vector<LogicalChildNodeRef> extents,
-    extent_ref_count_t refcount) final
-  {
-    std::vector<alloc_mapping_info_t> alloc_infos;
-    assert(!extents.empty());
-    auto has_laddr = extents.front()->has_laddr();
-    for (auto &extent : extents) {
-      assert(extent);
-      assert(extent->has_laddr() == has_laddr);
-      alloc_infos.emplace_back(
-       alloc_mapping_info_t::create_direct(
-         extent->has_laddr() ? extent->get_laddr() : L_ADDR_NULL,
-         extent->get_length(),
-         extent->get_paddr(),
-         refcount,
-         extent->get_last_committed_crc(),
-         *extent));
-    }
-    return seastar::do_with(
-      std::move(alloc_infos),
-      [this, &t, hint, has_laddr](auto &alloc_infos)
-    {
-      if (has_laddr) {
-       return alloc_sparse_mappings(
-         t, hint, alloc_infos, alloc_policy_t::deterministic)
-#ifndef NDEBUG
-       .si_then([&alloc_infos](std::list<LBACursorRef> cursors) {
-         assert(alloc_infos.size() == cursors.size());
-         auto info_p = alloc_infos.begin();
-         auto cursor_p = cursors.begin();
-         for (; info_p != alloc_infos.end(); info_p++, cursor_p++) {
-           auto &cursor = *cursor_p;
-           assert(cursor->get_laddr() == info_p->key);
-         }
-         return alloc_extent_iertr::make_ready_future<
-           std::list<LBACursorRef>>(std::move(cursors));
-       })
-#endif
-         ;
-      } else {
-       return alloc_contiguous_mappings(
-         t, hint, alloc_infos, alloc_policy_t::linear_search);
-      }
-    }).si_then([](std::list<LBACursorRef> cursors) {
-      std::vector<LBAMapping> ret;
-      for (auto &cursor : cursors) {
-       ret.emplace_back(LBAMapping::create_direct(std::move(cursor)));
-      }
-      return ret;
-    });
-  }
-  /**
-   * remove_mapping
-   *
-   * Drops one reference from the mapping at addr with cascade_remove set,
-   * see update_refcount().
-   */
-  ref_ret remove_mapping(
-    Transaction &t,
-    laddr_t addr) final {
-    return update_refcount(t, addr, -1, true
-    ).si_then([](auto res) {
-      return ref_update_result_t(res);
-    });
-  }
-
-  remap_ret remap_mappings(
-    Transaction &t,
-    LBAMapping orig_mapping,
-    std::vector<remap_entry_t> remaps,
-    std::vector<LogicalChildNodeRef> extents) final;
-
-  /**
-   * init_cached_extent
-   *
-   * Checks whether e is live (reachable from lba tree) and drops or initializes
-   * accordingly.
-   *
-   * Returns whether e is live.
-   */
-  init_cached_extent_ret init_cached_extent(
-    Transaction &t,
-    CachedExtentRef e) final;
-
-#ifdef UNIT_TESTS_BUILT
-  check_child_trackers_ret check_child_trackers(Transaction &t) final;
-#endif
-
-  scan_mappings_ret scan_mappings(
-    Transaction &t,
-    laddr_t begin,
-    laddr_t end,
-    scan_mappings_func_t &&f) final;
-
-  rewrite_extent_ret rewrite_extent(
-    Transaction &t,
-    CachedExtentRef extent) final;
-
-  update_mapping_ret update_mapping(
-    Transaction& t,
-    laddr_t laddr,
-    extent_len_t prev_len,
-    paddr_t prev_addr,
-    LogicalChildNode&) final;
-
-  update_mappings_ret update_mappings(
-    Transaction& t,
-    const std::list<LogicalChildNodeRef>& extents);
-
-  get_physical_extent_if_live_ret get_physical_extent_if_live(
-    Transaction &t,
-    extent_types_t type,
-    paddr_t addr,
-    laddr_t laddr,
-    extent_len_t len) final;
-
-  refresh_lba_mapping_ret refresh_lba_mapping(
-    Transaction &t,
-    LBAMapping mapping) final;
-
-private:
-  Cache &cache;
-  // counters exported by register_metrics()
-  struct {
-    uint64_t num_alloc_extents = 0;
-    uint64_t num_alloc_extents_iter_nexts = 0;
-    uint64_t num_refresh_parent_total = 0;
-    uint64_t num_refresh_invalid_parent = 0;
-    uint64_t num_refresh_unviewable_parent = 0;
-    uint64_t num_refresh_modified_viewable_parent = 0;
-  } stats;
-  // one mapping to be inserted: key, value and the child pointer to link
-  struct alloc_mapping_info_t {
-    laddr_t key = L_ADDR_NULL; // once assigned, the allocation to
-                              // key must be exact and successful
-    lba_map_val_t value;
-    LogicalChildNode* extent = nullptr;
-
-    static alloc_mapping_info_t create_zero(extent_len_t len) {
-      return {
-       L_ADDR_NULL,
-       {
-         len,
-         pladdr_t(P_ADDR_ZERO),
-         EXTENT_DEFAULT_REF_COUNT,
-         0
-       },
-       static_cast<LogicalChildNode*>(get_reserved_ptr<LBALeafNode, laddr_t>())};
-    }
-    static alloc_mapping_info_t create_indirect(
-      laddr_t laddr,
-      extent_len_t len,
-      laddr_t intermediate_key) {
-      return {
-       laddr,
-       {
-         len,
-         pladdr_t(intermediate_key),
-         EXTENT_DEFAULT_REF_COUNT,
-         0     // crc will only be used and checked with LBA direct mappings
-               // also see pin_to_extent(_by_type)
-       },
-       static_cast<LogicalChildNode*>(get_reserved_ptr<LBALeafNode, laddr_t>())};
-    }
-    static alloc_mapping_info_t create_direct(
-      laddr_t laddr,
-      extent_len_t len,
-      paddr_t paddr,
-      extent_ref_count_t refcount,
-      checksum_t checksum,
-      LogicalChildNode& extent) {
-      return {laddr, {len, pladdr_t(paddr), refcount, checksum}, &extent};
-    }
-  };
-
-  op_context_t get_context(Transaction &t) {
-    return op_context_t{cache, t};
-  }
-
-  seastar::metrics::metric_group metrics;
-  void register_metrics();
-  /**
-   * update_mapping_ret_bare_t
-   *
-   * Outcome of a mapping update: either the key/value of a mapping that
-   * has been removed (variant index 0) or a cursor to the mapping that is
-   * still alive (variant index 1).
-   */
-  struct update_mapping_ret_bare_t {
-    update_mapping_ret_bare_t()
-       : update_mapping_ret_bare_t(LBACursorRef(nullptr)) {}
-
-    update_mapping_ret_bare_t(LBACursorRef cursor)
-       : ret(std::move(cursor)) {}
-
-    update_mapping_ret_bare_t(laddr_t laddr, lba_map_val_t value)
-       : ret(removed_mapping_t{laddr, value}) {}
-
-    struct removed_mapping_t {
-      laddr_t laddr;
-      lba_map_val_t map_value;
-    };
-    std::variant<removed_mapping_t, LBACursorRef> ret;
-
-    bool is_removed_mapping() const {
-      return ret.index() == 0;
-    }
-
-    bool is_alive_mapping() const {
-      if (ret.index() == 1) {
-       assert(std::get<1>(ret));  // a live mapping never holds a null cursor
-       return true;
-      } else {
-       return false;
-      }
-    }
-
-    const removed_mapping_t& get_removed_mapping() const {
-      assert(is_removed_mapping());
-      return std::get<0>(ret);
-    }
-
-    const LBACursor& get_cursor() const {
-      assert(is_alive_mapping());
-      return *std::get<1>(ret);
-    }
-
-    LBACursorRef take_cursor() {
-      assert(is_alive_mapping());
-      return std::move(std::get<1>(ret));
-    }
-    // conversion for callers returning ref_ret; direct mappings only
-    explicit operator ref_update_result_t() const {
-      if (is_removed_mapping()) {
-       auto v = get_removed_mapping();
-       auto &val = v.map_value;
-       ceph_assert(val.pladdr.is_paddr());
-       return {v.laddr, val.refcount, val.pladdr, val.len};
-      } else {
-       assert(is_alive_mapping());
-       auto &c = get_cursor();
-       assert(c.val);
-       ceph_assert(!c.is_indirect());
-       return {c.get_laddr(), c.val->refcount, c.val->pladdr, c.val->len};
-      }
-    }
-  };
-  /**
-   * update_refcount
-   *
-   * Adds delta to the refcount of the mapping at addr. With cascade_remove
-   * set, removing an indirect mapping also drops one reference from the
-   * intermediate mapping it points to.
-   */
-  using update_refcount_iertr = ref_iertr;
-  using update_refcount_ret = update_refcount_iertr::future<
-    update_mapping_ret_bare_t>;
-  update_refcount_ret update_refcount(
-    Transaction &t,
-    laddr_t addr,
-    int delta,
-    bool cascade_remove);
-
-  /**
-   * _update_mapping
-   *
-   * Replaces the value of the mapping at addr with f(current value); the
-   * mapping is removed instead when the value returned by f has refcount 0.
-   * Fails with enoent if there is no mapping at addr.
-   */
-  using _update_mapping_iertr = ref_iertr;
-  using _update_mapping_ret = ref_iertr::future<
-    update_mapping_ret_bare_t>;
-  using update_func_t = std::function<
-    lba_map_val_t(const lba_map_val_t &v)
-    >;
-  _update_mapping_ret _update_mapping(
-    Transaction &t,
-    laddr_t addr,
-    update_func_t &&f,
-    LogicalChildNode*);
-
-  struct insert_position_t {
-    laddr_t laddr;
-    LBABtree::iterator insert_iter;
-  };
-  enum class alloc_policy_t {
-    deterministic, // no conflict
-    linear_search,
-  };
-  using search_insert_position_iertr = base_iertr;
-  using search_insert_position_ret =
-      search_insert_position_iertr::future<insert_position_t>;
-  search_insert_position_ret search_insert_position(
-    op_context_t c,
-    LBABtree &btree,
-    laddr_t hint,
-    extent_len_t length,
-    alloc_policy_t policy);
-
-  using alloc_mappings_iertr = base_iertr;
-  using alloc_mappings_ret =
-      alloc_mappings_iertr::future<std::list<LBACursorRef>>;
-  /**
-   * alloc_contiguous_mappings
-   *
-   * Insert a range of contiguous mappings into the LBA btree.
-   *
-   * hint is a non-null laddr hint for allocation. All alloc_infos' keys
-   * should be L_ADDR_NULL, the final laddr is relative to the allocated
-   * laddr based on preceding mappings' total length.
-   */
-  alloc_mappings_ret alloc_contiguous_mappings(
-    Transaction &t,
-    laddr_t hint,
-    std::vector<alloc_mapping_info_t> &alloc_infos,
-    alloc_policy_t policy);
-
-  /**
-   * alloc_sparse_mappings
-   *
-   * Insert a range of sparse mappings into the LBA btree.
-   *
-   * hint is a non-null laddr hint for allocation. All of alloc_infos' keys
-   * are non-null laddr hints and must be incremental, each mapping's final
-   * laddr keeps the same offset to the allocated laddr as its key has to
-   * hint.
-   */
-  alloc_mappings_ret alloc_sparse_mappings(
-    Transaction &t,
-    laddr_t hint,
-    std::vector<alloc_mapping_info_t> &alloc_infos,
-    alloc_policy_t policy);
-
-  /**
-   * insert_mappings
-   *
-   * Insert all lba mappings built from alloc_infos into LBA btree before
-   * iter and return the inserted LBACursors.
-   *
-   * NOTE: There is no guarantee that the returned cursors are all valid,
-   * since a later insertion may invalidate the parent extent of a
-   * previously returned LBACursor.
-   */
-  alloc_mappings_ret insert_mappings(
-    op_context_t c,
-    LBABtree &btree,
-    LBABtree::iterator iter,
-    std::vector<alloc_mapping_info_t> &alloc_infos);
-  // adds delta (> 0) references to the mapping at addr, never cascades
-  ref_ret _incref_extent(
-    Transaction &t,
-    laddr_t addr,
-    int delta) {
-    ceph_assert(delta > 0);
-    return update_refcount(t, addr, delta, false
-    ).si_then([](auto res) {
-      return ref_update_result_t(res);
-    });
-  }
-
-  using _get_cursor_ret = get_mapping_iertr::future<LBACursorRef>;
-  _get_cursor_ret get_cursor(
-    op_context_t c,
-    LBABtree& btree,
-    laddr_t offset);
-
-  using _get_cursors_ret = get_mappings_iertr::future<std::list<LBACursorRef>>;
-  _get_cursors_ret get_cursors(
-    op_context_t c,
-    LBABtree& btree,
-    laddr_t offset,
-    extent_len_t length);
-
-  using resolve_indirect_cursor_ret = get_mappings_iertr::future<LBACursorRef>;
-  resolve_indirect_cursor_ret resolve_indirect_cursor(
-    op_context_t c,
-    LBABtree& btree,
-    const LBACursor& indirect_cursor);
-  // drops one reference from the direct mapping that covers addr~len
-  using _decref_intermediate_ret = ref_iertr::future<
-    update_mapping_ret_bare_t>;
-  _decref_intermediate_ret _decref_intermediate(
-    Transaction &t,
-    laddr_t addr,
-    extent_len_t len);
-  // re-syncs cursor with the leaf node visible to c.trans
-  using refresh_lba_cursor_iertr = base_iertr;
-  using refresh_lba_cursor_ret = refresh_lba_cursor_iertr::future<>;
-  refresh_lba_cursor_ret refresh_lba_cursor(
-    op_context_t c,
-    LBABtree &btree,
-    LBACursor &cursor);
-};
-using BtreeLBAManagerRef = std::unique_ptr<BtreeLBAManager>;
-
-}
diff --git a/src/crimson/os/seastore/lba_manager/btree/lba_btree_node.cc b/src/crimson/os/seastore/lba_manager/btree/lba_btree_node.cc
deleted file mode 100644 (file)
index 9cb62db..0000000
+++ /dev/null
@@ -1,87 +0,0 @@
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab
-
-#include <sys/mman.h>
-#include <string.h>
-
-#include <memory>
-#include <string.h>
-
-#include "include/buffer.h"
-#include "include/byteorder.h"
-
-#include "crimson/os/seastore/lba_manager/btree/btree_lba_manager.h"
-#include "crimson/os/seastore/logging.h"
-#include "crimson/os/seastore/logical_child_node.h"
-
-SET_SUBSYS(seastore_lba);
-
-namespace crimson::os::seastore::lba_manager::btree {
-
-std::ostream &LBALeafNode::print_detail(std::ostream &out) const
-{
-  out << ", size=" << this->get_size()
-      << ", meta=" << this->get_meta()
-      << ", modifications=" << this->modifications
-      << ", my_tracker=" << (void*)this->my_tracker;
-  if (this->my_tracker) {
-    out << ", my_tracker->parent=" << (void*)this->my_tracker->get_parent().get();
-  }
-  return out << ", root_block=" << (void*)this->parent_of_root.get();
-}
-
-void LBALeafNode::resolve_relative_addrs(paddr_t base)
-{
-  LOG_PREFIX(LBALeafNode::resolve_relative_addrs);
-  for (auto i: *this) {
-    auto val = i->get_val();
-    if (val.pladdr.is_paddr() &&
-       val.pladdr.get_paddr().is_relative()) {
-      val.pladdr = base.add_relative(val.pladdr.get_paddr());
-      TRACE("{} -> {}", i->get_val().pladdr, val.pladdr);
-      i->set_val(val);
-    }
-  }
-}
-
-void LBALeafNode::update(
-  internal_const_iterator_t iter,
-  lba_map_val_t val)
-{
-  LOG_PREFIX(LBALeafNode::update);
-  SUBTRACE(seastore_fixedkv_tree, "trans.{}, pos {}",
-    this->pending_for_transaction,
-    iter.get_offset());
-  this->on_modify();
-  if (val.pladdr.is_paddr()) {
-    val.pladdr = maybe_generate_relative(val.pladdr.get_paddr());
-  }
-  return this->journal_update(
-    iter,
-    val,
-    this->maybe_get_delta_buffer());
-}
-
-LBALeafNode::internal_const_iterator_t LBALeafNode::insert(
-  internal_const_iterator_t iter,
-  laddr_t addr,
-  lba_map_val_t val)
-{
-  LOG_PREFIX(LBALeafNode::insert);
-  SUBTRACE(seastore_fixedkv_tree, "trans.{}, pos {}, key {}",
-    this->pending_for_transaction,
-    iter.get_offset(),
-    addr);
-  this->on_modify();
-  if (val.pladdr.is_paddr()) {
-    val.pladdr = maybe_generate_relative(val.pladdr.get_paddr());
-  }
-  this->journal_insert(
-    iter,
-    addr,
-    val,
-    this->maybe_get_delta_buffer());
-  return iter;
-}
-
-}
diff --git a/src/crimson/os/seastore/lba_manager/btree/lba_btree_node.h b/src/crimson/os/seastore/lba_manager/btree/lba_btree_node.h
deleted file mode 100644 (file)
index 8531946..0000000
+++ /dev/null
@@ -1,291 +0,0 @@
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab
-
-#pragma once
-
-#include <sys/mman.h>
-#include <memory>
-#include <string.h>
-
-
-#include "include/buffer.h"
-
-#include "crimson/common/fixed_kv_node_layout.h"
-#include "crimson/common/errorator.h"
-#include "crimson/os/seastore/seastore_types.h"
-#include "crimson/os/seastore/cache.h"
-#include "crimson/os/seastore/cached_extent.h"
-
-#include "crimson/os/seastore/btree/btree_types.h"
-#include "crimson/os/seastore/btree/fixed_kv_btree.h"
-#include "crimson/os/seastore/btree/fixed_kv_node.h"
-
-namespace crimson::os::seastore {
-class LogicalChildNode;
-}
-
-namespace crimson::os::seastore::lba_manager::btree {
-
-using base_iertr = Cache::base_iertr;
-using LBANode = FixedKVNode<laddr_t>;
-
-class BtreeLBAMapping;
-
-constexpr size_t LBA_BLOCK_SIZE = 4096;
-
-using lba_node_meta_t = fixed_kv_node_meta_t<laddr_t>;
-
-using lba_node_meta_le_t = fixed_kv_node_meta_le_t<laddr_le_t>;
-
-/**
- * LBAInternalNode
- *
- * Abstracts operations on and layout of internal nodes for the
- * LBA Tree.
- *
- * Layout (4KiB):
- *   checksum   : ceph_le32[1]               4B
- *   size       : ceph_le32[1]               4B
- *   meta       : lba_node_meta_le_t[1]      20B
- *   keys       : laddr_le_t[CAPACITY]       (254*8)B
- *   values     : paddr_le_t[CAPACITY]       (254*8)B
- *                                           = 4092B
-
- * TODO: make the above capacity calculation part of FixedKVNodeLayout
- * TODO: the above alignment probably isn't portable without further work
- */
-constexpr size_t INTERNAL_NODE_CAPACITY = 254;
-struct LBAInternalNode
-  : FixedKVInternalNode<
-      INTERNAL_NODE_CAPACITY,
-      laddr_t, laddr_le_t,
-      LBA_BLOCK_SIZE,
-      LBAInternalNode> {
-  static_assert(
-    check_capacity(LBA_BLOCK_SIZE),
-    "INTERNAL_NODE_CAPACITY doesn't fit in LBA_BLOCK_SIZE");
-  using Ref = TCachedExtentRef<LBAInternalNode>;
-  using internal_iterator_t = const_iterator;
-  using key_type = laddr_t;
-  template <typename... T>
-  LBAInternalNode(T&&... t) :
-    FixedKVInternalNode(std::forward<T>(t)...) {}
-  static constexpr uint32_t CHILD_VEC_UNIT = 0;
-
-  static constexpr extent_types_t TYPE = extent_types_t::LADDR_INTERNAL;
-
-  extent_types_t get_type() const final {
-    return TYPE;
-  }
-};
-using LBAInternalNodeRef = LBAInternalNode::Ref;
-
-/**
- * LBALeafNode
- *
- * Abstracts operations on and layout of leaf nodes for the
- * LBA Tree.
- *
- * Layout (4KiB):
- *   checksum   : ceph_le32[1]                4B
- *   size       : ceph_le32[1]                4B
- *   meta       : lba_node_meta_le_t[1]       20B
- *   keys       : laddr_le_t[CAPACITY]        (140*8)B
- *   values     : lba_map_val_le_t[CAPACITY]  (140*21)B
- *                                            = 4088B
- *
- * TODO: update FixedKVNodeLayout to handle the above calculation
- * TODO: the above alignment probably isn't portable without further work
- */
-constexpr size_t LEAF_NODE_CAPACITY = 140;
-
-struct LBALeafNode
-  : FixedKVLeafNode<
-      LEAF_NODE_CAPACITY,
-      laddr_t, laddr_le_t,
-      lba_map_val_t, lba_map_val_le_t,
-      LBA_BLOCK_SIZE,
-      LBAInternalNode,
-      LBALeafNode>,
-    ParentNode<LBALeafNode, laddr_t> {
-  static_assert(
-    check_capacity(LBA_BLOCK_SIZE),
-    "LEAF_NODE_CAPACITY doesn't fit in LBA_BLOCK_SIZE");
-  using Ref = TCachedExtentRef<LBALeafNode>;
-  using parent_type_t = FixedKVLeafNode<
-                         LEAF_NODE_CAPACITY,
-                         laddr_t, laddr_le_t,
-                         lba_map_val_t, lba_map_val_le_t,
-                         LBA_BLOCK_SIZE,
-                         LBAInternalNode,
-                         LBALeafNode>;
-  using internal_const_iterator_t =
-    typename parent_type_t::node_layout_t::const_iterator;
-  using internal_iterator_t =
-    typename parent_type_t::node_layout_t::iterator;
-  using key_type = laddr_t;
-  using parent_node_t = ParentNode<LBALeafNode, laddr_t>;
-  using child_t = LogicalChildNode;
-  static constexpr uint32_t CHILD_VEC_UNIT = 0;
-  LBALeafNode(ceph::bufferptr &&ptr)
-    : parent_type_t(std::move(ptr)),
-      parent_node_t(LEAF_NODE_CAPACITY) {}
-  explicit LBALeafNode(extent_len_t length)
-    : parent_type_t(length),
-      parent_node_t(LEAF_NODE_CAPACITY) {}
-  LBALeafNode(const LBALeafNode &rhs)
-    : parent_type_t(rhs),
-      parent_node_t(rhs) {}
-
-  static constexpr extent_types_t TYPE = extent_types_t::LADDR_LEAF;
-
-  void update(
-    internal_const_iterator_t iter,
-    lba_map_val_t val) final;
-
-  internal_const_iterator_t insert(
-    internal_const_iterator_t iter,
-    laddr_t addr,
-    lba_map_val_t val) final;
-
-  void remove(internal_const_iterator_t iter) final {
-    LOG_PREFIX(LBALeafNode::remove);
-    SUBTRACE(seastore_fixedkv_tree, "trans.{}, pos {}, key {}",
-      this->pending_for_transaction,
-      iter.get_offset(),
-      iter.get_key());
-    assert(iter != this->end());
-    this->on_modify();
-    this->remove_child_ptr(iter.get_offset());
-    return this->journal_remove(
-      iter,
-      this->maybe_get_delta_buffer());
-  }
-
-  // See LBAInternalNode, same concept
-  void resolve_relative_addrs(paddr_t base) final;
-  void node_resolve_vals(
-    internal_iterator_t from,
-    internal_iterator_t to) const final
-  {
-    if (this->is_initial_pending()) {
-      for (auto i = from; i != to; ++i) {
-       auto val = i->get_val();
-       if (val.pladdr.is_paddr()
-           && val.pladdr.get_paddr().is_relative()) {
-         assert(val.pladdr.get_paddr().is_block_relative());
-         val.pladdr = this->get_paddr().add_relative(val.pladdr.get_paddr());
-         i->set_val(val);
-       }
-      }
-    }
-  }
-  void node_unresolve_vals(
-    internal_iterator_t from,
-    internal_iterator_t to) const final
-  {
-    if (this->is_initial_pending()) {
-      for (auto i = from; i != to; ++i) {
-       auto val = i->get_val();
-       if (val.pladdr.is_paddr()
-           && val.pladdr.get_paddr().is_relative()) {
-         assert(val.pladdr.get_paddr().is_record_relative());
-         val.pladdr = val.pladdr.get_paddr().block_relative_to(this->get_paddr());
-         i->set_val(val);
-       }
-      }
-    }
-  }
-
-  extent_types_t get_type() const final {
-    return TYPE;
-  }
-
-  void do_on_rewrite(Transaction &t, CachedExtent &extent) final {
-    this->parent_node_t::on_rewrite(t, static_cast<LBALeafNode&>(extent));
-  }
-
-  void do_on_replace_prior() final {
-    this->parent_node_t::on_replace_prior();
-  }
-
-  void do_prepare_commit() final {
-    this->parent_node_t::prepare_commit();
-  }
-
-  bool is_child_stable(
-    op_context_t c,
-    uint16_t pos,
-    laddr_t key) const {
-    return parent_node_t::_is_child_stable(c.trans, c.cache, pos, key);
-  }
-  bool is_child_data_stable(
-    op_context_t c,
-    uint16_t pos,
-    laddr_t key) const {
-    return parent_node_t::_is_child_stable(c.trans, c.cache, pos, key, true);
-  }
-
-  void on_split(
-    Transaction &t,
-    LBALeafNode &left,
-    LBALeafNode &right) final {
-    this->split_child_ptrs(t, left, right);
-  }
-  void adjust_copy_src_dest_on_split(
-    Transaction &t,
-    LBALeafNode &left,
-    LBALeafNode &right) final {
-    this->parent_node_t::adjust_copy_src_dest_on_split(t, left, right);
-  }
-
-  void on_merge(
-    Transaction &t,
-    LBALeafNode &left,
-    LBALeafNode &right) final {
-    this->merge_child_ptrs(t, left, right);
-  }
-  void adjust_copy_src_dest_on_merge(
-    Transaction &t,
-    LBALeafNode &left,
-    LBALeafNode &right) final {
-    this->parent_node_t::adjust_copy_src_dest_on_merge(t, left, right);
-  }
-
-  void on_balance(
-    Transaction &t,
-    LBALeafNode &left,
-    LBALeafNode &right,
-    uint32_t pivot_idx,
-    LBALeafNode &replacement_left,
-    LBALeafNode &replacement_right) final {
-    this->balance_child_ptrs(
-      t, left, right, pivot_idx, replacement_left, replacement_right);
-  }
-  void adjust_copy_src_dest_on_balance(
-    Transaction &t,
-    LBALeafNode &left,
-    LBALeafNode &right,
-    uint32_t pivot_idx,
-    LBALeafNode &replacement_left,
-    LBALeafNode &replacement_right) final {
-    this->parent_node_t::adjust_copy_src_dest_on_balance(
-      t, left, right, pivot_idx, replacement_left, replacement_right);
-  }
-
-  CachedExtentRef duplicate_for_write(Transaction&) final {
-    return CachedExtentRef(new LBALeafNode(*this));
-  }
-
-  std::ostream &print_detail(std::ostream &out) const final;
-};
-using LBALeafNodeRef = TCachedExtentRef<LBALeafNode>;
-
-}
-
-#if FMT_VERSION >= 90000
-template <> struct fmt::formatter<crimson::os::seastore::lba_manager::btree::lba_node_meta_t> : fmt::ostream_formatter {};
-template <> struct fmt::formatter<crimson::os::seastore::lba_manager::btree::lba_map_val_t> : fmt::ostream_formatter {};
-template <> struct fmt::formatter<crimson::os::seastore::lba_manager::btree::LBAInternalNode> : fmt::ostream_formatter {};
-template <> struct fmt::formatter<crimson::os::seastore::lba_manager::btree::LBALeafNode> : fmt::ostream_formatter {};
-#endif
index d52233869bbe2022c9fa36bc8ebb3479bca0950d..77c564c2d7ebea623430ce96862e46eef6235394 100644 (file)
@@ -31,7 +31,7 @@ std::ostream &operator<<(std::ostream &out, const lba_mapping_list_t &rhs)
   return out << ']';
 }
 
-using lba_manager::btree::LBALeafNode;
+using lba::LBALeafNode;
 
 get_child_ret_t<LBALeafNode, LogicalChildNode>
 LBAMapping::get_logical_extent(Transaction &t)
index 05f987e8def382e17fd1a71d3c8ccef6d3a83afb..1f7e61d7073df197d3f5d086ceb88ebf759c0720 100644 (file)
@@ -5,12 +5,12 @@
 
 #include "crimson/os/seastore/cached_extent.h"
 #include "crimson/os/seastore/btree/btree_types.h"
-#include "crimson/os/seastore/lba_manager/btree/lba_btree_node.h"
+#include "crimson/os/seastore/lba/lba_btree_node.h"
 #include "crimson/os/seastore/logical_child_node.h"
 
 namespace crimson::os::seastore {
 
-namespace lba_manager::btree {
+namespace lba {
 class BtreeLBAManager;
 }
 
@@ -94,7 +94,7 @@ public:
     return direct_cursor->get_laddr();
   }
 
-   // An lba pin may be indirect, see comments in lba_manager/btree/btree_lba_manager.h
+   // An lba pin may be indirect, see comments in lba/btree_lba_manager.h
   laddr_t get_intermediate_key() const {
     assert(is_indirect());
     return indirect_cursor->get_intermediate_key();
@@ -117,7 +117,7 @@ public:
       extent_len_t>(get_intermediate_key());
   }
 
-  get_child_ret_t<lba_manager::btree::LBALeafNode, LogicalChildNode>
+  get_child_ret_t<lba::LBALeafNode, LogicalChildNode>
   get_logical_extent(Transaction &t);
 
   LBAMapping duplicate() const {
@@ -132,7 +132,7 @@ public:
   }
 
 private:
-  friend lba_manager::btree::BtreeLBAManager;
+  friend lba::BtreeLBAManager;
 
   // To support cloning, there are two kinds of lba mappings:
   //    1. direct lba mapping: the pladdr in the value of which is the paddr of
index ab4f2e67262c88f2b6ba4a544d88e551ee18b3f3..b17d5c17bc453e29ddbab12ba23c56de01aab540 100644 (file)
@@ -6,16 +6,16 @@
 #include "crimson/os/seastore/cached_extent.h"
 #include "crimson/os/seastore/linked_tree_node.h"
 #include "crimson/os/seastore/btree/btree_types.h"
-#include "crimson/os/seastore/lba_manager/btree/lba_btree_node.h"
+#include "crimson/os/seastore/lba/lba_btree_node.h"
 
 namespace crimson::os::seastore {
 
 class LogicalChildNode : public LogicalCachedExtent,
-                        public ChildNode<lba_manager::btree::LBALeafNode,
+                        public ChildNode<lba::LBALeafNode,
                                          LogicalChildNode,
                                          laddr_t> {
   using lba_child_node_t = ChildNode<
-    lba_manager::btree::LBALeafNode, LogicalChildNode, laddr_t>;
+    lba::LBALeafNode, LogicalChildNode, laddr_t>;
 public:
   template <typename... T>
   LogicalChildNode(T&&... t) : LogicalCachedExtent(std::forward<T>(t)...) {}
index c422442f5e0003e3a55afe26a2dfa92377a7b5b9..afc5494a026e66d3f5d7d89be0e8c5e1cf30b172 100644 (file)
@@ -2,7 +2,7 @@
 // vim: ts=8 sw=2 smarttab
 
 #include "crimson/os/seastore/root_block.h"
-#include "crimson/os/seastore/lba_manager/btree/lba_btree_node.h"
+#include "crimson/os/seastore/lba/lba_btree_node.h"
 #include "crimson/os/seastore/backref/backref_tree_node.h"
 #include "crimson/os/seastore/linked_tree_node.h"
 
@@ -14,17 +14,17 @@ void RootBlock::on_replace_prior() {
     if (prior.lba_root_node) {
       RootBlockRef this_ref = this;
       auto lba_root = static_cast<
-       lba_manager::btree::LBANode*>(prior.lba_root_node);
+       lba::LBANode*>(prior.lba_root_node);
       if (likely(lba_root->range.depth > 1)) {
-       TreeRootLinker<RootBlock, lba_manager::btree::LBAInternalNode>::link_root(
+       TreeRootLinker<RootBlock, lba::LBAInternalNode>::link_root(
          this_ref,
-         static_cast<lba_manager::btree::LBAInternalNode*>(prior.lba_root_node)
+         static_cast<lba::LBAInternalNode*>(prior.lba_root_node)
        );
       } else {
        assert(lba_root->range.depth == 1);
-       TreeRootLinker<RootBlock, lba_manager::btree::LBALeafNode>::link_root(
+       TreeRootLinker<RootBlock, lba::LBALeafNode>::link_root(
          this_ref,
-         static_cast<lba_manager::btree::LBALeafNode*>(prior.lba_root_node)
+         static_cast<lba::LBALeafNode*>(prior.lba_root_node)
        );
       }
     }
index 3a2ce9efe6e7e9da3b0e74fb3ca241d6f0f317a9..65d70ee5e4d4f75fcb96277b6529a52678cc46bb 100644 (file)
@@ -8,7 +8,7 @@
 #include "crimson/os/seastore/transaction_manager.h"
 #include "crimson/os/seastore/journal.h"
 #include "crimson/os/seastore/journal/circular_bounded_journal.h"
-#include "crimson/os/seastore/lba_manager/btree/lba_btree_node.h"
+#include "crimson/os/seastore/lba/lba_btree_node.h"
 #include "crimson/os/seastore/random_block_manager/rbm_device.h"
 
 /*
@@ -805,7 +805,7 @@ TransactionManagerRef make_transaction_manager(
 {
   auto epm = std::make_unique<ExtentPlacementManager>();
   auto cache = std::make_unique<Cache>(*epm);
-  auto lba_manager = lba_manager::create_lba_manager(*cache);
+  auto lba_manager = lba::create_lba_manager(*cache);
   auto sms = std::make_unique<SegmentManagerGroup>();
   auto rbs = std::make_unique<RBMDeviceGroup>();
   auto backref_manager = create_backref_manager(*cache);
index 9d2c9dda88257c5ab41c7615db5b99f077bf3068..6fca3bd2966f96db235c71e7e1c5bf40b3e86fe2 100644 (file)
@@ -951,7 +951,7 @@ private:
 
   shard_stats_t& shard_stats;
 
-  using LBALeafNode = lba_manager::btree::LBALeafNode;
+  using LBALeafNode = lba::LBALeafNode;
   struct unlinked_child_t {
     LBAMapping mapping;
     child_pos_t<LBALeafNode> child_pos;
index 4f055cbade4cffd0609d037bf79f4c4e49f8d060..a74186ae25f0a20694b821011accaed317224ffb 100644 (file)
@@ -8,7 +8,7 @@
 #include "crimson/os/seastore/journal.h"
 #include "crimson/os/seastore/cache.h"
 #include "crimson/os/seastore/segment_manager/ephemeral.h"
-#include "crimson/os/seastore/lba_manager/btree/btree_lba_manager.h"
+#include "crimson/os/seastore/lba/btree_lba_manager.h"
 
 #include "test/crimson/seastore/test_block.h"
 
@@ -21,8 +21,7 @@ namespace {
 using namespace crimson;
 using namespace crimson::os;
 using namespace crimson::os::seastore;
-using namespace crimson::os::seastore::lba_manager;
-using namespace crimson::os::seastore::lba_manager::btree;
+using namespace crimson::os::seastore::lba;
 
 struct btree_test_base :
   public seastar_test_suite_t, SegmentProvider, JournalTrimmer {
index 6244f6e80d9a6a7e0a51f7b3a882450b425bc547..514edc0407ff0f3f2f1c36039992a70bd0e0548e 100644 (file)
@@ -14,7 +14,7 @@
 #include "crimson/os/seastore/segment_manager.h"
 
 #include "test/crimson/seastore/test_block.h"
-#include "crimson/os/seastore/lba_manager/btree/lba_btree_node.h"
+#include "crimson/os/seastore/lba/lba_btree_node.h"
 
 using namespace crimson;
 using namespace crimson::os;
@@ -2118,7 +2118,7 @@ TEST_P(tm_single_device_intergrity_check_test_t, remap_lazy_read)
 TEST_P(tm_single_device_test_t, invalid_lba_mapping_detect)
 {
   run_async([this] {
-    using namespace crimson::os::seastore::lba_manager::btree;
+    using namespace crimson::os::seastore::lba;
     {
       auto t = create_transaction();
       for (unsigned i = 0; i < LEAF_NODE_CAPACITY; i++) {