crimson/os/seastore/lba_manager: replace btree implementation (42901/head)
author    Samuel Just <sjust@redhat.com>
          Tue, 13 Jul 2021 02:02:18 +0000 (02:02 +0000)
committer Samuel Just <sjust@redhat.com>
          Fri, 27 Aug 2021 02:10:45 +0000 (19:10 -0700)
Replace the previous implementation with one based around an internal
iterator interface.  Besides simplifying the code and removing duplicate
lookups in the allocation pathway, this corrects a design problem in the
prior implementation: LBALeafNode::find_hole couldn't see the first
element of the subsequent node and therefore assumed that there was a
hole at get_meta().end.

This patch removes the btree logic from lba_btree_node_impl.*, leaving
the LBAInternalNode and LBALeafNode layouts in lba_btree_node.*.  The
main btree update/query logic now lives in lba_btree.h/.cc.

Signed-off-by: Samuel Just <sjust@redhat.com>
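
The new interface is driven through LBABtree and its iterator rather than
recursive calls into LBANode; callers wrap operations in
with_btree/with_btree_state and walk entries with LBABtree::iterate_repeat.
As a sketch of the resulting caller pattern (it mirrors the new
BtreeLBAManager::get_mappings in the diff below, with comments added),
collecting all pins overlapping offset~length becomes:

    auto c = get_context(t);
    return with_btree_state<lba_pin_list_t>(
      c,
      [c, offset, length](auto &btree, auto &ret) {
        return LBABtree::iterate_repeat(
          c,
          // first mapping whose end is past offset
          btree.upper_bound_right(c, offset),
          [&ret, offset, length](auto &pos) {
            if (pos.is_end() || pos.get_key() >= (offset + length)) {
              return LBABtree::iterate_repeat_ret(
                interruptible::ready_future_marker{},
                seastar::stop_iteration::yes);
            }
            // still overlapping: collect the pin and continue
            ret.push_back(pos.get_pin());
            return LBABtree::iterate_repeat_ret(
              interruptible::ready_future_marker{},
              seastar::stop_iteration::no);
          });
      });
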
13 files changed:
src/crimson/os/seastore/CMakeLists.txt
src/crimson/os/seastore/cache.cc
src/crimson/os/seastore/lba_manager.h
src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.cc
src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.h
src/crimson/os/seastore/lba_manager/btree/btree_range_pin.h
src/crimson/os/seastore/lba_manager/btree/lba_btree.cc [new file with mode: 0644]
src/crimson/os/seastore/lba_manager/btree/lba_btree.h [new file with mode: 0644]
src/crimson/os/seastore/lba_manager/btree/lba_btree_node.cc [new file with mode: 0644]
src/crimson/os/seastore/lba_manager/btree/lba_btree_node.h
src/crimson/os/seastore/lba_manager/btree/lba_btree_node_impl.cc [deleted file]
src/crimson/os/seastore/lba_manager/btree/lba_btree_node_impl.h [deleted file]
src/test/crimson/seastore/test_btree_lba_manager.cc

index 71050d099e0e121d736a2a11f94ac4e9171b47e0..ccd67298328b80c6946e2820e3b7cc48465dbdfd 100644 (file)
@@ -10,8 +10,9 @@ add_library(crimson-seastore STATIC
   lba_manager.cc
   segment_cleaner.cc
   lba_manager/btree/btree_lba_manager.cc
-  lba_manager/btree/lba_btree_node_impl.cc
   lba_manager/btree/btree_range_pin.cc
+  lba_manager/btree/lba_btree.cc
+  lba_manager/btree/lba_btree_node.cc
   omap_manager.cc
   omap_manager/btree/btree_omap_manager.cc
   omap_manager/btree/omap_btree_node_impl.cc
index 98e76346d21fa69a166bdb5a14e2990df84fad09..a61e9bebdebf3713b91181bb9c02ec408a25105a 100644 (file)
@@ -9,7 +9,7 @@
 
 // included for get_extent_by_type
 #include "crimson/os/seastore/collection_manager/collection_flat_node.h"
-#include "crimson/os/seastore/lba_manager/btree/lba_btree_node_impl.h"
+#include "crimson/os/seastore/lba_manager/btree/lba_btree_node.h"
 #include "crimson/os/seastore/omap_manager/btree/omap_btree_node_impl.h"
 #include "crimson/os/seastore/object_data_handler.h"
 #include "crimson/os/seastore/collection_manager/collection_flat_node.h"
index 1e19ac6ba9e0d7c7d54ca916190f53054ab2bdd2..735c4d69be458eb6ccb3c2a0a52835ff469f614c 100644 (file)
@@ -83,18 +83,6 @@ public:
     extent_len_t len,
     paddr_t addr) = 0;
 
-  /**
-   * Creates a new absolute mapping.
-   *
-   * off~len must be unreferenced
-   */
-  using set_extent_iertr = base_iertr::extend<
-    crimson::ct_error::invarg>;
-  using set_extent_ret = set_extent_iertr::future<LBAPinRef>;
-  virtual set_extent_ret set_extent(
-    Transaction &t,
-    laddr_t off, extent_len_t len, paddr_t addr) = 0;
-
   struct ref_update_result_t {
     unsigned refcount = 0;
     paddr_t addr;
index 278c0958fa990f9cca25d6e511fcca4dead4d9df..361b0bb40532c457e636967f4b98ba2e98b6dc6b 100644 (file)
@@ -5,10 +5,12 @@
 #include <string.h>
 
 #include "crimson/common/log.h"
+#include "crimson/os/seastore/logging.h"
 
 #include "include/buffer.h"
 #include "crimson/os/seastore/lba_manager/btree/btree_lba_manager.h"
-#include "crimson/os/seastore/lba_manager/btree/lba_btree_node_impl.h"
+#include "crimson/os/seastore/lba_manager/btree/lba_btree_node.h"
+#include "crimson/os/seastore/lba_manager/btree/lba_btree.h"
 
 
 namespace {
@@ -22,18 +24,8 @@ namespace crimson::os::seastore::lba_manager::btree {
 BtreeLBAManager::mkfs_ret BtreeLBAManager::mkfs(
   Transaction &t)
 {
-  logger().debug("BtreeLBAManager::mkfs");
   return cache.get_root(t).si_then([this, &t](auto croot) {
-    auto root_leaf = cache.alloc_new_extent<LBALeafNode>(
-      t,
-      LBA_BLOCK_SIZE);
-    root_leaf->set_size(0);
-    lba_node_meta_t meta{0, L_ADDR_MAX, 1};
-    root_leaf->set_meta(meta);
-    root_leaf->pin.set_range(meta);
-    croot->get_root().lba_root =
-      lba_root_t{root_leaf->get_paddr(), 1u};
-    t.get_lba_tree_stats().depth = 1u;
+    croot->get_root().lba_root = LBABtree::mkfs(get_context(t));
     return mkfs_iertr::now();
   }).handle_error_interruptible(
     mkfs_iertr::pass_further{},
@@ -43,39 +35,33 @@ BtreeLBAManager::mkfs_ret BtreeLBAManager::mkfs(
   );
 }
 
-BtreeLBAManager::get_root_ret
-BtreeLBAManager::get_root(Transaction &t)
-{
-  return cache.get_root(t).si_then([this, &t](auto croot) {
-    logger().debug(
-      "BtreeLBAManager::get_root: reading root at {} depth {}",
-      paddr_t{croot->get_root().lba_root.get_location()},
-      croot->get_root().lba_root.get_depth());
-    return get_lba_btree_extent(
-      get_context(t),
-      croot,
-      croot->get_root().lba_root.get_depth(),
-      croot->get_root().lba_root.get_location(),
-      paddr_t());
-  });
-}
-
 BtreeLBAManager::get_mappings_ret
 BtreeLBAManager::get_mappings(
   Transaction &t,
   laddr_t offset, extent_len_t length)
 {
-  logger().debug("BtreeLBAManager::get_mappings: {}, {}", offset, length);
-  return get_root(
-    t).si_then([this, &t, offset, length](auto extent) {
-      return extent->lookup_range(
-       get_context(t),
-       offset, length
-      ).si_then([](auto &&e) {
-        logger().debug("BtreeLBAManager::get_mappings: got mappings {}", e);
-        return std::move(e);
-      });
-  });
+  LOG_PREFIX(BtreeLBAManager::get_mappings);
+  DEBUGT("offset: {}, length{}", t, offset, length);
+  auto c = get_context(t);
+  return with_btree_state<lba_pin_list_t>(
+    c,
+    [c, offset, length](auto &btree, auto &ret) {
+      return LBABtree::iterate_repeat(
+       c,
+       btree.upper_bound_right(c, offset),
+       [&ret, offset, length](auto &pos) {
+         if (pos.is_end() || pos.get_key() >= (offset + length)) {
+           return LBABtree::iterate_repeat_ret(
+             interruptible::ready_future_marker{},
+             seastar::stop_iteration::yes);
+         }
+         ceph_assert((pos.get_key() + pos.get_val().len) > offset);
+         ret.push_back(pos.get_pin());
+         return LBABtree::iterate_repeat_ret(
+           interruptible::ready_future_marker{},
+           seastar::stop_iteration::no);
+       });
+    });
 }
 
 
@@ -84,7 +70,8 @@ BtreeLBAManager::get_mappings(
   Transaction &t,
   laddr_list_t &&list)
 {
-  logger().debug("BtreeLBAManager::get_mappings: {}", list);
+  LOG_PREFIX(BtreeLBAManager::get_mappings);
+  DEBUGT("{}", t, list);
   auto l = std::make_unique<laddr_list_t>(std::move(list));
   auto retptr = std::make_unique<lba_pin_list_t>();
   auto &ret = *retptr;
@@ -107,14 +94,26 @@ BtreeLBAManager::get_mapping(
   Transaction &t,
   laddr_t offset)
 {
-  logger().debug("BtreeLBAManager::get_mapping: {}", offset);
-  return get_root(t
-  ).si_then([this, &t, offset] (auto extent) {
-    return extent->lookup_pin(get_context(t), offset);
-  }).si_then([] (auto &&e) {
-    logger().debug("BtreeLBAManager::get_mapping: got mapping {}", *e);
-    return std::move(e);
-  });
+  LOG_PREFIX(BtreeLBAManager::get_mapping);
+  DEBUGT(": {}", t, offset);
+  auto c = get_context(t);
+  return with_btree_ret<LBAPinRef>(
+    c,
+    [FNAME, c, offset](auto &btree) {
+      return btree.lower_bound(
+       c, offset
+      ).si_then([FNAME, offset, c](auto iter) -> get_mapping_ret {
+       if (iter.is_end() || iter.get_key() != offset) {
+         return crimson::ct_error::enoent::make();
+       } else {
+         auto e = iter.get_pin();
+         DEBUGT("got mapping {}", c.trans, *e);
+         return get_mapping_ret(
+           interruptible::ready_future_marker{},
+           std::move(e));
+       }
+      });
+    });
 }
 
 BtreeLBAManager::alloc_extent_ret
@@ -124,54 +123,52 @@ BtreeLBAManager::alloc_extent(
   extent_len_t len,
   paddr_t addr)
 {
-  // TODO: we can certainly combine the lookup and the insert.
-  return get_root(
-    t).si_then([this, &t, hint, len](auto extent) {
-      logger().debug(
-       "BtreeLBAManager::alloc_extent: beginning search at {}",
-       *extent);
-      return extent->find_hole(
-       get_context(t),
-       hint,
-       L_ADDR_MAX,
-       len).si_then([extent](auto ret) {
-         return std::make_pair(ret, extent);
+  struct state_t {
+    laddr_t last_end;
+
+    std::optional<LBABtree::iterator> insert_iter;
+    std::optional<LBABtree::iterator> ret;
+
+    state_t(laddr_t hint) : last_end(hint) {}
+  };
+
+  LOG_PREFIX(BtreeLBAManager::alloc_extent);
+  DEBUGT("hint: {}, length: {}", t, hint, len);
+  auto c = get_context(t);
+  return with_btree_state<state_t>(
+    c,
+    hint,
+    [FNAME, c, hint, len, addr](auto &btree, auto &state) {
+      return LBABtree::iterate_repeat(
+       c,
+       btree.upper_bound_right(c, hint),
+       [&state, len](auto &pos) {
+         if (pos.is_end() || pos.get_key() >= (state.last_end + len)) {
+           state.insert_iter = pos;
+           return LBABtree::iterate_repeat_ret(
+             interruptible::ready_future_marker{},
+             seastar::stop_iteration::yes);
+         } else {
+           state.last_end = pos.get_key() + pos.get_val().len;
+           return LBABtree::iterate_repeat_ret(
+             interruptible::ready_future_marker{},
+             seastar::stop_iteration::no);
+         }
+       }).si_then([FNAME, c, addr, len, &btree, &state] {
+         DEBUGT("about to insert at addr {}~{}", c.trans, state.last_end, len);
+         return btree.insert(
+           c,
+           *state.insert_iter,
+           state.last_end,
+           lba_map_val_t{len, addr, 1, 0}
+         ).si_then([&state](auto &&p) {
+           auto [iter, inserted] = std::move(p);
+           ceph_assert(inserted);
+           state.ret = iter;
+         });
        });
-    }).si_then([this, &t, len, addr](auto allocation_pair) {
-      auto &[laddr, extent] = allocation_pair;
-      ceph_assert(laddr != L_ADDR_MAX);
-      return insert_mapping(
-       t,
-       extent,
-       laddr,
-       { len, addr, 1, 0 }
-      ).si_then([laddr=laddr, addr, len](auto pin) {
-       logger().debug(
-         "BtreeLBAManager::alloc_extent: alloc {}~{} for {}",
-         laddr,
-         len,
-         addr);
-       return LBAPinRef(pin.release());
-      });
-    });
-}
-
-BtreeLBAManager::set_extent_ret
-BtreeLBAManager::set_extent(
-  Transaction &t,
-  laddr_t off, extent_len_t len, paddr_t addr)
-{
-  return get_root(
-    t).si_then([this, &t, off, len, addr](auto root) {
-      return insert_mapping(
-       t,
-       root,
-       off,
-       { len, addr, 1, 0 });
-    }).si_then([](auto ret) {
-      return set_extent_ret(
-       interruptible::ready_future_marker{},
-       LBAPinRef(ret.release()));
+    }).si_then([](auto &&state) {
+      return state.ret->get_pin();
     });
 }
 
@@ -257,58 +254,15 @@ BtreeLBAManager::init_cached_extent_ret BtreeLBAManager::init_cached_extent(
   Transaction &t,
   CachedExtentRef e)
 {
-  logger().debug("{}: {}", __func__, *e);
-  return get_root(t).si_then(
-    [this, &t, e=std::move(e)](LBANodeRef root) mutable {
-      if (is_lba_node(*e)) {
-       auto lban = e->cast<LBANode>();
-       logger().debug("init_cached_extent: lba node, getting root");
-       return root->lookup(
-         op_context_t{cache, pin_set, t},
-         lban->get_node_meta().begin,
-         lban->get_node_meta().depth
-       ).si_then([this, e=std::move(e)](LBANodeRef c) {
-         if (c->get_paddr() == e->get_paddr()) {
-           assert(&*c == &*e);
-           logger().debug("init_cached_extent: {} initialized", *e);
-         } else {
-           // e is obsolete
-           logger().debug("init_cached_extent: {} obsolete", *e);
-           cache.drop_from_cache(e);
-         }
-         return init_cached_extent_iertr::now();
-       });
-      } else if (e->is_logical()) {
-       auto logn = e->cast<LogicalCachedExtent>();
-       return root->lookup_range(
-         op_context_t{cache, pin_set, t},
-         logn->get_laddr(),
-         logn->get_length()).si_then(
-           [this, logn=std::move(logn)](auto pins) {
-             if (pins.size() == 1) {
-               auto pin = std::move(pins.front());
-               pins.pop_front();
-               if (pin->get_paddr() == logn->get_paddr()) {
-                 logn->set_pin(std::move(pin));
-                 pin_set.add_pin(
-                   static_cast<BtreeLBAPin&>(logn->get_pin()).pin);
-                 logger().debug("init_cached_extent: {} initialized", *logn);
-               } else {
-                 // paddr doesn't match, remapped, obsolete
-                 logger().debug("init_cached_extent: {} obsolete", *logn);
-                 cache.drop_from_cache(logn);
-               }
-             } else {
-               // set of extents changed, obsolete
-               logger().debug("init_cached_extent: {} obsolete", *logn);
-               cache.drop_from_cache(logn);
-             }
-             return init_cached_extent_iertr::now();
-           });
-      } else {
-       logger().debug("init_cached_extent: {} skipped", *e);
-       return init_cached_extent_iertr::now();
-      }
+  LOG_PREFIX(BtreeLBAManager::init_cached_extent);
+  DEBUGT(": extent {}", t, *e);
+  auto c = get_context(t);
+  return with_btree(
+    c,
+    [c, e](auto &btree) {
+      return btree.init_cached_extent(
+       c, e
+      ).si_then([](auto) {});
     });
 }
 
@@ -318,18 +272,27 @@ BtreeLBAManager::scan_mappings_ret BtreeLBAManager::scan_mappings(
   laddr_t end,
   scan_mappings_func_t &&f)
 {
-  return seastar::do_with(
-    std::move(f),
-    LBANodeRef(),
-    [=, &t](auto &f, auto &lbarootref) {
-      return get_root(t).si_then(
-       [=, &t, &f](LBANodeRef lbaroot) mutable {
-         lbarootref = lbaroot;
-         return lbaroot->scan_mappings(
-           get_context(t),
-           begin,
-           end,
-           f);
+  LOG_PREFIX(BtreeLBAManager::scan_mappings);
+  DEBUGT("begin: {}, end: {}", t, begin, end);
+
+  auto c = get_context(t);
+  return with_btree(
+    c,
+    [c, f=std::move(f), begin, end](auto &btree) mutable {
+      return LBABtree::iterate_repeat(
+       c,
+       btree.upper_bound_right(c, begin),
+       [f=std::move(f), begin, end](auto &pos) {
+         if (pos.is_end() || pos.get_key() >= end) {
+           return LBABtree::iterate_repeat_ret(
+             interruptible::ready_future_marker{},
+             seastar::stop_iteration::yes);
+         }
+         ceph_assert((pos.get_key() + pos.get_val().len) > begin);
+         f(pos.get_key(), pos.get_val().paddr, pos.get_val().len);
+         return LBABtree::iterate_repeat_ret(
+           interruptible::ready_future_marker{},
+           seastar::stop_iteration::no);
        });
     });
 }
@@ -338,16 +301,30 @@ BtreeLBAManager::scan_mapped_space_ret BtreeLBAManager::scan_mapped_space(
     Transaction &t,
     scan_mapped_space_func_t &&f)
 {
+  LOG_PREFIX(BtreeLBAManager::scan_mapped_space);
+  DEBUGT("", t);
+  auto c = get_context(t);
   return seastar::do_with(
     std::move(f),
-    LBANodeRef(),
-    [=, &t](auto &f, auto &lbarootref) {
-      return get_root(t).si_then(
-       [=, &t, &f](LBANodeRef lbaroot) mutable {
-         lbarootref = lbaroot;
-         return lbaroot->scan_mapped_space(
-           get_context(t),
-           f);
+    [this, c](auto &visitor) {
+      return with_btree(
+       c,
+       [c, &visitor](auto &btree) {
+         return LBABtree::iterate_repeat(
+           c,
+           btree.lower_bound(c, 0, &visitor),
+           [&visitor](auto &pos) {
+             if (pos.is_end()) {
+               return LBABtree::iterate_repeat_ret(
+                 interruptible::ready_future_marker{},
+                 seastar::stop_iteration::yes);
+             }
+             visitor(pos.get_val().paddr, pos.get_val().len);
+             return LBABtree::iterate_repeat_ret(
+               interruptible::ready_future_marker{},
+               seastar::stop_iteration::no);
+           },
+           &visitor);
        });
     });
 }
@@ -356,6 +333,10 @@ BtreeLBAManager::rewrite_extent_ret BtreeLBAManager::rewrite_extent(
   Transaction &t,
   CachedExtentRef extent)
 {
+  LOG_PREFIX(BtreeLBAManager::rewrite_extent);
+  if (extent->has_been_invalidated()) {
+    ERRORT("{} has been invalidated", t, *extent);
+  }
   assert(!extent->has_been_invalidated());
 
   logger().debug(
@@ -401,46 +382,12 @@ BtreeLBAManager::rewrite_extent_ret BtreeLBAManager::rewrite_extent(
        }
       );
   } else if (is_lba_node(*extent)) {
-    auto lba_extent = extent->cast<LBANode>();
-    cache.retire_extent(t, extent);
-    auto nlba_extent = cache.alloc_new_extent_by_type(
-      t,
-      lba_extent->get_type(),
-      lba_extent->get_length())->cast<LBANode>();
-    lba_extent->get_bptr().copy_out(
-      0,
-      lba_extent->get_length(),
-      nlba_extent->get_bptr().c_str());
-    nlba_extent->pin.set_range(nlba_extent->get_node_meta());
-
-    /* This is a bit underhanded.  Any relative addrs here must necessarily
-     * be record relative as we are rewriting a dirty extent.  Thus, we
-     * are using resolve_relative_addrs with a (likely negative) block
-     * relative offset to correct them to block-relative offsets adjusted
-     * for our new transaction location.
-     *
-     * Upon commit, these now block relative addresses will be interpretted
-     * against the real final address.
-     */
-    nlba_extent->resolve_relative_addrs(
-      make_record_relative_paddr(0) - nlba_extent->get_paddr());
-
-    logger().debug(
-      "{}: rewriting {} into {}",
-      __func__,
-      *lba_extent,
-      *nlba_extent);
-
-    return update_internal_mapping(
-      t,
-      nlba_extent->get_node_meta().depth,
-      nlba_extent->get_node_meta().begin,
-      nlba_extent->get_paddr()).si_then(
-       [](auto) {},
-       rewrite_extent_iertr::pass_further{},
-       crimson::ct_error::assert_all{
-         "Invalid error in BtreeLBAManager::rewrite_extent update_internal_mapping"
-       });
+    auto c = get_context(t);
+    return with_btree(
+      c,
+      [c, extent](auto &btree) mutable {
+       return btree.rewrite_lba_extent(c, extent);
+      });
   } else {
     return rewrite_extent_iertr::now();
   }
@@ -462,24 +409,14 @@ BtreeLBAManager::get_physical_extent_if_live(
     laddr,
     len
   ).si_then([=, &t](CachedExtentRef extent) {
-    return get_root(t).si_then([=, &t](LBANodeRef root) {
-      auto lba_node = extent->cast<LBANode>();
-      return root->lookup(
-       op_context_t{cache, pin_set, t},
-       lba_node->get_node_meta().begin,
-       lba_node->get_node_meta().depth).si_then([=](LBANodeRef c) {
-         if (c->get_paddr() == lba_node->get_paddr()) {
-           return get_physical_extent_if_live_ret(
-             interruptible::ready_future_marker{},
-             lba_node);
-         } else {
-           cache.drop_from_cache(lba_node);
-           return get_physical_extent_if_live_ret(
-             interruptible::ready_future_marker{},
-             CachedExtentRef());
-         }
-       });
-    });
+    auto c = get_context(t);
+    return with_btree_ret<CachedExtentRef>(
+      c,
+      [c, extent](auto &btree) {
+       return btree.init_cached_extent(
+         c,
+         extent);
+      });
   });
 }
 
@@ -489,58 +426,13 @@ BtreeLBAManager::BtreeLBAManager(
   : segment_manager(segment_manager),
     cache(cache) {}
 
-BtreeLBAManager::insert_mapping_ret BtreeLBAManager::insert_mapping(
-  Transaction &t,
-  LBANodeRef root,
-  laddr_t laddr,
-  lba_map_val_t val)
-{
-  ++(t.get_lba_tree_stats().num_inserts);
-  auto split = insert_mapping_iertr::future<LBANodeRef>(
-    interruptible::ready_future_marker{},
-    root);
-  if (root->at_max_capacity()) {
-    split = cache.get_root(t).si_then(
-      [this, root, laddr, &t](RootBlockRef croot) {
-       logger().debug(
-         "BtreeLBAManager::insert_mapping: splitting root {}",
-         *croot);
-       {
-         auto mut_croot = cache.duplicate_for_write(t, croot);
-         croot = mut_croot->cast<RootBlock>();
-       }
-       auto nroot = cache.alloc_new_extent<LBAInternalNode>(t, LBA_BLOCK_SIZE);
-       lba_node_meta_t meta{0, L_ADDR_MAX, root->get_node_meta().depth + 1};
-       nroot->set_meta(meta);
-       nroot->pin.set_range(meta);
-       nroot->journal_insert(
-         nroot->begin(),
-         L_ADDR_MIN,
-         root->get_paddr(),
-         nullptr);
-       auto new_depth = root->get_node_meta().depth + 1;
-       croot->get_root().lba_root = lba_root_t{
-         nroot->get_paddr(),
-         new_depth
-       };
-       t.get_lba_tree_stats().depth = new_depth;
-       return nroot->split_entry(
-         get_context(t),
-         laddr, nroot->begin(), root);
-      });
-  }
-  return split.si_then([this, &t, laddr, val](LBANodeRef node) {
-    return node->insert(
-      get_context(t),
-      laddr, val);
-  });
-}
-
 BtreeLBAManager::update_refcount_ret BtreeLBAManager::update_refcount(
   Transaction &t,
   laddr_t addr,
   int delta)
 {
+  LOG_PREFIX(BtreeLBAManager::update_refcount);
+  DEBUGT("addr {}, delta {}", t, addr, delta);
   return update_mapping(
     t,
     addr,
@@ -563,58 +455,39 @@ BtreeLBAManager::update_mapping_ret BtreeLBAManager::update_mapping(
   laddr_t addr,
   update_func_t &&f)
 {
-  return get_root(t
-  ).si_then([this, f=std::move(f), &t, addr](LBANodeRef root) mutable {
-    return root->mutate_mapping(
-      get_context(t),
-      addr,
-      std::move(f));
-  });
-}
+  LOG_PREFIX(BtreeLBAManager::update_mapping);
+  DEBUGT("addr {}", t, addr);
+  auto c = get_context(t);
+  return with_btree_ret<lba_map_val_t>(
+    c,
+    [f=std::move(f), c, addr](auto &btree) mutable {
+      return btree.lower_bound(
+       c, addr
+      ).si_then([&btree, f=std::move(f), c, addr](auto iter)
+               -> update_mapping_ret {
+       if (iter.is_end() || iter.get_key() != addr) {
+         return crimson::ct_error::enoent::make();
+       }
 
-BtreeLBAManager::update_internal_mapping_ret
-BtreeLBAManager::update_internal_mapping(
-  Transaction &t,
-  depth_t depth,
-  laddr_t laddr,
-  paddr_t paddr)
-{
-  return cache.get_root(t).si_then([=, &t](RootBlockRef croot) {
-    if (depth == croot->get_root().lba_root.get_depth()) {
-      logger().debug(
-       "update_internal_mapping: updating lba root to: {}->{}",
-       laddr,
-       paddr);
-      {
-       auto mut_croot = cache.duplicate_for_write(t, croot);
-       croot = mut_croot->cast<RootBlock>();
-      }
-      ceph_assert(laddr == 0);
-      auto old_paddr = croot->get_root().lba_root.get_location();
-      croot->get_root().lba_root.set_location(paddr);
-      return update_internal_mapping_ret(
-       interruptible::ready_future_marker{},
-       old_paddr);
-    } else {
-      logger().debug(
-       "update_internal_mapping: updating lba node at depth {} to: {}->{}",
-       depth,
-       laddr,
-       paddr);
-      return get_lba_btree_extent(
-       get_context(t),
-       croot,
-       croot->get_root().lba_root.get_depth(),
-       croot->get_root().lba_root.get_location(),
-       paddr_t()).si_then([=, &t](LBANodeRef broot) {
-         return broot->mutate_internal_address(
-           get_context(t),
-           depth,
-           laddr,
-           paddr);
-       });
-    }
-  });
+       auto ret = f(iter.get_val());
+       if (ret.refcount == 0) {
+         return btree.remove(
+           c,
+           iter
+         ).si_then([ret] {
+           return ret;
+         });
+       } else {
+         return btree.update(
+           c,
+           iter,
+           ret
+         ).si_then([ret](auto) {
+           return ret;
+         });
+       }
+      });
+    });
 }
 
 BtreeLBAManager::~BtreeLBAManager()
index 0c16efaca74ba639a76340346062f155d4ddf5b8..a0834b159c24c7ff55ed576444dd8d7a30d671e4 100644 (file)
 #include "common/interval_map.h"
 #include "crimson/osd/exceptions.h"
 
+#include "crimson/os/seastore/logging.h"
 #include "crimson/os/seastore/seastore_types.h"
 #include "crimson/os/seastore/lba_manager.h"
 #include "crimson/os/seastore/cache.h"
 #include "crimson/os/seastore/segment_manager.h"
 
 #include "crimson/os/seastore/lba_manager/btree/lba_btree_node.h"
+#include "crimson/os/seastore/lba_manager/btree/lba_btree.h"
 
 namespace crimson::os::seastore::lba_manager::btree {
 
@@ -68,10 +70,6 @@ public:
     extent_len_t len,
     paddr_t addr) final;
 
-  set_extent_ret set_extent(
-    Transaction &t,
-    laddr_t off, extent_len_t len, paddr_t addr) final;
-
   ref_ret decref_extent(
     Transaction &t,
     laddr_t addr) final {
@@ -131,29 +129,69 @@ private:
 
   static btree_range_pin_t &get_pin(CachedExtent &e);
 
+  template <typename F, typename... Args>
+  auto with_btree(
+    op_context_t c,
+    F &&f) {
+    return cache.get_root(
+      c.trans
+    ).si_then([this, c, f=std::forward<F>(f)](RootBlockRef croot) mutable {
+      return seastar::do_with(
+       LBABtree(croot->get_root().lba_root),
+       [this, c, croot, f=std::move(f)](auto &btree) mutable {
+         return f(
+           btree
+         ).si_then([this, c, croot, &btree] {
+           if (btree.is_root_dirty()) {
+             auto mut_croot = cache.duplicate_for_write(
+               c.trans, croot
+             )->cast<RootBlock>();
+             mut_croot->get_root().lba_root = btree.get_root_undirty();
+           }
+           return base_iertr::now();
+         });
+       });
+    });
+  }
 
-  /**
-   * get_root
-   *
-   * Get a reference to the root LBANode.
-   */
-  using get_root_iertr = base_iertr;
-  using get_root_ret = get_root_iertr::future<LBANodeRef>;
-  get_root_ret get_root(Transaction &);
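+  /**
+   * with_btree_state
+   *
+   * Loads an LBABtree from the current root, invokes f(btree, state) with
+   * state constructed from init, writes the root back to the root block if
+   * the operation dirtied it, and returns the final state.
+   */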
+  template <typename State, typename F>
+  auto with_btree_state(
+    op_context_t c,
+    State &&init,
+    F &&f) {
+    return seastar::do_with(
+      std::forward<State>(init),
+      [this, c, f=std::forward<F>(f)](auto &state) mutable {
+       (void)this; // silence incorrect clang warning about capture
+       return with_btree(c, [&state, f=std::move(f)](auto &btree) mutable {
+         return f(btree, state);
+       }).si_then([&state] {
+         return seastar::make_ready_future<State>(std::move(state));
+       });
+      });
+  }
 
-  /**
-   * insert_mapping
-   *
-   * Insert a lba mapping into the tree
-   */
-  using insert_mapping_iertr = base_iertr;
-  using insert_mapping_ret = insert_mapping_iertr::future<LBAPinRef>;
-  insert_mapping_ret insert_mapping(
-    Transaction &t,   ///< [in,out] transaction
-    LBANodeRef root,  ///< [in] root node
-    laddr_t laddr,    ///< [in] logical addr to insert
-    lba_map_val_t val ///< [in] mapping to insert
-  );
+  template <typename State, typename F>
+  auto with_btree_state(
+    op_context_t c,
+    F &&f) {
+    return with_btree_state<State, F>(c, State{}, std::forward<F>(f));
+  }
+
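+  /**
+   * with_btree_ret
+   *
+   * Variant of with_btree_state whose state is the operation's return
+   * value: runs f against the btree and returns the value its future
+   * resolves to.
+   */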
+  template <typename Ret, typename F>
+  auto with_btree_ret(
+    op_context_t c,
+    F &&f) {
+    return with_btree_state<Ret>(
+      c,
+      [f=std::forward<F>(f)](auto &btree, auto &ret) mutable {
+       return f(
+         btree
+       ).si_then([&ret](auto &&_ret) {
+         ret = std::move(_ret);
+       });
+      });
+  }
 
   /**
    * update_refcount
@@ -173,19 +211,13 @@ private:
    */
   using update_mapping_iertr = ref_iertr;
   using update_mapping_ret = ref_iertr::future<lba_map_val_t>;
-  using update_func_t = LBANode::mutate_func_t;
+  using update_func_t = std::function<
+    lba_map_val_t(const lba_map_val_t &v)
+    >;
   update_mapping_ret update_mapping(
     Transaction &t,
     laddr_t addr,
     update_func_t &&f);
-
-  using update_internal_mapping_iertr = LBANode::mutate_internal_address_iertr;
-  using update_internal_mapping_ret = LBANode::mutate_internal_address_ret;
-  update_internal_mapping_ret update_internal_mapping(
-    Transaction &t,
-    depth_t depth,
-    laddr_t laddr,
-    paddr_t paddr);
 };
 using BtreeLBAManagerRef = std::unique_ptr<BtreeLBAManager>;
 
index 08e3979c020ec120f9903d5d9456a570dffd75f7..bdc76c441dbe62a1cbb214c226154f7426999b77 100644 (file)
@@ -235,6 +235,7 @@ public:
 
 class BtreeLBAPin : public LBAPin {
   friend class BtreeLBAManager;
+  friend class LBABtree;
 
   /**
    * parent
diff --git a/src/crimson/os/seastore/lba_manager/btree/lba_btree.cc b/src/crimson/os/seastore/lba_manager/btree/lba_btree.cc
new file mode 100644 (file)
index 0000000..e350f43
--- /dev/null
@@ -0,0 +1,863 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "crimson/common/log.h"
+#include "crimson/os/seastore/logging.h"
+
+#include "crimson/os/seastore/lba_manager/btree/lba_btree.h"
+
+namespace crimson::os::seastore::lba_manager::btree {
+
+LBABtree::mkfs_ret LBABtree::mkfs(op_context_t c)
+{
+  auto root_leaf = c.cache.alloc_new_extent<LBALeafNode>(
+    c.trans,
+    LBA_BLOCK_SIZE);
+  root_leaf->set_size(0);
+  lba_node_meta_t meta{0, L_ADDR_MAX, 1};
+  root_leaf->set_meta(meta);
+  root_leaf->pin.set_range(meta);
+  c.trans.get_lba_tree_stats().depth = 1u;
+  return lba_root_t{root_leaf->get_paddr(), 1u};
+}
+
+LBABtree::iterator_fut LBABtree::iterator::next(
+  op_context_t c,
+  mapped_space_visitor_t *visitor) const
+{
+  assert_valid();
+  assert(!is_end());
+
+  if ((leaf.pos + 1) < leaf.node->get_size()) {
+    auto ret = *this;
+    ret.leaf.pos++;
+    return iterator_fut(
+      interruptible::ready_future_marker{},
+      ret);
+  }
+
+  depth_t depth_with_space = 2;
+  for (; depth_with_space <= get_depth(); ++depth_with_space) {
+    if ((get_internal(depth_with_space).pos + 1) <
+       get_internal(depth_with_space).node->get_size()) {
+      break;
+    }
+  }
+
+  if (depth_with_space <= get_depth()) {
+    return seastar::do_with(
+      *this,
+      [](const LBAInternalNode &internal) { return internal.begin(); },
+      [](const LBALeafNode &leaf) { return leaf.begin(); },
+      [c, depth_with_space, visitor](auto &ret, auto &li, auto &ll) {
+       for (depth_t depth = 2; depth < depth_with_space; ++depth) {
+         ret.get_internal(depth).reset();
+       }
+       ret.leaf.reset();
+       ret.get_internal(depth_with_space).pos++;
+       return lookup_depth_range(
+         c, ret, depth_with_space - 1, 0, li, ll, visitor
+       ).si_then([&ret] {
+         return std::move(ret);
+       });
+      });
+  } else {
+    // end
+    auto ret = *this;
+    ret.leaf.pos = ret.leaf.node->get_size();
+    return iterator_fut(
+      interruptible::ready_future_marker{},
+      ret);
+  }
+}
+
+LBABtree::iterator_fut LBABtree::iterator::prev(op_context_t c) const
+{
+  assert_valid();
+  assert(!is_begin());
+
+  auto ret = *this;
+
+  if (is_end()) {
+    ret.leaf.pos = ret.leaf.node->get_size();
+  }
+
+  if (ret.leaf.pos > 0) {
+    ret.leaf.pos--;
+    return iterator_fut(
+      interruptible::ready_future_marker{},
+      ret);
+  }
+
+  depth_t depth_with_space = 2;
+  for (; depth_with_space <= get_depth(); ++depth_with_space) {
+    if (ret.get_internal(depth_with_space).pos > 0) {
+      break;
+    }
+  }
+
+  assert(depth_with_space <= ret.get_depth()); // must not be begin()
+  return seastar::do_with(
+    std::move(ret),
+    [](const LBAInternalNode &internal) { return --internal.end(); },
+    [](const LBALeafNode &leaf) { return --leaf.end(); },
+    [c, depth_with_space](auto &ret, auto &li, auto &ll) {
+      for (depth_t depth = 2; depth < depth_with_space; ++depth) {
+       ret.get_internal(depth).reset();
+      }
+      ret.leaf.reset();
+      ret.get_internal(depth_with_space).pos--;
+      return lookup_depth_range(
+       c, ret, depth_with_space - 1, 0, li, ll, nullptr
+      ).si_then([&ret] {
+       return std::move(ret);
+      });
+    });
+}
+
+LBABtree::iterator_fut LBABtree::lower_bound(
+  op_context_t c,
+  laddr_t addr,
+  mapped_space_visitor_t *visitor) const
+{
+  LOG_PREFIX(LBABtree::lower_bound);
+  return lookup(
+    c,
+    [addr](const LBAInternalNode &internal) {
+      assert(internal.get_size() > 0);
+      auto iter = internal.upper_bound(addr);
+      assert(iter != internal.begin());
+      --iter;
+      return iter;
+    },
+    [FNAME, c, addr](const LBALeafNode &leaf) {
+      auto ret = leaf.lower_bound(addr);
+      DEBUGT(
+       "leaf addr {}, got ret offset {}, size {}, end {}",
+       c.trans,
+       addr,
+       ret.get_offset(),
+       leaf.get_size(),
+       ret == leaf.end());
+      return ret;
+    },
+    visitor
+  ).si_then([FNAME, c](auto &&ret) {
+    DEBUGT(
+      "ret.leaf.pos {}",
+      c.trans,
+      ret.leaf.pos);
+    ret.assert_valid();
+    return std::move(ret);
+  });
+}
+
+LBABtree::insert_ret LBABtree::insert(
+  op_context_t c,
+  iterator iter,
+  laddr_t laddr,
+  lba_map_val_t val)
+{
+  LOG_PREFIX(LBABtree::insert);
+  DEBUGT(
+    "inserting laddr {} at iter {}",
+    c.trans,
+    laddr,
+    iter.is_end() ? L_ADDR_MAX : iter.get_key());
+  return seastar::do_with(
+    iter,
+    [this, c, laddr, val](auto &ret) {
+      return find_insertion(
+       c, laddr, ret
+      ).si_then([this, c, laddr, val, &ret] {
+       if (!ret.is_end() && ret.get_key() == laddr) {
+         return insert_ret(
+           interruptible::ready_future_marker{},
+           std::make_pair(ret, false));
+       } else {
+         return handle_split(
+           c, ret
+         ).si_then([c, laddr, val, &ret] {
+           if (!ret.leaf.node->is_pending()) {
+             CachedExtentRef mut = c.cache.duplicate_for_write(
+               c.trans, ret.leaf.node
+             );
+             ret.leaf.node = mut->cast<LBALeafNode>();
+           }
+           auto iter = ret.leaf.node->lower_bound(laddr);
+           if (iter != ret.leaf.node->end() && iter->get_key() == laddr) {
+             return insert_ret(
+               interruptible::ready_future_marker{},
+               std::make_pair(ret, false));
+           } else {
+             ret.leaf.pos = iter->get_offset();
+             assert(laddr >= ret.leaf.node->get_meta().begin &&
+                    laddr < ret.leaf.node->get_meta().end);
+             ret.leaf.node->insert(iter, laddr, val);
+             return insert_ret(
+               interruptible::ready_future_marker{},
+               std::make_pair(ret, true));
+           }
+         });
+       }
+      });
+    });
+}
+
+LBABtree::update_ret LBABtree::update(
+  op_context_t c,
+  iterator iter,
+  lba_map_val_t val)
+{
+  LOG_PREFIX(LBABtree::update);
+  DEBUGT(
+    "update element at {}",
+    c.trans,
+    iter.is_end() ? L_ADDR_MAX : iter.get_key());
+  if (!iter.leaf.node->is_pending()) {
+    CachedExtentRef mut = c.cache.duplicate_for_write(
+      c.trans, iter.leaf.node
+    );
+    iter.leaf.node = mut->cast<LBALeafNode>();
+  }
+  iter.leaf.node->update(
+    iter.leaf.node->iter_idx(iter.leaf.pos),
+    val);
+  return update_ret(
+    interruptible::ready_future_marker{},
+    iter);
+}
+
+LBABtree::remove_ret LBABtree::remove(
+  op_context_t c,
+  iterator iter)
+{
+  LOG_PREFIX(LBABtree::remove);
+  DEBUGT(
+    "remove element at {}",
+    c.trans,
+    iter.is_end() ? L_ADDR_MAX : iter.get_key());
+  assert(!iter.is_end());
+  return seastar::do_with(
+    iter,
+    [this, c](auto &ret) {
+      if (!ret.leaf.node->is_pending()) {
+       CachedExtentRef mut = c.cache.duplicate_for_write(
+         c.trans, ret.leaf.node
+       );
+       ret.leaf.node = mut->cast<LBALeafNode>();
+      }
+      ret.leaf.node->remove(
+       ret.leaf.node->iter_idx(ret.leaf.pos));
+
+      return handle_merge(
+       c, ret
+      );
+    });
+}
+
+LBABtree::init_cached_extent_ret LBABtree::init_cached_extent(
+  op_context_t c,
+  CachedExtentRef e)
+{
+  LOG_PREFIX(LBABtree::init_cached_extent);
+  DEBUGT(": extent {}", c.trans, *e);
+  if (e->is_logical()) {
+    auto logn = e->cast<LogicalCachedExtent>();
+    return lower_bound(
+      c,
+      logn->get_laddr()
+    ).si_then([FNAME, e, c, logn](auto iter) {
+      if (!iter.is_end() &&
+         iter.get_key() == logn->get_laddr() &&
+         iter.get_val().paddr == logn->get_paddr()) {
+       logn->set_pin(iter.get_pin());
+       ceph_assert(iter.get_val().len == e->get_length());
+       c.pins.add_pin(
+         static_cast<BtreeLBAPin&>(logn->get_pin()).pin);
+       DEBUGT(": logical extent {} live, initialized", c.trans, *logn);
+       return e;
+      } else {
+       DEBUGT(": logical extent {} not live, dropping", c.trans, *logn);
+       c.cache.drop_from_cache(logn);
+       return CachedExtentRef();
+      }
+    });
+  } else if (e->get_type() == extent_types_t::LADDR_INTERNAL) {
+    auto eint = e->cast<LBAInternalNode>();
+    return lower_bound(
+      c, eint->get_node_meta().begin
+    ).si_then([FNAME, e, c, eint](auto iter) {
+      // Note, this check is valid even if iter.is_end()
+      depth_t cand_depth = eint->get_node_meta().depth;
+      if (cand_depth <= iter.get_depth() &&
+         &*iter.get_internal(cand_depth).node == &*eint) {
+       DEBUGT(": extent {} is live", c.trans, *eint);
+       return e;
+      } else {
+       DEBUGT(": extent {} is not live", c.trans, *eint);
+       c.cache.drop_from_cache(eint);
+       return CachedExtentRef();
+      }
+    });
+  } else if (e->get_type() == extent_types_t::LADDR_LEAF) {
+    auto eleaf = e->cast<LBALeafNode>();
+    return lower_bound(
+      c, eleaf->get_node_meta().begin
+    ).si_then([FNAME, c, e, eleaf](auto iter) {
+      // Note, this check is valid even if iter.is_end()
+      if (iter.leaf.node == &*eleaf) {
+       DEBUGT(": extent {} is live", c.trans, *eleaf);
+       return e;
+      } else {
+       DEBUGT(": extent {} is not live", c.trans, *eleaf);
+       c.cache.drop_from_cache(eleaf);
+       return CachedExtentRef();
+      }
+    });
+  } else {
+    DEBUGT(
+      ": found other extent {} type {}",
+      c.trans,
+      *e,
+      e->get_type());
+    return init_cached_extent_ret(
+      interruptible::ready_future_marker{},
+      e);
+  }
+}
+
+LBABtree::rewrite_lba_extent_ret LBABtree::rewrite_lba_extent(
+  op_context_t c,
+  CachedExtentRef e)
+{
+  LOG_PREFIX(LBABtree::rewrite_lba_extent);
+  assert(e->get_type() == extent_types_t::LADDR_INTERNAL ||
+        e->get_type() == extent_types_t::LADDR_LEAF);
+
+  auto do_rewrite = [&](auto &lba_extent) {
+    auto nlba_extent = c.cache.alloc_new_extent<
+      std::remove_reference_t<decltype(lba_extent)>
+      >(
+      c.trans,
+      lba_extent.get_length());
+    lba_extent.get_bptr().copy_out(
+      0,
+      lba_extent.get_length(),
+      nlba_extent->get_bptr().c_str());
+    nlba_extent->pin.set_range(nlba_extent->get_node_meta());
+
+    /* This is a bit underhanded.  Any relative addrs here must necessarily
+     * be record relative as we are rewriting a dirty extent.  Thus, we
+     * are using resolve_relative_addrs with a (likely negative) block
+     * relative offset to correct them to block-relative offsets adjusted
+     * for our new transaction location.
+     *
+     * Upon commit, these now block-relative addresses will be interpreted
+     * against the real final address.
+     */
+    nlba_extent->resolve_relative_addrs(
+      make_record_relative_paddr(0) - nlba_extent->get_paddr());
+
+    DEBUGT(
+      "rewriting {} into {}",
+      c.trans,
+      lba_extent,
+      *nlba_extent);
+
+    return update_internal_mapping(
+      c,
+      nlba_extent->get_node_meta().depth,
+      nlba_extent->get_node_meta().begin,
+      e->get_paddr(),
+      nlba_extent->get_paddr()
+    ).si_then([c, e] {
+      c.cache.retire_extent(c.trans, e);
+    });
+  };
+
+  if (e->get_type() == extent_types_t::LADDR_INTERNAL) {
+    auto lint = e->cast<LBAInternalNode>();
+    return do_rewrite(*lint);
+  } else {
+    assert(e->get_type() == extent_types_t::LADDR_LEAF);
+    auto lleaf = e->cast<LBALeafNode>();
+    return do_rewrite(*lleaf);
+  }
+}
+
+LBABtree::get_internal_node_ret LBABtree::get_internal_node(
+  op_context_t c,
+  depth_t depth,
+  paddr_t offset)
+{
+  LOG_PREFIX(LBABtree::get_internal_node);
+  DEBUGT(
+    "reading internal at offset {}, depth {}",
+    c.trans,
+    offset,
+    depth);
+    return c.cache.get_extent<LBAInternalNode>(
+      c.trans,
+      offset,
+      LBA_BLOCK_SIZE
+    ).si_then([FNAME, c, offset](LBAInternalNodeRef ret) {
+      DEBUGT(
+       "read internal at offset {} {}",
+       c.trans,
+       offset,
+       *ret);
+      auto meta = ret->get_meta();
+      if (ret->get_size()) {
+       ceph_assert(meta.begin <= ret->begin()->get_key());
+       ceph_assert(meta.end > (ret->end() - 1)->get_key());
+      }
+      if (!ret->is_pending() && !ret->pin.is_linked()) {
+       ret->pin.set_range(meta);
+       c.pins.add_pin(ret->pin);
+      }
+      return get_internal_node_ret(
+       interruptible::ready_future_marker{},
+       ret);
+    });
+}
+
+LBABtree::get_leaf_node_ret LBABtree::get_leaf_node(
+  op_context_t c,
+  paddr_t offset)
+{
+  LOG_PREFIX(LBABtree::get_leaf_node);
+  DEBUGT(
+    "reading leaf at offset {}",
+    c.trans,
+    offset);
+  return c.cache.get_extent<LBALeafNode>(
+    c.trans,
+    offset,
+    LBA_BLOCK_SIZE
+  ).si_then([FNAME, c, offset](LBALeafNodeRef ret) {
+    DEBUGT(
+      "read leaf at offset {} {}",
+      c.trans,
+      offset,
+      *ret);
+    auto meta = ret->get_meta();
+    if (ret->get_size()) {
+      ceph_assert(meta.begin <= ret->begin()->get_key());
+      ceph_assert(meta.end > (ret->end() - 1)->get_key());
+    }
+    if (!ret->is_pending() && !ret->pin.is_linked()) {
+      ret->pin.set_range(meta);
+      c.pins.add_pin(ret->pin);
+    }
+    return get_leaf_node_ret(
+      interruptible::ready_future_marker{},
+      ret);
+  });
+}
+
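+/* find_insertion
+ *
+ * Repositions iter (which must satisfy iter.is_end() || iter.get_key() >=
+ * laddr) onto the leaf into which laddr should be inserted: either the
+ * current leaf, if laddr falls within its range, or the preceding leaf
+ * with pos just past its last entry.
+ */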
+LBABtree::find_insertion_ret LBABtree::find_insertion(
+  op_context_t c,
+  laddr_t laddr,
+  iterator &iter)
+{
+  assert(iter.is_end() || iter.get_key() >= laddr);
+  if (!iter.is_end() && iter.get_key() == laddr) {
+    return seastar::now();
+  } else if (iter.leaf.node->get_node_meta().begin <= laddr) {
+    auto p = iter;
+    if (p.leaf.pos > 0) {
+      --p.leaf.pos;
+      assert(p.get_key() < laddr);
+    }
+    return seastar::now();
+  } else {
+    assert(iter.leaf.pos == 0);
+    return iter.prev(
+      c
+    ).si_then([laddr, &iter](auto p) {
+      assert(p.leaf.node->get_node_meta().begin <= laddr);
+      assert(p.get_key() < laddr);
+      // Note, this is specifically allowed to violate the iterator
+      // invariant that pos is a valid index for the node in the event
+      // that the insertion point is at the end of a node.
+      p.leaf.pos++;
+      iter = p;
+      return seastar::now();
+    });
+  }
+}
+
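+/* handle_split
+ *
+ * Splits any full nodes along iter's path prior to an insertion.  If every
+ * node up to the root is at max capacity, a new root is allocated first;
+ * splitting then proceeds from the highest full level down to the leaf,
+ * keeping iter pointed at the insertion position.  No-op if the leaf has
+ * room.
+ */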
+LBABtree::handle_split_ret LBABtree::handle_split(
+  op_context_t c,
+  iterator &iter)
+{
+  LOG_PREFIX(LBABtree::handle_split);
+
+  depth_t split_from = iter.check_split();
+
+  DEBUGT("split_from {}, depth {}", c.trans, split_from, iter.get_depth());
+
+  if (split_from == iter.get_depth()) {
+    auto nroot = c.cache.alloc_new_extent<LBAInternalNode>(
+      c.trans, LBA_BLOCK_SIZE);
+    lba_node_meta_t meta{0, L_ADDR_MAX, iter.get_depth() + 1};
+    nroot->set_meta(meta);
+    nroot->pin.set_range(meta);
+    nroot->journal_insert(
+      nroot->begin(),
+      L_ADDR_MIN,
+      root.get_location(),
+      nullptr);
+    iter.internal.push_back({nroot, 0});
+
+    root.set_location(nroot->get_paddr());
+    root.set_depth(iter.get_depth());
+    c.trans.get_lba_tree_stats().depth = iter.get_depth();
+    root_dirty = true;
+  }
+
+  /* pos may be either node_position_t<LBALeafNode> or
+   * node_position_t<LBAInternalNode> */
+  auto split_level = [&](auto &parent_pos, auto &pos) {
+    auto [left, right, pivot] = pos.node->make_split_children(c);
+
+    auto parent_node = parent_pos.node;
+    auto parent_iter = parent_pos.get_iter();
+
+    parent_node->update(
+      parent_iter,
+      left->get_paddr());
+    parent_node->insert(
+      parent_iter + 1,
+      pivot,
+      right->get_paddr());
+
+    c.cache.retire_extent(c.trans, pos.node);
+
+    /* right->get_node_meta().begin == pivot == right->begin()->get_key()
+     * Thus, if pos.pos == left->get_size(), we want iter to point to
+     * left with pos.pos at the end rather than right with pos.pos = 0
+     * since the insertion would be to the left of the first element
+     * of right and thus necessarily less than right->get_node_meta().begin.
+     */
+    if (pos.pos <= left->get_size()) {
+      pos.node = left;
+    } else {
+      pos.node = right;
+      pos.pos -= left->get_size();
+
+      parent_pos.pos += 1;
+    }
+  };
+
+  for (; split_from > 0; --split_from) {
+    auto &parent_pos = iter.get_internal(split_from + 1);
+    if (!parent_pos.node->is_pending()) {
+      parent_pos.node = c.cache.duplicate_for_write(
+       c.trans, parent_pos.node
+      )->cast<LBAInternalNode>();
+    }
+
+    if (split_from > 1) {
+      auto &pos = iter.get_internal(split_from);
+      DEBUGT("splitting parent {} depth {}", c.trans, split_from, *pos.node);
+      split_level(parent_pos, pos);
+    } else {
+      auto &pos = iter.leaf;
+      DEBUGT("splitting child {}", c.trans, *pos.node);
+      split_level(parent_pos, pos);
+    }
+  }
+
+  return seastar::now();
+}
+
+template <typename NodeType>
+LBABtree::base_iertr::future<typename NodeType::Ref> get_node(
+  op_context_t c,
+  depth_t depth,
+  paddr_t addr);
+
+template <>
+LBABtree::base_iertr::future<LBALeafNodeRef> get_node<LBALeafNode>(
+  op_context_t c,
+  depth_t depth,
+  paddr_t addr) {
+  assert(depth == 1);
+  return LBABtree::get_leaf_node(c, addr);
+}
+
+template <>
+LBABtree::base_iertr::future<LBAInternalNodeRef> get_node<LBAInternalNode>(
+  op_context_t c,
+  depth_t depth,
+  paddr_t addr) {
+  return LBABtree::get_internal_node(c, depth, addr);
+}
+
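+/* merge_level
+ *
+ * Merges or rebalances pos.node at the given depth with an adjacent
+ * sibling (the donor) selected through parent_pos: a full merge if the
+ * donor is at min capacity, otherwise a rebalance via make_balanced.
+ * parent_pos and pos are updated to continue referring to the original
+ * entry.
+ */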
+template <typename NodeType>
+LBABtree::handle_merge_ret merge_level(
+  op_context_t c,
+  depth_t depth,
+  LBABtree::node_position_t<LBAInternalNode> &parent_pos,
+  LBABtree::node_position_t<NodeType> &pos)
+{
+  if (!parent_pos.node->is_pending()) {
+    parent_pos.node = c.cache.duplicate_for_write(
+      c.trans, parent_pos.node
+    )->cast<LBAInternalNode>();
+  }
+
+  auto iter = parent_pos.get_iter();
+  assert(iter.get_offset() < parent_pos.node->get_size());
+  bool donor_is_left = ((iter.get_offset() + 1) == parent_pos.node->get_size());
+  auto donor_iter = donor_is_left ? (iter - 1) : (iter + 1);
+
+  return get_node<NodeType>(
+    c,
+    depth,
+    donor_iter.get_val().maybe_relative_to(parent_pos.node->get_paddr())
+  ).si_then([c, iter, donor_iter, donor_is_left, &parent_pos, &pos](
+             typename NodeType::Ref donor) {
+    auto [l, r] = donor_is_left ?
+      std::make_pair(donor, pos.node) : std::make_pair(pos.node, donor);
+
+    auto [liter, riter] = donor_is_left ?
+      std::make_pair(donor_iter, iter) : std::make_pair(iter, donor_iter);
+
+    if (donor->at_min_capacity()) {
+      auto replacement = l->make_full_merge(c, r);
+
+      parent_pos.node->update(
+       liter,
+       replacement->get_paddr());
+      parent_pos.node->remove(riter);
+
+      pos.node = replacement;
+      if (donor_is_left) {
+       pos.pos += r->get_size();
+       parent_pos.pos--;
+      }
+
+      c.cache.retire_extent(c.trans, l);
+      c.cache.retire_extent(c.trans, r);
+    } else {
+      auto [replacement_l, replacement_r, pivot] =
+       l->make_balanced(
+         c,
+         r,
+         !donor_is_left);
+
+      parent_pos.node->update(
+       liter,
+       replacement_l->get_paddr());
+      parent_pos.node->replace(
+       riter,
+       pivot,
+       replacement_r->get_paddr());
+
+      if (donor_is_left) {
+       assert(parent_pos.pos > 0);
+       parent_pos.pos--;
+      }
+
+      auto orig_position = donor_is_left ?
+       l->get_size() + pos.pos :
+       pos.pos;
+      if (orig_position < replacement_l->get_size()) {
+       pos.node = replacement_l;
+       pos.pos = orig_position;
+      } else {
+       parent_pos.pos++;
+       pos.node = replacement_r;
+       pos.pos = orig_position - replacement_l->get_size();
+      }
+
+      c.cache.retire_extent(c.trans, l);
+      c.cache.retire_extent(c.trans, r);
+    }
+
+    return seastar::now();
+  });
+}
+
+LBABtree::handle_merge_ret LBABtree::handle_merge(
+  op_context_t c,
+  iterator &iter)
+{
+  LOG_PREFIX(LBABtree::handle_merge);
+  if (!iter.leaf.node->at_min_capacity() ||
+      iter.get_depth() == 1) {
+    DEBUGT(
+      "no need to merge leaf, leaf size {}, depth {}",
+      c.trans,
+      iter.leaf.node->get_size(),
+      iter.get_depth());
+    return seastar::now();
+  }
+
+  return seastar::do_with(
+    depth_t{1},
+    [FNAME, this, c, &iter](auto &to_merge) {
+      return trans_intr::repeat(
+       [FNAME, this, c, &iter, &to_merge] {
+         DEBUGT(
+           "merging depth {}",
+           c.trans,
+           to_merge);
+         auto &parent_pos = iter.get_internal(to_merge + 1);
+         auto merge_fut = handle_merge_iertr::now();
+         if (to_merge > 1) {
+           auto &pos = iter.get_internal(to_merge);
+           merge_fut = merge_level(c, to_merge, parent_pos, pos);
+         } else {
+           auto &pos = iter.leaf;
+           merge_fut = merge_level(c, to_merge, parent_pos, pos);
+         }
+
+         return merge_fut.si_then([FNAME, this, c, &iter, &to_merge] {
+           ++to_merge;
+           auto &pos = iter.get_internal(to_merge);
+           if (to_merge == iter.get_depth()) {
+             if (pos.node->get_size() == 1) {
+               DEBUGT("collapsing root", c.trans);
+               c.cache.retire_extent(c.trans, pos.node);
+               assert(pos.pos == 0);
+               auto node_iter = pos.get_iter();
+               root.set_location(
+                 node_iter->get_val().maybe_relative_to(pos.node->get_paddr()));
+               iter.internal.pop_back();
+               root.set_depth(iter.get_depth());
+               c.trans.get_lba_tree_stats().depth = iter.get_depth();
+               root_dirty = true;
+             } else {
+               DEBUGT("no need to collapse root", c.trans);
+             }
+             return seastar::stop_iteration::yes;
+           } else if (pos.node->at_min_capacity()) {
+             DEBUGT(
+               "continuing, next node {} depth {} at min",
+               c.trans,
+               *pos.node,
+               to_merge);
+             return seastar::stop_iteration::no;
+           } else {
+             DEBUGT(
+               "complete, next node {} depth {} not min",
+               c.trans,
+               *pos.node,
+               to_merge);
+             return seastar::stop_iteration::yes;
+           }
+         });
+       });
+    });
+}
+
+LBABtree::update_internal_mapping_ret LBABtree::update_internal_mapping(
+  op_context_t c,
+  depth_t depth,
+  laddr_t laddr,
+  paddr_t old_addr,
+  paddr_t new_addr)
+{
+  LOG_PREFIX(LBABtree::update_internal_mapping);
+  DEBUGT(
+    "updating laddr {} at depth {} from {} to {}",
+    c.trans,
+    laddr,
+    depth,
+    old_addr,
+    new_addr);
+
+  return lower_bound(
+    c, laddr
+  ).si_then([=](auto iter) {
+    assert(iter.get_depth() >= depth);
+    if (depth == iter.get_depth()) {
+      DEBUGT("update at root", c.trans);
+
+      if (laddr != 0) {
+       ERRORT(
+         "updating root laddr {} at depth {} from {} to {},"
+         "laddr is not 0",
+         c.trans,
+         laddr,
+         depth,
+         old_addr,
+         new_addr,
+         root.get_location());
+       ceph_assert(0 == "impossible");
+      }
+
+      if (root.get_location() != old_addr) {
+       ERRORT(
+         "updating root laddr {} at depth {} from {} to {},"
+         "root addr {} does not match",
+         c.trans,
+         laddr,
+         depth,
+         old_addr,
+         new_addr,
+         root.get_location());
+       ceph_assert(0 == "impossible");
+      }
+
+      root.set_location(new_addr);
+      root_dirty = true;
+    } else {
+      auto &parent = iter.get_internal(depth + 1);
+      assert(parent.node);
+      assert(parent.pos < parent.node->get_size());
+      auto piter = parent.node->iter_idx(parent.pos);
+
+      if (piter->get_key() != laddr) {
+       ERRORT(
+         "updating laddr {} at depth {} from {} to {},"
+         "node {} pos {} val pivot addr {} does not match",
+         c.trans,
+         laddr,
+         depth,
+         old_addr,
+         new_addr,
+         *(parent.node),
+         parent.pos,
+         piter->get_key());
+       ceph_assert(0 == "impossible");
+      }
+
+      if (piter->get_val() != old_addr) {
+       ERRORT(
+         "updating laddr {} at depth {} from {} to {},"
+         "node {} pos {} val addr {} does not match",
+         c.trans,
+         laddr,
+         depth,
+         old_addr,
+         new_addr,
+         *(parent.node),
+         parent.pos,
+         piter->get_val());
+       ceph_assert(0 == "impossible");
+      }
+
+      CachedExtentRef mut = c.cache.duplicate_for_write(
+       c.trans,
+       parent.node
+      );
+      LBAInternalNodeRef mparent = mut->cast<LBAInternalNode>();
+      mparent->update(piter, new_addr);
+
+      /* Note, iter is now invalid as we didn't update either the parent
+       * node reference to the new mutable instance nor did we update the
+       * child pointer to the new node.  Not a problem as we'll now just
+       * destruct it.
+       */
+    }
+    return seastar::now();
+  });
+}
+}
diff --git a/src/crimson/os/seastore/lba_manager/btree/lba_btree.h b/src/crimson/os/seastore/lba_manager/btree/lba_btree.h
new file mode 100644 (file)
index 0000000..887d58a
--- /dev/null
@@ -0,0 +1,608 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include <boost/container/static_vector.hpp>
+#include <sys/mman.h>
+#include <memory>
+#include <string.h>
+
+#include "crimson/common/log.h"
+
+#include "crimson/os/seastore/lba_manager.h"
+#include "crimson/os/seastore/seastore_types.h"
+#include "crimson/os/seastore/lba_manager/btree/lba_btree_node.h"
+
+namespace crimson::os::seastore::lba_manager::btree {
+
+
+class LBABtree {
+  static constexpr size_t MAX_DEPTH = 16;
+public:
+  using base_iertr = LBAManager::base_iertr;
+
+  class iterator;
+  using iterator_fut = base_iertr::future<iterator>;
+
+  using mapped_space_visitor_t = LBAManager::scan_mapped_space_func_t;
+
+  class iterator {
+  public:
+    iterator(const iterator &) noexcept = default;
+    iterator(iterator &&) noexcept = default;
+    iterator &operator=(const iterator &) = default;
+    iterator &operator=(iterator &&) = default;
+
+    iterator(depth_t depth) : internal(depth - 1) {}
+
+    iterator_fut next(
+      op_context_t c,
+      mapped_space_visitor_t *visit=nullptr) const;
+
+    iterator_fut prev(op_context_t c) const;
+
+    void assert_valid() const {
+      assert(leaf.node);
+      assert(leaf.pos <= leaf.node->get_size());
+
+      for (auto &i: internal) {
+       (void)i;
+       assert(i.node);
+       assert(i.pos < i.node->get_size());
+      }
+    }
+
+    depth_t get_depth() const {
+      return internal.size() + 1;
+    }
+
+    auto &get_internal(depth_t depth) {
+      assert(depth > 1);
+      assert((depth - 2) < internal.size());
+      return internal[depth - 2];
+    }
+
+    const auto &get_internal(depth_t depth) const {
+      assert(depth > 1);
+      assert((depth - 2) < internal.size());
+      return internal[depth - 2];
+    }
+
+    laddr_t get_key() const {
+      assert(!is_end());
+      return leaf.node->iter_idx(leaf.pos).get_key();
+    }
+    lba_map_val_t get_val() const {
+      assert(!is_end());
+      return leaf.node->iter_idx(leaf.pos).get_val();
+    }
+
+    bool is_end() const {
+      assert(leaf.pos <= leaf.node->get_size());
+      return leaf.pos == leaf.node->get_size();
+    }
+
+    bool is_begin() const {
+      for (auto &i: internal) {
+       if (i.pos != 0)
+         return false;
+      }
+      return leaf.pos == 0;
+    }
+
+    LBAPinRef get_pin() const {
+      assert(!is_end());
+      return std::make_unique<BtreeLBAPin>(
+       leaf.node,
+       get_val().paddr.maybe_relative_to(leaf.node->get_paddr()),
+       lba_node_meta_t{ get_key(), get_key() + get_val().len, 0 });
+    }
+
+  private:
+    iterator() = default;
+
+    friend class LBABtree;
+    static constexpr uint16_t INVALID = std::numeric_limits<uint16_t>::max();
+    template <typename NodeType>
+    struct node_position_t {
+      typename NodeType::Ref node;
+      uint16_t pos = INVALID;
+
+      void reset() {
+       *this = node_position_t{};
+      }
+
+      auto get_iter() {
+       assert(pos != INVALID);
+       assert(pos < node->get_size());
+       return node->iter_idx(pos);
+      }
+    };
+    boost::container::static_vector<
+      node_position_t<LBAInternalNode>, MAX_DEPTH> internal;
+    node_position_t<LBALeafNode> leaf;
+
+    depth_t check_split() const {
+      if (!leaf.node->at_max_capacity()) {
+       return 0;
+      }
+      for (depth_t split_from = 1; split_from < get_depth(); ++split_from) {
+       if (!get_internal(split_from + 1).node->at_max_capacity())
+         return split_from;
+      }
+      return get_depth();
+    }
+
+    depth_t check_merge() const {
+      if (!leaf.node->at_min_capacity()) {
+       return 0;
+      }
+      for (depth_t merge_from = 1; merge_from < get_depth(); ++merge_from) {
+       if (!get_internal(merge_from + 1).node->at_min_capacity())
+         return merge_from;
+      }
+      return get_depth();
+    }
+  };
+
+  LBABtree(lba_root_t root) : root(root) {}
+
+  bool is_root_dirty() const {
+    return root_dirty;
+  }
+  lba_root_t get_root_undirty() {
+    ceph_assert(root_dirty);
+    root_dirty = false;
+    return root;
+  }
+
+  /// mkfs
+  using mkfs_ret = lba_root_t;
+  static mkfs_ret mkfs(op_context_t c);
+
+  /**
+   * lower_bound
+   *
+   * @param c [in] context
+   * @param addr [in] addr to look up
+   * @return least iterator with key >= addr
+   */
+  iterator_fut lower_bound(
+    op_context_t c,
+    laddr_t addr,
+    mapped_space_visitor_t *visit=nullptr) const;
+
+  /**
+   * upper_bound
+   *
+   * @param c [in] context
+   * @param addr [in] addr to look up
+   * @return least iterator with key > addr
+   */
+  iterator_fut upper_bound(
+    op_context_t c,
+    laddr_t addr
+  ) const {
+    return lower_bound(
+      c, addr
+    ).si_then([c, addr](auto iter) {
+      if (!iter.is_end() && iter.get_key() == addr) {
+       return iter.next(c);
+      } else {
+       return iterator_fut(
+         interruptible::ready_future_marker{},
+         iter);
+      }
+    });
+  }
+
+  /**
+   * upper_bound_right
+   *
+   * @param c [in] context
+   * @param addr [in] addr
+   * @return least iterator i s.t. i.get_key() + i.get_val().len > addr
+   */
+  iterator_fut upper_bound_right(
+    op_context_t c,
+    laddr_t addr) const
+  {
+    return lower_bound(
+      c, addr
+    ).si_then([c, addr](auto iter) {
+      if (iter.is_begin()) {
+       return iterator_fut(
+         interruptible::ready_future_marker{},
+         iter);
+      } else {
+       return iter.prev(
+         c
+       ).si_then([iter, addr](auto prev) {
+         if ((prev.get_key() + prev.get_val().len) > addr) {
+           return iterator_fut(
+             interruptible::ready_future_marker{},
+             prev);
+         } else {
+           return iterator_fut(
+             interruptible::ready_future_marker{},
+             iter);
+         }
+       });
+      }
+    });
+  }
+
+  iterator_fut begin(op_context_t c) const {
+    return lower_bound(c, 0);
+  }
+  iterator_fut end(op_context_t c) const {
+    return upper_bound(c, L_ADDR_MAX);
+  }
+
+  using iterate_repeat_ret = base_iertr::future<
+    seastar::stop_iteration>;
+  template <typename F>
+  static auto iterate_repeat(
+    op_context_t c,
+    iterator_fut &&iter_fut,
+    F &&f,
+    mapped_space_visitor_t *visitor=nullptr) {
+    return std::move(
+      iter_fut
+    ).si_then([c, visitor, f=std::forward<F>(f)](auto iter) {
+      return seastar::do_with(
+       iter,
+       std::move(f),
+       [c, visitor](auto &pos, auto &f) {
+         return trans_intr::repeat(
+           [c, visitor, &f, &pos] {
+             return f(
+               pos
+             ).si_then([c, visitor, &pos](auto done) {
+               if (done == seastar::stop_iteration::yes) {
+                 return iterate_repeat_ret(
+                   interruptible::ready_future_marker{},
+                   seastar::stop_iteration::yes);
+               } else {
+                 ceph_assert(!pos.is_end());
+                 return pos.next(
+                   c, visitor
+                 ).si_then([&pos](auto next) {
+                   pos = next;
+                   return iterate_repeat_ret(
+                     interruptible::ready_future_marker{},
+                     seastar::stop_iteration::no);
+                 });
+               }
+             });
+           });
+       });
+    });
+  }
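+  /* Illustrative sketch (editorial addition, not from this patch): a
+   * typical caller of iterate_repeat walks a range and stops by returning
+   * stop_iteration::yes; begin_addr/end_addr are assumed bounds.
+   *
+   *   LBABtree::iterate_repeat(
+   *     c,
+   *     btree.lower_bound(c, begin_addr),
+   *     [end_addr](auto &pos) {
+   *       if (pos.is_end() || pos.get_key() >= end_addr) {
+   *         return LBABtree::iterate_repeat_ret(
+   *           interruptible::ready_future_marker{},
+   *           seastar::stop_iteration::yes);
+   *       }
+   *       // ... consume pos.get_key() / pos.get_val() ...
+   *       return LBABtree::iterate_repeat_ret(
+   *         interruptible::ready_future_marker{},
+   *         seastar::stop_iteration::no);
+   *     });
+   */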
+
+  /**
+   * insert
+   *
+   * Inserts val at laddr with iter as a hint.  If an element already exists
+   * at laddr, returns an iterator to that element unchanged along with false.
+   *
+   * Invalidates all outstanding iterators for this tree on this transaction.
+   *
+   * @param c [in] op context
+   * @param iter [in] hint; insertion is constant time if laddr lands
+   *             immediately prior to iter
+   * @param laddr [in] addr at which to insert
+   * @param val [in] val to insert
+   * @return pair<iter, bool> where iter points to the element at laddr and
+   *         bool is true iff an element at laddr did not already exist
+   */
+  using insert_iertr = base_iertr;
+  using insert_ret = insert_iertr::future<std::pair<iterator, bool>>;
+  insert_ret insert(
+    op_context_t c,
+    iterator iter,
+    laddr_t laddr,
+    lba_map_val_t val
+  );
+  insert_ret insert(
+    op_context_t c,
+    laddr_t laddr,
+    lba_map_val_t val) {
+    return lower_bound(
+      c, laddr
+    ).si_then([this, c, laddr, val](auto iter) {
+      return insert(c, iter, laddr, val);
+    });
+  }
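+  /* Illustrative sketch (editorial addition, not from this patch): the bool
+   * in the returned pair reports whether a new element was inserted; the
+   * lba_map_val_t constructor arguments below are assumptions for
+   * illustration.
+   *
+   *   btree.insert(
+   *     c, laddr, lba_map_val_t{len, paddr, 1, 0}
+   *   ).si_then([](auto p) {
+   *     auto &[iter, inserted] = p;
+   *     // !inserted means laddr was already mapped; either way iter
+   *     // points at the element now present at laddr
+   *   });
+   */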
+
+  /**
+   * update
+   *
+   * Invalidates all outstanding iterators for this tree on this transaction.
+   *
+   * @param c [in] op context
+   * @param iter [in] iterator to element to update, must not be end
+   * @param val [in] val with which to update
+   * @return iterator to newly updated element
+   */
+  using update_iertr = base_iertr;
+  using update_ret = update_iertr::future<iterator>;
+  update_ret update(
+    op_context_t c,
+    iterator iter,
+    lba_map_val_t val);
+
+  /**
+   * remove
+   *
+   * Invalidates all outstanding iterators for this tree on this transaction.
+   *
+   * @param c [in] op context
+   * @param iter [in] iterator to element to remove, must not be end
+   */
+  using remove_iertr = base_iertr;
+  using remove_ret = remove_iertr::future<>;
+  remove_ret remove(
+    op_context_t c,
+    iterator iter);
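+  /* Illustrative sketch (editorial addition, not from this patch): removing
+   * a mapping by address, assuming the caller knows it exists.
+   *
+   *   btree.lower_bound(c, laddr
+   *   ).si_then([&btree, c, laddr](auto iter) {
+   *     ceph_assert(!iter.is_end() && iter.get_key() == laddr);
+   *     return btree.remove(c, iter);
+   *   });
+   */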
+
+  /**
+   * init_cached_extent
+   *
+   * Checks whether e is live (reachable from lba tree) and drops or initializes
+   * accordingly.
+   *
+   * Returns e if live and a null CachedExtentRef otherwise.
+   */
+  using init_cached_extent_iertr = base_iertr;
+  using init_cached_extent_ret = init_cached_extent_iertr::future<CachedExtentRef>;
+  init_cached_extent_ret init_cached_extent(op_context_t c, CachedExtentRef e);
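+  /* Illustrative sketch (editorial addition, not from this patch): a null
+   * result means the extent is no longer referenced by the lba tree.
+   *
+   *   btree.init_cached_extent(c, e
+   *   ).si_then([](CachedExtentRef live) {
+   *     if (!live) {
+   *       // e is dead; the caller can drop it
+   *     }
+   *   });
+   */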
+
+  /**
+   * rewrite_lba_extent
+   *
+   * Rewrites a fresh copy of the extent into the transaction and updates
+   * internal references.
+   */
+  using rewrite_lba_extent_iertr = base_iertr;
+  using rewrite_lba_extent_ret = rewrite_lba_extent_iertr::future<>;
+  rewrite_lba_extent_ret rewrite_lba_extent(op_context_t c, CachedExtentRef e);
+
+private:
+  lba_root_t root;
+  bool root_dirty = false;
+
+  using get_internal_node_iertr = base_iertr;
+  using get_internal_node_ret = get_internal_node_iertr::future<LBAInternalNodeRef>;
+  static get_internal_node_ret get_internal_node(
+    op_context_t c,
+    depth_t depth,
+    paddr_t offset);
+
+  using get_leaf_node_iertr = base_iertr;
+  using get_leaf_node_ret = get_leaf_node_iertr::future<LBALeafNodeRef>;
+  static get_leaf_node_ret get_leaf_node(
+    op_context_t c,
+    paddr_t offset);
+
+  using lookup_root_iertr = base_iertr;
+  using lookup_root_ret = lookup_root_iertr::future<>;
+  lookup_root_ret lookup_root(
+    op_context_t c,
+    iterator &iter,
+    mapped_space_visitor_t *visitor) const {
+    if (root.get_depth() > 1) {
+      return get_internal_node(
+       c,
+       root.get_depth(),
+       root.get_location()
+      ).si_then([this, visitor, &iter](LBAInternalNodeRef root_node) {
+       iter.get_internal(root.get_depth()).node = root_node;
+       if (visitor) (*visitor)(root_node->get_paddr(), root_node->get_length());
+       return lookup_root_iertr::now();
+      });
+    } else {
+      return get_leaf_node(
+       c,
+       root.get_location()
+      ).si_then([visitor, &iter](LBALeafNodeRef root_node) {
+       iter.leaf.node = root_node;
+       if (visitor) (*visitor)(root_node->get_paddr(), root_node->get_length());
+       return lookup_root_iertr::now();
+      });
+    }
+  }
+
+  using lookup_internal_level_iertr = base_iertr;
+  using lookup_internal_level_ret = lookup_internal_level_iertr::future<>;
+  template <typename F>
+  static lookup_internal_level_ret lookup_internal_level(
+    op_context_t c,
+    depth_t depth,
+    iterator &iter,
+    F &f,
+    mapped_space_visitor_t *visitor
+  ) {
+    assert(depth > 1);
+    auto &parent_entry = iter.get_internal(depth + 1);
+    auto parent = parent_entry.node;
+    auto node_iter = parent->iter_idx(parent_entry.pos);
+    return get_internal_node(
+      c,
+      depth,
+      node_iter->get_val().maybe_relative_to(parent->get_paddr())
+    ).si_then([depth, visitor, &iter, &f](LBAInternalNodeRef node) {
+      auto &entry = iter.get_internal(depth);
+      entry.node = node;
+      auto node_iter = f(*node);
+      assert(node_iter != node->end());
+      entry.pos = node_iter->get_offset();
+      if (visitor) (*visitor)(node->get_paddr(), node->get_length());
+      return seastar::now();
+    });
+  }
+
+  using lookup_leaf_iertr = base_iertr;
+  using lookup_leaf_ret = lookup_leaf_iertr::future<>;
+  template <typename F>
+  static lookup_internal_level_ret lookup_leaf(
+    op_context_t c,
+    iterator &iter,
+    F &f,
+    mapped_space_visitor_t *visitor
+  ) {
+    auto &parent_entry = iter.get_internal(2);
+    auto parent = parent_entry.node;
+    assert(parent);
+    auto node_iter = parent->iter_idx(parent_entry.pos);
+
+    return get_leaf_node(
+      c,
+      node_iter->get_val().maybe_relative_to(parent->get_paddr())
+    ).si_then([visitor, &iter, &f](LBALeafNodeRef node) {
+      iter.leaf.node = node;
+      auto node_iter = f(*node);
+      iter.leaf.pos = node_iter->get_offset();
+      if (visitor) (*visitor)(node->get_paddr(), node->get_length());
+      return seastar::now();
+    });
+  }
+
+  using lookup_depth_range_iertr = base_iertr;
+  using lookup_depth_range_ret = lookup_depth_range_iertr::future<>;
+  template <typename LI, typename LL>
+  static lookup_depth_range_ret lookup_depth_range(
+    op_context_t c, ///< [in] context
+    iterator &iter, ///< [in,out] iterator to populate
+    depth_t from,   ///< [in] from inclusive
+    depth_t to,     ///< [in] to exclusive, (to <= from, to == from is a noop)
+    LI &li,         ///< [in] internal->iterator
+    LL &ll,         ///< [in] leaf->iterator
+    mapped_space_visitor_t *visitor ///< [in] mapped space visitor
+  ) {
+    LOG_PREFIX(LBATree::lookup_depth_range);
+    DEBUGT("{} -> {}", c.trans, from, to);
+    return seastar::do_with(
+      from,
+      [c, to, visitor, &iter, &li, &ll](auto &d) {
+       return trans_intr::repeat(
+         [c, to, visitor, &iter, &li, &ll, &d] {
+           if (d > to) {
+             return [&] {
+               if (d > 1) {
+                 return lookup_internal_level(
+                   c,
+                   d,
+                   iter,
+                   li,
+                   visitor);
+               } else {
+                 assert(d == 1);
+                 return lookup_leaf(
+                   c,
+                   iter,
+                   ll,
+                   visitor);
+               }
+             }().si_then([&d] {
+               --d;
+               return lookup_depth_range_iertr::make_ready_future<
+                 seastar::stop_iteration
+                 >(seastar::stop_iteration::no);
+             });
+           } else {
+             return lookup_depth_range_iertr::make_ready_future<
+               seastar::stop_iteration
+               >(seastar::stop_iteration::yes);
+           }
+         });
+      });
+  }
+
+  using lookup_iertr = base_iertr;
+  using lookup_ret = lookup_iertr::future<iterator>;
+  template <typename LI, typename LL>
+  lookup_ret lookup(
+    op_context_t c,
+    LI &&lookup_internal,
+    LL &&lookup_leaf,
+    mapped_space_visitor_t *visitor
+  ) const {
+    LOG_PREFIX(LBATree::lookup);
+    return seastar::do_with(
+      iterator{root.get_depth()},
+      std::forward<LI>(lookup_internal),
+      std::forward<LL>(lookup_leaf),
+      [FNAME, this, visitor, c](auto &iter, auto &li, auto &ll) {
+       return lookup_root(
+         c, iter, visitor
+       ).si_then([FNAME, this, visitor, c, &iter, &li, &ll] {
+         if (iter.get_depth() > 1) {
+           auto &root_entry = *(iter.internal.rbegin());
+           root_entry.pos = li(*(root_entry.node)).get_offset();
+         } else {
+           auto &root_entry = iter.leaf;
+           auto riter = ll(*(root_entry.node));
+           root_entry.pos = riter->get_offset();
+         }
+         DEBUGT("got root, depth {}", c.trans, root.get_depth());
+         return lookup_depth_range(
+           c,
+           iter,
+           root.get_depth() - 1,
+           0,
+           li,
+           ll,
+           visitor);
+       }).si_then([&iter] {
+         return std::move(iter);
+       });
+      });
+  }
+
+  using find_insertion_iertr = base_iertr;
+  using find_insertion_ret = find_insertion_iertr::future<>;
+  static find_insertion_ret find_insertion(
+    op_context_t c,
+    laddr_t laddr,
+    iterator &iter);
+
+  using handle_split_iertr = base_iertr;
+  using handle_split_ret = handle_split_iertr::future<>;
+  handle_split_ret handle_split(
+    op_context_t c,
+    iterator &iter);
+
+  using handle_merge_iertr = base_iertr;
+  using handle_merge_ret = handle_merge_iertr::future<>;
+  handle_merge_ret handle_merge(
+    op_context_t c,
+    iterator &iter);
+
+  using update_internal_mapping_iertr = base_iertr;
+  using update_internal_mapping_ret = update_internal_mapping_iertr::future<>;
+  update_internal_mapping_ret update_internal_mapping(
+    op_context_t c,
+    depth_t depth,
+    laddr_t laddr,
+    paddr_t old_addr,
+    paddr_t new_addr);
+
+  template <typename T>
+  using node_position_t = iterator::node_position_t<T>;
+
+  template <typename NodeType>
+  friend base_iertr::future<typename NodeType::Ref> get_node(
+    op_context_t c,
+    depth_t depth,
+    paddr_t addr);
+
+  template <typename NodeType>
+  friend handle_merge_ret merge_level(
+    op_context_t c,
+    depth_t depth,
+    node_position_t<LBAInternalNode> &parent_pos,
+    node_position_t<NodeType> &pos);
+};
+
+}
diff --git a/src/crimson/os/seastore/lba_manager/btree/lba_btree_node.cc b/src/crimson/os/seastore/lba_manager/btree/lba_btree_node.cc
new file mode 100644 (file)
index 0000000..8fadc00
--- /dev/null
@@ -0,0 +1,64 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <sys/mman.h>
+#include <string.h>
+
+#include <memory>
+#include <string.h>
+
+#include "include/buffer.h"
+#include "include/byteorder.h"
+
+#include "crimson/os/seastore/lba_manager/btree/lba_btree_node.h"
+
+namespace {
+  seastar::logger& logger() {
+    return crimson::get_logger(ceph_subsys_seastore);
+  }
+}
+
+namespace crimson::os::seastore::lba_manager::btree {
+
+std::ostream &LBAInternalNode::print_detail(std::ostream &out) const
+{
+  return out << ", size=" << get_size()
+            << ", meta=" << get_meta();
+}
+
+void LBAInternalNode::resolve_relative_addrs(paddr_t base)
+{
+  for (auto i: *this) {
+    if (i->get_val().is_relative()) {
+      auto updated = base.add_relative(i->get_val());
+      logger().debug(
+       "LBAInternalNode::resolve_relative_addrs {} -> {}",
+       i->get_val(),
+       updated);
+      i->set_val(updated);
+    }
+  }
+}
+
+std::ostream &LBALeafNode::print_detail(std::ostream &out) const
+{
+  return out << ", size=" << get_size()
+            << ", meta=" << get_meta();
+}
+
+void LBALeafNode::resolve_relative_addrs(paddr_t base)
+{
+  for (auto i: *this) {
+    if (i->get_val().paddr.is_relative()) {
+      auto val = i->get_val();
+      val.paddr = base.add_relative(val.paddr);
+      logger().debug(
+       "LBALeafNode::resolve_relative_addrs {} -> {}",
+       i->get_val().paddr,
+       val.paddr);
+      i->set_val(val);
+    }
+  }
+}
+
+}
index 053bf27b2f6c9ef22edc4842ea0e818495753bfe..66eca56a223dfe73220a00d477dbb2e2995a842f 100644 (file)
@@ -7,9 +7,17 @@
 #include <memory>
 #include <string.h>
 
-#include "crimson/common/log.h"
-#include "crimson/os/seastore/lba_manager/btree/btree_range_pin.h"
+
+#include "include/buffer.h"
+
+#include "crimson/common/fixed_kv_node_layout.h"
+#include "crimson/common/errorator.h"
 #include "crimson/os/seastore/lba_manager.h"
+#include "crimson/os/seastore/seastore_types.h"
+#include "crimson/os/seastore/cache.h"
+#include "crimson/os/seastore/cached_extent.h"
+#include "crimson/os/seastore/lba_manager/btree/lba_btree_node.h"
+#include "crimson/os/seastore/lba_manager/btree/btree_range_pin.h"
 
 namespace crimson::os::seastore::lba_manager::btree {
 
@@ -32,6 +40,7 @@ struct lba_map_val_t {
   uint32_t refcount = 0; ///< refcount
   uint32_t checksum = 0; ///< checksum of original block written at paddr (TODO)
 
+  lba_map_val_t() = default;
   lba_map_val_t(
     extent_len_t len,
     paddr_t paddr,
@@ -43,6 +52,30 @@ struct lba_map_val_t {
 class BtreeLBAPin;
 using BtreeLBAPinRef = std::unique_ptr<BtreeLBAPin>;
 
+constexpr size_t LBA_BLOCK_SIZE = 4096;
+
+/**
+ * lba_node_meta_le_t
+ *
+ * On disk layout for lba_node_meta_t
+ */
+struct lba_node_meta_le_t {
+  laddr_le_t begin = laddr_le_t(0);
+  laddr_le_t end = laddr_le_t(0);
+  depth_le_t depth = init_depth_le(0);
+
+  lba_node_meta_le_t() = default;
+  lba_node_meta_le_t(const lba_node_meta_le_t &) = default;
+  explicit lba_node_meta_le_t(const lba_node_meta_t &val)
+    : begin(ceph_le64(val.begin)),
+      end(ceph_le64(val.end)),
+      depth(init_depth_le(val.depth)) {}
+
+  operator lba_node_meta_t() const {
+    return lba_node_meta_t{ begin, end, depth };
+  }
+};
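+// Illustrative sketch (editorial addition, not from this patch): the two
+// conversions above are intended to round-trip, e.g.
+//   lba_node_meta_t in{0, L_ADDR_MAX, 1};
+//   lba_node_meta_le_t le(in);
+//   lba_node_meta_t out = le;  // out matches in field-for-field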
+
 /**
  * LBANode
  *
@@ -59,221 +92,448 @@ struct LBANode : CachedExtent {
 
   virtual lba_node_meta_t get_node_meta() const = 0;
 
-  /**
-   * lookup
-   *
-   * Returns the node at the specified depth responsible
-   * for laddr
-   */
-  using lookup_iertr = base_iertr;
-  using lookup_ret = lookup_iertr::future<LBANodeRef>;
-  virtual lookup_ret lookup(
-    op_context_t c,
-    laddr_t addr,
-    depth_t depth) = 0;
+  virtual ~LBANode() = default;
 
-  /**
-   * lookup_range
-   *
-   * Returns mappings within range [addr, addr+len)
-   */
-  using lookup_range_iertr = LBAManager::get_mappings_iertr;
-  using lookup_range_ret = LBAManager::get_mappings_ret;
-  virtual lookup_range_ret lookup_range(
-    op_context_t c,
-    laddr_t addr,
-    extent_len_t len) = 0;
+  void on_delta_write(paddr_t record_block_offset) final {
+    // All in-memory relative addrs are necessarily record-relative
+    assert(get_prior_instance());
+    pin.take_pin(get_prior_instance()->cast<LBANode>()->pin);
+    resolve_relative_addrs(record_block_offset);
+  }
 
-  /**
-   * lookup_pin
-   *
-   * Returns the mapping at addr
-   */
-  using lookup_pin_iertr = LBAManager::get_mapping_iertr;
-  using lookup_pin_ret = LBAManager::get_mapping_ret;
-  virtual lookup_pin_ret lookup_pin(
-    op_context_t c,
-    laddr_t addr) = 0;
+  void on_initial_write() final {
+    // All in-memory relative addrs are necessarily block-relative
+    resolve_relative_addrs(get_paddr());
+  }
 
-  /**
-   * insert
-   *
-   * Recursively inserts into subtree rooted at *this.  Caller
-   * must already have handled splitting if at_max_capacity().
-   *
-   * Precondition: !at_max_capacity()
-   */
-  using insert_iertr = base_iertr;
-  using insert_ret = insert_iertr::future<LBAPinRef>;
-  virtual insert_ret insert(
-    op_context_t c,
-    laddr_t laddr,
-    lba_map_val_t val) = 0;
+  void on_clean_read() final {
+    // From initial write of block, relative addrs are necessarily block-relative
+    resolve_relative_addrs(get_paddr());
+  }
 
-  /**
-   * find_hole
-   *
-   * Finds minimum hole of size len in [min, max)
-   *
-   * @return addr of hole, L_ADDR_NULL if unfound
-   */
-  using find_hole_iertr = base_iertr;
-  using find_hole_ret = find_hole_iertr::future<laddr_t>;
-  virtual find_hole_ret find_hole(
-    op_context_t c,
-    laddr_t min,
-    laddr_t max,
-    extent_len_t len) = 0;
+  virtual void resolve_relative_addrs(paddr_t base) = 0;
+};
+using LBANodeRef = LBANode::LBANodeRef;
 
-  /**
-   * scan_mappings
-   *
-   * Call f for all mappings in [begin, end)
-   */
-  using scan_mappings_iertr = LBAManager::scan_mappings_iertr;
-  using scan_mappings_ret = LBAManager::scan_mappings_ret;
-  using scan_mappings_func_t = LBAManager::scan_mappings_func_t;
-  virtual scan_mappings_ret scan_mappings(
-    op_context_t c,
-    laddr_t begin,
-    laddr_t end,
-    scan_mappings_func_t &f) = 0;
-
-  using scan_mapped_space_iertr = LBAManager::scan_mapped_space_iertr;
-  using scan_mapped_space_ret = LBAManager::scan_mapped_space_ret;
-  using scan_mapped_space_func_t = LBAManager::scan_mapped_space_func_t;
-  virtual scan_mapped_space_ret scan_mapped_space(
-    op_context_t c,
-    scan_mapped_space_func_t &f) = 0;
+/**
+ * LBAInternalNode
+ *
+ * Abstracts operations on and layout of internal nodes for the
+ * LBA Tree.
+ *
+ * Layout (4k):
+ *   size       : uint32_t[1]                4b
+ *   (padding)  :                            4b
+ *   meta       : lba_node_meta_le_t[1]      (1*24)b
+ *   keys       : laddr_t[254]               (254*8)b
+ *   values     : paddr_t[254]               (254*8)b
+ *                                           = 4096
+
+ * TODO: make the above capacity calculation part of FixedKVNodeLayout
+ * TODO: the above alignment probably isn't portable without further work
+ */
+constexpr size_t INTERNAL_NODE_CAPACITY = 254;
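+// Editorial note (not from this patch) spelling out the layout math above:
+//   static_assert(4 + 4 + 24 + INTERNAL_NODE_CAPACITY * (8 + 8) == 4096);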
+struct LBAInternalNode
+  : LBANode,
+    common::FixedKVNodeLayout<
+      INTERNAL_NODE_CAPACITY,
+      lba_node_meta_t, lba_node_meta_le_t,
+      laddr_t, laddr_le_t,
+      paddr_t, paddr_le_t> {
+  using Ref = TCachedExtentRef<LBAInternalNode>;
+  using internal_iterator_t = const_iterator;
+  template <typename... T>
+  LBAInternalNode(T&&... t) :
+    LBANode(std::forward<T>(t)...),
+    FixedKVNodeLayout(get_bptr().c_str()) {}
+
+  static constexpr extent_types_t TYPE = extent_types_t::LADDR_INTERNAL;
+
+  lba_node_meta_t get_node_meta() const { return get_meta(); }
+
+  CachedExtentRef duplicate_for_write() final {
+    assert(delta_buffer.empty());
+    return CachedExtentRef(new LBAInternalNode(*this));
+  };
+
+  delta_buffer_t delta_buffer;
+  delta_buffer_t *maybe_get_delta_buffer() {
+    return is_mutation_pending() ? &delta_buffer : nullptr;
+  }
 
-  /**
-   * mutate_mapping
-   *
-   * Lookups up laddr, calls f on value. If f returns a value, inserts it.
-   * If it returns nullopt, removes the value.
-   * Caller must already have merged if at_min_capacity().
-   *
-   * Recursive calls use mutate_mapping_internal.
-   *
-   * Precondition: !at_min_capacity()
-   */
-  using mutate_mapping_iertr = base_iertr::extend<
-    crimson::ct_error::enoent             ///< mapping does not exist
-    >;
-  using mutate_mapping_ret = mutate_mapping_iertr::future<
-    lba_map_val_t>;
-  using mutate_func_t = std::function<
-    lba_map_val_t(const lba_map_val_t &v)
-    >;
-  virtual mutate_mapping_ret mutate_mapping(
-    op_context_t c,
-    laddr_t laddr,
-    mutate_func_t &&f) = 0;
-  virtual mutate_mapping_ret mutate_mapping_internal(
-    op_context_t c,
-    laddr_t laddr,
-    bool is_root,
-    mutate_func_t &&f) = 0;
+  void update(
+    const_iterator iter,
+    paddr_t addr) {
+    return journal_update(
+      iter,
+      maybe_generate_relative(addr),
+      maybe_get_delta_buffer());
+  }
 
-  /**
-   * mutate_internal_address
-   *
-   * Looks up internal node mapping at laddr, depth and
-   * updates the mapping to paddr.  Returns previous paddr
-   * (for debugging purposes).
-   */
-  using mutate_internal_address_iertr = base_iertr::extend<
-    crimson::ct_error::enoent             ///< mapping does not exist
-    >;
-  using mutate_internal_address_ret = mutate_internal_address_iertr::future<
-    paddr_t>;
-  virtual mutate_internal_address_ret mutate_internal_address(
-    op_context_t c,
-    depth_t depth,
-    laddr_t laddr,
-    paddr_t paddr) = 0;
+  void insert(
+    const_iterator iter,
+    laddr_t pivot,
+    paddr_t addr) {
+    return journal_insert(
+      iter,
+      pivot,
+      maybe_generate_relative(addr),
+      maybe_get_delta_buffer());
+  }
 
-  /**
-   * make_split_children
-   *
-   * Generates appropriately typed left and right nodes formed from the
-   * contents of *this.
-   *
-   * Returns <left, right, pivot> where pivot is the first value of right.
-   */
-  virtual std::tuple<
-    LBANodeRef,
-    LBANodeRef,
-    laddr_t>
-  make_split_children(
-    op_context_t c) = 0;
+  void remove(const_iterator iter) {
+    return journal_remove(
+      iter,
+      maybe_get_delta_buffer());
+  }
 
-  /**
-   * make_full_merge
-   *
-   * Returns a single node formed from merging *this and right.
-   * Precondition: at_min_capacity() && right.at_min_capacity()
-   */
-  virtual LBANodeRef make_full_merge(
+  void replace(
+    const_iterator iter,
+    laddr_t pivot,
+    paddr_t addr) {
+    return journal_replace(
+      iter,
+      pivot,
+      maybe_generate_relative(addr),
+      maybe_get_delta_buffer());
+  }
+
+  std::tuple<Ref, Ref, laddr_t>
+  make_split_children(op_context_t c) {
+    auto left = c.cache.alloc_new_extent<LBAInternalNode>(
+      c.trans, LBA_BLOCK_SIZE);
+    auto right = c.cache.alloc_new_extent<LBAInternalNode>(
+      c.trans, LBA_BLOCK_SIZE);
+    auto pivot = split_into(*left, *right);
+    left->pin.set_range(left->get_meta());
+    right->pin.set_range(right->get_meta());
+    return std::make_tuple(
+      left,
+      right,
+      pivot);
+  }
+
+  Ref make_full_merge(
     op_context_t c,
-    LBANodeRef &right) = 0;
+    Ref &right) {
+    auto replacement = c.cache.alloc_new_extent<LBAInternalNode>(
+      c.trans, LBA_BLOCK_SIZE);
+    replacement->merge_from(*this, *right->cast<LBAInternalNode>());
+    replacement->pin.set_range(replacement->get_meta());
+    return replacement;
+  }
+
+  std::tuple<Ref, Ref, laddr_t>
+  make_balanced(
+    op_context_t c,
+    Ref &_right,
+    bool prefer_left) {
+    ceph_assert(_right->get_type() == get_type());
+    auto &right = *_right->cast<LBAInternalNode>();
+    auto replacement_left = c.cache.alloc_new_extent<LBAInternalNode>(
+      c.trans, LBA_BLOCK_SIZE);
+    auto replacement_right = c.cache.alloc_new_extent<LBAInternalNode>(
+      c.trans, LBA_BLOCK_SIZE);
+
+    auto pivot = balance_into_new_nodes(
+      *this,
+      right,
+      prefer_left,
+      *replacement_left,
+      *replacement_right);
+
+    replacement_left->pin.set_range(replacement_left->get_meta());
+    replacement_right->pin.set_range(replacement_right->get_meta());
+    return std::make_tuple(
+      replacement_left,
+      replacement_right,
+      pivot);
+  }
 
   /**
-   * make_balanced
+   * Internal relative addresses on read or in memory prior to commit
+   * are either record or block relative depending on whether this
+   * physical node is is_initial_pending() or just is_pending().
    *
-   * Returns nodes formed by balancing the contents of *this and right.
-   *
-   * Returns <left, right, pivot> where pivot is the first value of right.
+   * User passes appropriate base depending on lifecycle and
+   * resolve_relative_addrs fixes up relative internal references
+   * based on base.
    */
-  virtual std::tuple<
-    LBANodeRef,
-    LBANodeRef,
-    laddr_t>
-  make_balanced(
-    op_context_t c,
-    LBANodeRef &right,
-    bool prefer_left) = 0;
+  void resolve_relative_addrs(paddr_t base);
+  void node_resolve_vals(iterator from, iterator to) const final {
+    if (is_initial_pending()) {
+      for (auto i = from; i != to; ++i) {
+       if (i->get_val().is_relative()) {
+         assert(i->get_val().is_block_relative());
+         i->set_val(get_paddr().add_relative(i->get_val()));
+       }
+      }
+    }
+  }
+  void node_unresolve_vals(iterator from, iterator to) const final {
+    if (is_initial_pending()) {
+      for (auto i = from; i != to; ++i) {
+       if (i->get_val().is_relative()) {
+         assert(i->get_val().is_record_relative());
+         i->set_val(i->get_val() - get_paddr());
+       }
+      }
+    }
+  }
 
-  virtual bool at_max_capacity() const = 0;
-  virtual bool at_min_capacity() const = 0;
+  extent_types_t get_type() const final {
+    return TYPE;
+  }
 
-  virtual ~LBANode() = default;
+  std::ostream &print_detail(std::ostream &out) const final;
 
-  void on_delta_write(paddr_t record_block_offset) final {
-    // All in-memory relative addrs are necessarily record-relative
-    assert(get_prior_instance());
-    pin.take_pin(get_prior_instance()->cast<LBANode>()->pin);
-    resolve_relative_addrs(record_block_offset);
+  ceph::bufferlist get_delta() final {
+    assert(!delta_buffer.empty());
+    ceph::buffer::ptr bptr(delta_buffer.get_bytes());
+    delta_buffer.copy_out(bptr.c_str(), bptr.length());
+    ceph::bufferlist bl;
+    bl.push_back(bptr);
+    return bl;
   }
 
-  void on_initial_write() final {
-    // All in-memory relative addrs are necessarily block-relative
-    resolve_relative_addrs(get_paddr());
+  void apply_delta_and_adjust_crc(
+    paddr_t base, const ceph::bufferlist &_bl) final {
+    assert(_bl.length());
+    ceph::bufferlist bl = _bl;
+    bl.rebuild();
+    delta_buffer_t buffer;
+    buffer.copy_in(bl.front().c_str(), bl.front().length());
+    buffer.replay(*this);
+    set_last_committed_crc(get_crc32c());
+    resolve_relative_addrs(base);
   }
 
-  void on_clean_read() final {
-    // From initial write of block, relative addrs are necessarily block-relative
-    resolve_relative_addrs(get_paddr());
+  bool at_max_capacity() const {
+    return get_size() == get_capacity();
   }
 
-  virtual void resolve_relative_addrs(paddr_t base) = 0;
+  bool at_min_capacity() const {
+    return get_size() == (get_capacity() / 2);
+  }
 };
-using LBANodeRef = LBANode::LBANodeRef;
+using LBAInternalNodeRef = LBAInternalNode::Ref;
+
+/**
+ * LBALeafNode
+ *
+ * Abstracts operations on and layout of leaf nodes for the
+ * LBA Tree.
+ *
+ * Layout (4k):
+ *   size       : uint32_t[1]                4b
+ *   (padding)  :                            4b
+ *   meta       : lba_node_meta_le_t[1]      (1*24)b
+ *   keys       : laddr_t[145]               (145*8)b
+ *   values     : lba_map_val_t[145]         (145*20)b
+ *                                           = 4092
+ *
+ * TODO: update FixedKVNodeLayout to handle the above calculation
+ * TODO: the above alignment probably isn't portable without further work
+ */
+constexpr size_t LEAF_NODE_CAPACITY = 145;
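+// Editorial note (not from this patch) spelling out the layout math above:
+//   static_assert(4 + 4 + 24 + LEAF_NODE_CAPACITY * (8 + 20) == 4092);
+//   // 4092 <= LBA_BLOCK_SIZE; the remaining 4 bytes are unused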
 
 /**
- * get_lba_btree_extent
+ * lba_map_val_le_t
  *
- * Fetches node at depth of the appropriate type.
+ * On disk layout for lba_map_val_t.
  */
-using get_lba_node_iertr = base_iertr;
-using get_lba_node_ret = get_lba_node_iertr::future<LBANodeRef>;
-get_lba_node_ret get_lba_btree_extent(
-  op_context_t c, ///< [in] context structure
-  CachedExtentRef parent, ///< [in] paddr ref source
-  depth_t depth,  ///< [in] depth of node to fetch
-  paddr_t offset, ///< [in] physical addr of node
-  paddr_t base    ///< [in] depending on user, block addr or record addr
-                  ///       in case offset is relative
-);
+struct lba_map_val_le_t {
+  extent_len_le_t len = init_extent_len_le(0);
+  paddr_le_t paddr;
+  ceph_le32 refcount{0};
+  ceph_le32 checksum{0};
+
+  lba_map_val_le_t() = default;
+  lba_map_val_le_t(const lba_map_val_le_t &) = default;
+  explicit lba_map_val_le_t(const lba_map_val_t &val)
+    : len(init_extent_len_le(val.len)),
+      paddr(paddr_le_t(val.paddr)),
+      refcount(val.refcount),
+      checksum(val.checksum) {}
+
+  operator lba_map_val_t() const {
+    return lba_map_val_t{ len, paddr, refcount, checksum };
+  }
+};
+
+struct LBALeafNode
+  : LBANode,
+    common::FixedKVNodeLayout<
+      LEAF_NODE_CAPACITY,
+      lba_node_meta_t, lba_node_meta_le_t,
+      laddr_t, laddr_le_t,
+      lba_map_val_t, lba_map_val_le_t> {
+  using Ref = TCachedExtentRef<LBALeafNode>;
+  using internal_iterator_t = const_iterator;
+  template <typename... T>
+  LBALeafNode(T&&... t) :
+    LBANode(std::forward<T>(t)...),
+    FixedKVNodeLayout(get_bptr().c_str()) {}
+
+  static constexpr extent_types_t TYPE = extent_types_t::LADDR_LEAF;
+
+  lba_node_meta_t get_node_meta() const { return get_meta(); }
+
+  CachedExtentRef duplicate_for_write() final {
+    assert(delta_buffer.empty());
+    return CachedExtentRef(new LBALeafNode(*this));
+  };
+
+  delta_buffer_t delta_buffer;
+  delta_buffer_t *maybe_get_delta_buffer() {
+    return is_mutation_pending() ? &delta_buffer : nullptr;
+  }
+
+  void update(
+    const_iterator iter,
+    lba_map_val_t val) {
+    val.paddr = maybe_generate_relative(val.paddr);
+    return journal_update(
+      iter,
+      val,
+      maybe_get_delta_buffer());
+  }
+
+  auto insert(
+    const_iterator iter,
+    laddr_t addr,
+    lba_map_val_t val) {
+    val.paddr = maybe_generate_relative(val.paddr);
+    journal_insert(
+      iter,
+      addr,
+      val,
+      maybe_get_delta_buffer());
+    return iter;
+  }
+
+  void remove(const_iterator iter) {
+    return journal_remove(
+      iter,
+      maybe_get_delta_buffer());
+  }
+
+
+  std::tuple<Ref, Ref, laddr_t>
+  make_split_children(op_context_t c) {
+    auto left = c.cache.alloc_new_extent<LBALeafNode>(
+      c.trans, LBA_BLOCK_SIZE);
+    auto right = c.cache.alloc_new_extent<LBALeafNode>(
+      c.trans, LBA_BLOCK_SIZE);
+    auto pivot = split_into(*left, *right);
+    left->pin.set_range(left->get_meta());
+    right->pin.set_range(right->get_meta());
+    return std::make_tuple(
+      left,
+      right,
+      pivot);
+  }
+
+  Ref make_full_merge(
+    op_context_t c,
+    Ref &right) {
+    auto replacement = c.cache.alloc_new_extent<LBALeafNode>(
+      c.trans, LBA_BLOCK_SIZE);
+    replacement->merge_from(*this, *right->cast<LBALeafNode>());
+    replacement->pin.set_range(replacement->get_meta());
+    return replacement;
+  }
+
+  std::tuple<Ref, Ref, laddr_t>
+  make_balanced(
+    op_context_t c,
+    Ref &_right,
+    bool prefer_left) {
+    ceph_assert(_right->get_type() == get_type());
+    auto &right = *_right->cast<LBALeafNode>();
+    auto replacement_left = c.cache.alloc_new_extent<LBALeafNode>(
+      c.trans, LBA_BLOCK_SIZE);
+    auto replacement_right = c.cache.alloc_new_extent<LBALeafNode>(
+      c.trans, LBA_BLOCK_SIZE);
+
+    auto pivot = balance_into_new_nodes(
+      *this,
+      right,
+      prefer_left,
+      *replacement_left,
+      *replacement_right);
+
+    replacement_left->pin.set_range(replacement_left->get_meta());
+    replacement_right->pin.set_range(replacement_right->get_meta());
+    return std::make_tuple(
+      replacement_left,
+      replacement_right,
+      pivot);
+  }
+
+  // See LBAInternalNode, same concept
+  void resolve_relative_addrs(paddr_t base);
+  void node_resolve_vals(iterator from, iterator to) const final {
+    if (is_initial_pending()) {
+      for (auto i = from; i != to; ++i) {
+       auto val = i->get_val();
+       if (val.paddr.is_relative()) {
+         assert(val.paddr.is_block_relative());
+         val.paddr = get_paddr().add_relative(val.paddr);
+         i->set_val(val);
+       }
+      }
+    }
+  }
+  void node_unresolve_vals(iterator from, iterator to) const final {
+    if (is_initial_pending()) {
+      for (auto i = from; i != to; ++i) {
+       auto val = i->get_val();
+       if (val.paddr.is_relative()) {
+         assert(val.paddr.is_record_relative());
+         val.paddr = val.paddr - get_paddr();
+         i->set_val(val);
+       }
+      }
+    }
+  }
+
+  ceph::bufferlist get_delta() final {
+    assert(!delta_buffer.empty());
+    ceph::buffer::ptr bptr(delta_buffer.get_bytes());
+    delta_buffer.copy_out(bptr.c_str(), bptr.length());
+    ceph::bufferlist bl;
+    bl.push_back(bptr);
+    return bl;
+  }
+
+  void apply_delta_and_adjust_crc(
+    paddr_t base, const ceph::bufferlist &_bl) final {
+    assert(_bl.length());
+    ceph::bufferlist bl = _bl;
+    bl.rebuild();
+    delta_buffer_t buffer;
+    buffer.copy_in(bl.front().c_str(), bl.front().length());
+    buffer.replay(*this);
+    set_last_committed_crc(get_crc32c());
+    resolve_relative_addrs(base);
+  }
+
+  extent_types_t get_type() const final {
+    return TYPE;
+  }
+
+  std::ostream &print_detail(std::ostream &out) const final;
+
+  bool at_max_capacity() const {
+    return get_size() == get_capacity();
+  }
+
+  bool at_min_capacity() const {
+    return get_size() == (get_capacity() / 2);
+  }
+};
+using LBALeafNodeRef = TCachedExtentRef<LBALeafNode>;
 
 }
diff --git a/src/crimson/os/seastore/lba_manager/btree/lba_btree_node_impl.cc b/src/crimson/os/seastore/lba_manager/btree/lba_btree_node_impl.cc
deleted file mode 100644 (file)
index d11f27a..0000000
+++ /dev/null
@@ -1,792 +0,0 @@
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab
-
-#include <sys/mman.h>
-#include <string.h>
-
-#include <memory>
-#include <string.h>
-
-#include "include/buffer.h"
-#include "include/byteorder.h"
-
-#include "crimson/os/seastore/lba_manager/btree/lba_btree_node_impl.h"
-
-namespace {
-  seastar::logger& logger() {
-    return crimson::get_logger(ceph_subsys_seastore);
-  }
-}
-
-namespace crimson::os::seastore::lba_manager::btree {
-
-std::ostream &LBAInternalNode::print_detail(std::ostream &out) const
-{
-  return out << ", size=" << get_size()
-            << ", meta=" << get_meta();
-}
-
-LBAInternalNode::lookup_ret LBAInternalNode::lookup(
-  op_context_t c,
-  laddr_t addr,
-  depth_t depth)
-{
-  auto meta = get_meta();
-  if (depth == get_meta().depth) {
-    return lookup_ret(
-      interruptible::ready_future_marker{},
-      this);
-  }
-  assert(meta.begin <= addr);
-  assert(meta.end > addr);
-
-  [[maybe_unused]] auto [iter, biter] = bound(addr, addr + 1);
-  assert(iter != biter);
-  assert(iter + 1 == biter);
-  return get_lba_btree_extent(
-    c,
-    this,
-    meta.depth - 1,
-    iter->get_val(),
-    get_paddr()).si_then([c, addr, depth](auto child) {
-      return child->lookup(c, addr, depth);
-    }).finally([ref=LBANodeRef(this)] {});
-}
-
-LBAInternalNode::lookup_range_ret LBAInternalNode::lookup_range(
-  op_context_t c,
-  laddr_t addr,
-  extent_len_t len)
-{
-  auto [begin, end] = bound(addr, addr + len);
-  auto result_up = std::make_unique<lba_pin_list_t>();
-  auto &result = *result_up;
-  return trans_intr::do_for_each(
-    std::move(begin),
-    std::move(end),
-    [this, c, &result, addr, len](const auto &val) {
-      return get_lba_btree_extent(
-       c,
-       this,
-       get_meta().depth - 1,
-       val.get_val(),
-       get_paddr()).si_then(
-         [c, &result, addr, len](auto extent) {
-           return extent->lookup_range(
-             c,
-             addr,
-             len).si_then(
-               [&result](auto pin_list) {
-                 result.splice(result.end(), pin_list,
-                               pin_list.begin(), pin_list.end());
-               });
-         });
-    }).si_then([result=std::move(result_up), ref=LBANodeRef(this)] {
-      return lookup_range_iertr::make_ready_future<lba_pin_list_t>(
-       std::move(*result));
-    });
-}
-
-LBAInternalNode::lookup_pin_ret LBAInternalNode::lookup_pin(
-  op_context_t c,
-  laddr_t addr)
-{
-  auto iter = get_containing_child(addr);
-  return get_lba_btree_extent(
-    c,
-    this,
-    get_meta().depth - 1,
-    iter->get_val(),
-    get_paddr()
-  ).si_then([c, addr](LBANodeRef extent) {
-    return extent->lookup_pin(c, addr);
-  }).finally([ref=LBANodeRef(this)] {});
-}
-
-LBAInternalNode::insert_ret LBAInternalNode::insert(
-  op_context_t c,
-  laddr_t laddr,
-  lba_map_val_t val)
-{
-  auto insertion_pt = get_containing_child(laddr);
-  return get_lba_btree_extent(
-    c,
-    this,
-    get_meta().depth - 1,
-    insertion_pt->get_val(),
-    get_paddr()).si_then(
-      [this, insertion_pt, c, laddr, val=std::move(val)](
-       auto extent) mutable {
-       return extent->at_max_capacity() ?
-         split_entry(c, laddr, insertion_pt, extent) :
-         insert_iertr::make_ready_future<LBANodeRef>(std::move(extent));
-      }).si_then([c, laddr, val=std::move(val)](
-                    LBANodeRef extent) mutable {
-       return extent->insert(c, laddr, val);
-      });
-}
-
-LBAInternalNode::mutate_mapping_ret LBAInternalNode::mutate_mapping(
-  op_context_t c,
-  laddr_t laddr,
-  mutate_func_t &&f)
-{
-  return mutate_mapping_internal(c, laddr, true, std::move(f));
-}
-
-LBAInternalNode::mutate_mapping_ret LBAInternalNode::mutate_mapping_internal(
-  op_context_t c,
-  laddr_t laddr,
-  bool is_root,
-  mutate_func_t &&f)
-{
-  auto mutation_pt = get_containing_child(laddr);
-  if (mutation_pt == end()) {
-    assert(0 == "impossible");
-    return crimson::ct_error::enoent::make();
-  }
-  return get_lba_btree_extent(
-    c,
-    this,
-    get_meta().depth - 1,
-    mutation_pt->get_val(),
-    get_paddr()
-  ).si_then([=](LBANodeRef extent) {
-    if (extent->at_min_capacity() && get_size() > 1) {
-      return merge_entry(
-       c,
-       laddr,
-       mutation_pt,
-       extent,
-       is_root);
-    } else {
-      return merge_iertr::make_ready_future<LBANodeRef>(
-       std::move(extent));
-    }
-  }).si_then([c, laddr, f=std::move(f)](LBANodeRef extent) mutable {
-    return extent->mutate_mapping_internal(c, laddr, false, std::move(f));
-  });
-}
-
-LBAInternalNode::mutate_internal_address_ret LBAInternalNode::mutate_internal_address(
-  op_context_t c,
-  depth_t depth,
-  laddr_t laddr,
-  paddr_t paddr)
-{
-  if (get_meta().depth == (depth + 1)) {
-    if (!is_pending()) {
-      return c.cache.duplicate_for_write(c.trans, this)->cast<LBAInternalNode>(
-      )->mutate_internal_address(
-       c,
-       depth,
-       laddr,
-       paddr);
-    }
-    auto iter = get_containing_child(laddr);
-    if (iter->get_key() != laddr) {
-      logger().debug(
-       "LBAInternalNode::mutate_internal_address laddr {} "
-       "not found in extent {}",
-       laddr,
-       *this);
-      return crimson::ct_error::enoent::make();
-    }
-
-    auto old_paddr = iter->get_val();
-
-    journal_update(
-      iter,
-      maybe_generate_relative(paddr),
-      maybe_get_delta_buffer());
-
-    return mutate_internal_address_ret(
-      interruptible::ready_future_marker{},
-      old_paddr
-    );
-  } else {
-    auto iter = get_containing_child(laddr);
-    return get_lba_btree_extent(
-      c,
-      this,
-      get_meta().depth - 1,
-      iter->get_val(),
-      get_paddr()
-    ).si_then([=](auto node) {
-      return node->mutate_internal_address(
-       c,
-       depth,
-       laddr,
-       paddr);
-    });
-  }
-}
-
-LBAInternalNode::find_hole_ret LBAInternalNode::find_hole(
-  op_context_t c,
-  laddr_t min_addr,
-  laddr_t max_addr,
-  extent_len_t len)
-{
-  logger().debug(
-    "LBAInternalNode::find_hole min={}, max={}, len={}, *this={}",
-    min_addr, max_addr, len, *this);
-  auto [begin, end] = bound(min_addr, max_addr);
-  return seastar::do_with(
-    begin,
-    L_ADDR_NULL,
-    [this, c, min_addr, len, end=end](auto &i, auto &ret) {
-      return trans_intr::repeat([=, &i, &ret]()
-        -> find_hole_iertr::future<seastar::stop_iteration> {
-       if (i == end) {
-         return seastar::make_ready_future<seastar::stop_iteration>(
-           seastar::stop_iteration::yes);
-       }
-       return get_lba_btree_extent(
-         c,
-         this,
-         get_meta().depth - 1,
-         i->get_val(),
-         get_paddr()
-       ).si_then([=, &i](auto extent) mutable {
-         auto lb = std::max(min_addr, i->get_key());
-         auto ub = i->get_next_key_or_max();
-         logger().debug("LBAInternalNode::find_hole extent {} lb {} ub {}",
-                        *extent, lb, ub);
-         return extent->find_hole(c, lb, ub, len);
-       }).si_then([&i, &ret](auto addr) {
-         if (addr == L_ADDR_NULL) {
-           ++i;
-           return seastar::make_ready_future<seastar::stop_iteration>(
-             seastar::stop_iteration::no);
-         } else {
-           ret = addr;
-           return seastar::make_ready_future<seastar::stop_iteration>(
-             seastar::stop_iteration::yes);
-         }
-       });
-       }).si_then([&ret, ref=LBANodeRef(this)]() {
-         return ret;
-       });
-    });
-}
-
-LBAInternalNode::scan_mappings_ret LBAInternalNode::scan_mappings(
-  op_context_t c,
-  laddr_t begin,
-  laddr_t end,
-  scan_mappings_func_t &f)
-{
-  auto [biter, eiter] = bound(begin, end);
-  return trans_intr::do_for_each(
-    std::move(biter),
-    std::move(eiter),
-    [=, &f](auto &viter) {
-      return get_lba_btree_extent(
-       c,
-       this,
-       get_meta().depth - 1,
-       viter->get_val(),
-       get_paddr()).si_then([=, &f](auto child) {
-         return child->scan_mappings(c, begin, end, f);
-       });
-    }).si_then([ref=LBANodeRef(this)] {});
-}
-
-LBAInternalNode::scan_mapped_space_ret LBAInternalNode::scan_mapped_space(
-  op_context_t c,
-  scan_mapped_space_func_t &f)
-{
-  f(get_paddr(), get_length());
-  return trans_intr::do_for_each(
-    begin(), end(),
-    [=, &f](auto &viter) {
-      return get_lba_btree_extent(
-       c,
-       this,
-       get_meta().depth - 1,
-       viter->get_val(),
-       get_paddr()).si_then([=, &f](auto child) {
-         return child->scan_mapped_space(c, f);
-       });
-    }).si_then([ref=LBANodeRef(this)]{});
-}
-
-
-void LBAInternalNode::resolve_relative_addrs(paddr_t base)
-{
-  for (auto i: *this) {
-    if (i->get_val().is_relative()) {
-      auto updated = base.add_relative(i->get_val());
-      logger().debug(
-       "LBAInternalNode::resolve_relative_addrs {} -> {}",
-       i->get_val(),
-       updated);
-      i->set_val(updated);
-    }
-  }
-}
-
-
-LBAInternalNode::split_ret
-LBAInternalNode::split_entry(
-  op_context_t c,
-  laddr_t addr,
-  internal_iterator_t iter, LBANodeRef entry)
-{
-  if (!is_pending()) {
-    auto mut = c.cache.duplicate_for_write(
-      c.trans, this)->cast<LBAInternalNode>();
-    auto mut_iter = mut->iter_idx(iter->get_offset());
-    return mut->split_entry(c, addr, mut_iter, entry);
-  }
-
-  ceph_assert(!at_max_capacity());
-  auto [left, right, pivot] = entry->make_split_children(c);
-
-  journal_update(
-    iter,
-    maybe_generate_relative(left->get_paddr()),
-    maybe_get_delta_buffer());
-  journal_insert(
-    iter + 1,
-    pivot,
-    maybe_generate_relative(right->get_paddr()),
-    maybe_get_delta_buffer());
-
-  c.cache.retire_extent(c.trans, entry);
-
-  logger().debug(
-    "LBAInternalNode::split_entry *this {} entry {} into left {} right {}",
-    *this,
-    *entry,
-    *left,
-    *right);
-
-  return split_iertr::make_ready_future<LBANodeRef>(
-    pivot > addr ? left : right
-  );
-}
-
-LBAInternalNode::merge_ret
-LBAInternalNode::merge_entry(
-  op_context_t c,
-  laddr_t addr,
-  internal_iterator_t iter,
-  LBANodeRef entry,
-  bool is_root)
-{
-  if (!is_pending()) {
-    auto mut = c.cache.duplicate_for_write(c.trans, this)->cast<LBAInternalNode>();
-    auto mut_iter = mut->iter_idx(iter->get_offset());
-    return mut->merge_entry(c, addr, mut_iter, entry, is_root);
-  }
-
-  logger().debug(
-    "LBAInternalNode: merge_entry: {}, {}",
-    *this,
-    *entry);
-  auto donor_is_left = (iter + 1) == end();
-  auto donor_iter = donor_is_left ? iter - 1 : iter + 1;
-  return get_lba_btree_extent(
-    c,
-    this,
-    get_meta().depth - 1,
-    donor_iter->get_val(),
-    get_paddr()
-  ).si_then([=](auto donor) mutable {
-    auto [l, r] = donor_is_left ?
-      std::make_pair(donor, entry) : std::make_pair(entry, donor);
-    auto [liter, riter] = donor_is_left ?
-      std::make_pair(donor_iter, iter) : std::make_pair(iter, donor_iter);
-    if (donor->at_min_capacity()) {
-      auto replacement = l->make_full_merge(
-       c,
-       r);
-
-      journal_update(
-       liter,
-       maybe_generate_relative(replacement->get_paddr()),
-       maybe_get_delta_buffer());
-      journal_remove(riter, maybe_get_delta_buffer());
-
-      c.cache.retire_extent(c.trans, l);
-      c.cache.retire_extent(c.trans, r);
-
-      if (is_root && get_size() == 1) {
-       return c.cache.get_root(c.trans).si_then([=](RootBlockRef croot) {
-         {
-           auto mut_croot = c.cache.duplicate_for_write(c.trans, croot);
-           croot = mut_croot->cast<RootBlock>();
-         }
-         auto new_root_addr = begin()->get_val().maybe_relative_to(get_paddr());
-         auto new_depth = get_meta().depth - 1;
-         croot->get_root().lba_root = lba_root_t{
-           new_root_addr,
-           new_depth};
-         c.trans.get_lba_tree_stats().depth = new_depth;
-         logger().debug(
-           "LBAInternalNode::merge_entry: collapsing root {} to addr {}",
-           *this,
-           new_root_addr);
-         c.cache.retire_extent(c.trans, this);
-         return merge_iertr::make_ready_future<LBANodeRef>(replacement);
-       });
-      } else {
-       return merge_iertr::make_ready_future<LBANodeRef>(replacement);
-      }
-    } else {
-      logger().debug(
-       "LBAInternalEntry::merge_entry balanced l {} r {}",
-       *l,
-       *r);
-      auto [replacement_l, replacement_r, pivot] =
-       l->make_balanced(
-         c,
-         r,
-         !donor_is_left);
-
-      journal_update(
-       liter,
-       maybe_generate_relative(replacement_l->get_paddr()),
-       maybe_get_delta_buffer());
-      journal_replace(
-       riter,
-       pivot,
-       maybe_generate_relative(replacement_r->get_paddr()),
-       maybe_get_delta_buffer());
-
-      c.cache.retire_extent(c.trans, l);
-      c.cache.retire_extent(c.trans, r);
-      return merge_iertr::make_ready_future<LBANodeRef>(
-       addr >= pivot ? replacement_r : replacement_l
-      );
-    }
-  });
-}
-
-
-LBAInternalNode::internal_iterator_t
-LBAInternalNode::get_containing_child(laddr_t laddr)
-{
-  // TODO: binary search
-  for (auto i = begin(); i != end(); ++i) {
-    if (i.contains(laddr))
-      return i;
-  }
-  ceph_assert(0 == "invalid");
-  return end();
-}
-
-std::ostream &LBALeafNode::print_detail(std::ostream &out) const
-{
-  return out << ", size=" << get_size()
-            << ", meta=" << get_meta();
-}
-
-LBALeafNode::lookup_range_ret LBALeafNode::lookup_range(
-  op_context_t c,
-  laddr_t addr,
-  extent_len_t len)
-{
-  logger().debug(
-    "LBALeafNode::lookup_range {}~{}",
-    addr,
-    len);
-  auto ret = lba_pin_list_t();
-  auto [i, end] = get_leaf_entries(addr, len);
-  for (; i != end; ++i) {
-    auto val = i->get_val();
-    auto begin = i->get_key();
-    ret.emplace_back(
-      std::make_unique<BtreeLBAPin>(
-       this,
-       val.paddr.maybe_relative_to(get_paddr()),
-       lba_node_meta_t{ begin, begin + val.len, 0}));
-  }
-  return lookup_range_iertr::make_ready_future<lba_pin_list_t>(
-    std::move(ret));
-}
-
-LBALeafNode::lookup_pin_ret LBALeafNode::lookup_pin(
-  op_context_t c,
-  laddr_t addr)
-{
-  logger().debug("LBALeafNode::lookup_pin {}", addr);
-  auto iter = find(addr);
-  if (iter == end()) {
-    return crimson::ct_error::enoent::make();
-  }
-  auto val = iter->get_val();
-  auto begin = iter->get_key();
-  return lookup_pin_ret(
-    interruptible::ready_future_marker{},
-    std::make_unique<BtreeLBAPin>(
-      this,
-      val.paddr.maybe_relative_to(get_paddr()),
-      lba_node_meta_t{ begin, begin + val.len, 0}));
-}
-
-LBALeafNode::insert_ret LBALeafNode::insert(
-  op_context_t c,
-  laddr_t laddr,
-  lba_map_val_t val)
-{
-  ceph_assert(!at_max_capacity());
-
-  if (!is_pending()) {
-    return c.cache.duplicate_for_write(c.trans, this
-    )->cast<LBALeafNode>()->insert(c, laddr, val);
-  }
-
-  val.paddr = maybe_generate_relative(val.paddr);
-  logger().debug(
-    "LBALeafNode::insert: inserting {}~{} -> {}",
-    laddr,
-    val.len,
-    val.paddr);
-
-  auto insert_pt = lower_bound(laddr);
-  journal_insert(insert_pt, laddr, val, maybe_get_delta_buffer());
-
-  logger().debug(
-    "LBALeafNode::insert: inserted {}~{} -> {}",
-    insert_pt.get_key(),
-    insert_pt.get_val().len,
-    insert_pt.get_val().paddr);
-  auto begin = insert_pt.get_key();
-  return insert_ret(
-    interruptible::ready_future_marker{},
-    std::make_unique<BtreeLBAPin>(
-      this,
-      val.paddr.maybe_relative_to(get_paddr()),
-      lba_node_meta_t{ begin, begin + val.len, 0}));
-}
-
-LBALeafNode::mutate_mapping_ret LBALeafNode::mutate_mapping(
-  op_context_t c,
-  laddr_t laddr,
-  mutate_func_t &&f)
-{
-  return mutate_mapping_internal(c, laddr, true, std::move(f));
-}
-
-LBALeafNode::mutate_mapping_ret LBALeafNode::mutate_mapping_internal(
-  op_context_t c,
-  laddr_t laddr,
-  bool is_root,
-  mutate_func_t &&f)
-{
-  auto mutation_pt = find(laddr);
-  if (mutation_pt == end()) {
-    return crimson::ct_error::enoent::make();
-  }
-
-  if (!is_pending()) {
-    return c.cache.duplicate_for_write(c.trans, this)->cast<LBALeafNode>(
-    )->mutate_mapping_internal(
-      c,
-      laddr,
-      is_root,
-      std::move(f));
-  }
-
-  auto cur = mutation_pt.get_val();
-  auto mutated = f(cur);
-
-  mutated.paddr = maybe_generate_relative(mutated.paddr);
-
-  logger().debug(
-    "{}: mutate addr {}: {} -> {}",
-    __func__,
-    laddr,
-    cur.paddr,
-    mutated.paddr);
-
-  if (mutated.refcount > 0) {
-    journal_update(mutation_pt, mutated, maybe_get_delta_buffer());
-    return mutate_mapping_ret(
-      interruptible::ready_future_marker{},
-      mutated);
-  } else {
-    ++(c.trans.get_lba_tree_stats().num_erases);
-    journal_remove(mutation_pt, maybe_get_delta_buffer());
-    return mutate_mapping_ret(
-      interruptible::ready_future_marker{},
-      mutated);
-  }
-}
-
-LBALeafNode::mutate_internal_address_ret LBALeafNode::mutate_internal_address(
-  op_context_t c,
-  depth_t depth,
-  laddr_t laddr,
-  paddr_t paddr)
-{
-  ceph_assert(0 == "Impossible");
-  return mutate_internal_address_ret(
-    interruptible::ready_future_marker{},
-    paddr);
-}
-
-LBALeafNode::find_hole_ret LBALeafNode::find_hole(
-  op_context_t c,
-  laddr_t min,
-  laddr_t max,
-  extent_len_t len)
-{
-  logger().debug(
-    "LBALeafNode::find_hole min={} max={}, len={}, *this={}",
-    min, max, len, *this);
-  auto [liter, uiter] = bound(min, max);
-  for (auto i = liter; i != uiter; ++i) {
-    auto ub = i->get_key();
-    if (min + len <= ub) {
-      return find_hole_ret(
-       interruptible::ready_future_marker{},
-       min);
-    } else {
-      min = i->get_key() + i->get_val().len;
-    }
-  }
-  if (min + len <= max) {
-    return find_hole_ret(
-      interruptible::ready_future_marker{},
-      min);
-  } else {
-    return find_hole_ret(
-      interruptible::ready_future_marker{},
-      L_ADDR_MAX);
-  }
-}
-
-LBALeafNode::scan_mappings_ret LBALeafNode::scan_mappings(
-  op_context_t c,
-  laddr_t begin,
-  laddr_t end,
-  scan_mappings_func_t &f)
-{
-  auto [biter, eiter] = bound(begin, end);
-  for (auto i = biter; i != eiter; ++i) {
-    auto val = i->get_val();
-    f(i->get_key(), val.paddr, val.len);
-  }
-  return scan_mappings_iertr::now();
-}
-
-LBALeafNode::scan_mapped_space_ret LBALeafNode::scan_mapped_space(
-  op_context_t c,
-  scan_mapped_space_func_t &f)
-{
-  f(get_paddr(), get_length());
-  for (auto i = begin(); i != end(); ++i) {
-    auto val = i->get_val();
-    f(val.paddr, val.len);
-  }
-  return scan_mappings_iertr::now();
-}
-
-
-void LBALeafNode::resolve_relative_addrs(paddr_t base)
-{
-  for (auto i: *this) {
-    if (i->get_val().paddr.is_relative()) {
-      auto val = i->get_val();
-      val.paddr = base.add_relative(val.paddr);
-      logger().debug(
-       "LBALeafNode::resolve_relative_addrs {} -> {}",
-       i->get_val().paddr,
-       val.paddr);
-      i->set_val(val);
-    }
-  }
-}
-
-std::pair<LBALeafNode::internal_iterator_t, LBALeafNode::internal_iterator_t>
-LBALeafNode::get_leaf_entries(laddr_t addr, extent_len_t len)
-{
-  return bound(addr, addr + len);
-}
-
-get_lba_node_ret get_lba_btree_extent(
-  op_context_t c,
-  CachedExtentRef parent,
-  depth_t depth,
-  paddr_t offset,
-  paddr_t base)
-{
-  offset = offset.maybe_relative_to(base);
-  ceph_assert(depth > 0);
-  if (depth > 1) {
-    logger().debug(
-      "get_lba_btree_extent: reading internal at offset {}, depth {}",
-      offset,
-      depth);
-    return c.cache.get_extent<LBAInternalNode>(
-      c.trans,
-      offset,
-      LBA_BLOCK_SIZE).si_then([c, parent](auto ret)
-                               -> get_lba_node_ret {
-       auto meta = ret->get_meta();
-       if (ret->get_size()) {
-         ceph_assert(meta.begin <= ret->begin()->get_key());
-         ceph_assert(meta.end > (ret->end() - 1)->get_key());
-       }
-       if (parent->has_been_invalidated() || ret->has_been_invalidated()) {
-         logger().debug(
-           "get_lba_btree_extent: parent {} or ret {} is invalid, transaction {} is conflicted: {}",
-           *parent,
-           *ret,
-           (void*)&c.trans,
-           c.trans.is_conflicted());
-         assert(!(parent->has_been_invalidated() || ret->has_been_invalidated()));
-       }
-       if (!ret->is_pending() && !ret->pin.is_linked()) {
-         ret->pin.set_range(meta);
-         c.pins.add_pin(ret->pin);
-       }
-       return get_lba_node_ret(
-         interruptible::ready_future_marker{},
-         LBANodeRef(ret.detach(), /* add_ref = */ false));
-      });
-  } else {
-    logger().debug(
-      "get_lba_btree_extent: reading leaf at offset {}, depth {}",
-      offset,
-      depth);
-    return c.cache.get_extent<LBALeafNode>(
-      c.trans,
-      offset,
-      LBA_BLOCK_SIZE).si_then([offset, c, parent](auto ret)
-                               -> get_lba_node_ret {
-       logger().debug(
-         "get_lba_btree_extent: read leaf at offset {} {}, parent {}",
-         offset,
-         *ret,
-         *parent);
-       auto meta = ret->get_meta();
-       if (ret->get_size()) {
-         ceph_assert(meta.begin <= ret->begin()->get_key());
-         ceph_assert(meta.end > (ret->end() - 1)->get_key());
-       }
-       if (parent->has_been_invalidated() || ret->has_been_invalidated()) {
-         logger().debug(
-           "get_lba_btree_extent: parent {} or ret {} is invalid, transaction {} is conflicted: {}",
-           *parent,
-           *ret,
-           (void*)&c.trans,
-           c.trans.is_conflicted());
-         assert(!(parent->has_been_invalidated() || ret->has_been_invalidated()));
-       }
-       if (!ret->is_pending() && !ret->pin.is_linked()) {
-         ret->pin.set_range(meta);
-         c.pins.add_pin(ret->pin);
-       }
-       return get_lba_node_ret(
-         interruptible::ready_future_marker{},
-         LBANodeRef(ret.detach(), /* add_ref = */ false));
-      });
-  }
-}
-
-}
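
For context on the removed allocation path: the body of LBALeafNode::find_hole above reduces to a linear walk over a leaf's sorted, non-overlapping (key, length) entries bounded to [min, max), returning the first address where a gap of at least len begins, or L_ADDR_MAX when no gap below max fits. The standalone sketch below reproduces that walk outside the node/iterator machinery; extent_t, the std::vector container, and the free find_hole function are illustrative stand-ins, not seastore types.

    // Standalone sketch of the hole scan performed by the removed
    // LBALeafNode::find_hole.  extent_t and this free function are
    // illustrative stand-ins, not seastore types.
    #include <cstdint>
    #include <limits>
    #include <vector>

    using laddr_t = uint64_t;
    using extent_len_t = uint32_t;
    constexpr laddr_t L_ADDR_MAX = std::numeric_limits<laddr_t>::max();

    struct extent_t {
      laddr_t key;       // logical start address of a mapping
      extent_len_t len;  // mapped length
    };

    // extents mirrors a leaf's entries bounded to [min, max): sorted by key
    // and non-overlapping.  Returns the first address where a hole of at
    // least len starts, or L_ADDR_MAX if nothing fits below max.
    laddr_t find_hole(const std::vector<extent_t> &extents,
                      laddr_t min, laddr_t max, extent_len_t len) {
      for (const auto &e : extents) {
        if (min + len <= e.key) {
          return min;          // gap before this mapping is large enough
        }
        min = e.key + e.len;   // otherwise continue past the mapping
      }
      return (min + len <= max) ? min : L_ADDR_MAX;
    }
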
diff --git a/src/crimson/os/seastore/lba_manager/btree/lba_btree_node_impl.h b/src/crimson/os/seastore/lba_manager/btree/lba_btree_node_impl.h
deleted file mode 100644 (file)
index fc9fd06..0000000
+++ /dev/null
@@ -1,557 +0,0 @@
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab
-
-#pragma once
-
-#include <sys/mman.h>
-#include <string.h>
-
-#include <memory>
-#include <string.h>
-
-#include "include/buffer.h"
-
-#include "crimson/common/fixed_kv_node_layout.h"
-#include "crimson/common/errorator.h"
-#include "crimson/os/seastore/lba_manager.h"
-#include "crimson/os/seastore/seastore_types.h"
-#include "crimson/os/seastore/cache.h"
-#include "crimson/os/seastore/cached_extent.h"
-#include "crimson/os/seastore/lba_manager/btree/lba_btree_node.h"
-#include "crimson/os/seastore/lba_manager/btree/btree_range_pin.h"
-
-namespace crimson::os::seastore::lba_manager::btree {
-
-constexpr size_t LBA_BLOCK_SIZE = 4096;
-
-/**
- * lba_node_meta_le_t
- *
- * On disk layout for lba_node_meta_t
- */
-struct lba_node_meta_le_t {
-  laddr_le_t begin = laddr_le_t(0);
-  laddr_le_t end = laddr_le_t(0);
-  depth_le_t depth = init_depth_le(0);
-
-  lba_node_meta_le_t() = default;
-  lba_node_meta_le_t(const lba_node_meta_le_t &) = default;
-  explicit lba_node_meta_le_t(const lba_node_meta_t &val)
-    : begin(ceph_le64(val.begin)),
-      end(ceph_le64(val.end)),
-      depth(init_depth_le(val.depth)) {}
-
-  operator lba_node_meta_t() const {
-    return lba_node_meta_t{ begin, end, depth };
-  }
-};
-
-
-/**
- * LBAInternalNode
- *
- * Abstracts operations on and layout of internal nodes for the
- * LBA Tree.
- *
- * Layout (4k):
- *   size       : uint32_t[1]                4b
- *   (padding)  :                            4b
- *   meta       : lba_node_meta_le_t[3]      (1*24)b
- *   keys       : laddr_t[255]               (254*8)b
- *   values     : paddr_t[255]               (254*8)b
- *                                           = 4096
-
- * TODO: make the above capacity calculation part of FixedKVNodeLayout
- * TODO: the above alignment probably isn't portable without further work
- */
-constexpr size_t INTERNAL_NODE_CAPACITY = 254;
-struct LBAInternalNode
-  : LBANode,
-    common::FixedKVNodeLayout<
-      INTERNAL_NODE_CAPACITY,
-      lba_node_meta_t, lba_node_meta_le_t,
-      laddr_t, laddr_le_t,
-      paddr_t, paddr_le_t> {
-  using internal_iterator_t = const_iterator;
-  template <typename... T>
-  LBAInternalNode(T&&... t) :
-    LBANode(std::forward<T>(t)...),
-    FixedKVNodeLayout(get_bptr().c_str()) {}
-
-  static constexpr extent_types_t TYPE = extent_types_t::LADDR_INTERNAL;
-
-  lba_node_meta_t get_node_meta() const final { return get_meta(); }
-
-  CachedExtentRef duplicate_for_write() final {
-    assert(delta_buffer.empty());
-    return CachedExtentRef(new LBAInternalNode(*this));
-  };
-
-  delta_buffer_t delta_buffer;
-  delta_buffer_t *maybe_get_delta_buffer() {
-    return is_mutation_pending() ? &delta_buffer : nullptr;
-  }
-
-  lookup_ret lookup(op_context_t c, laddr_t addr, depth_t depth) final;
-
-  lookup_range_ret lookup_range(
-    op_context_t c,
-    laddr_t addr,
-    extent_len_t len) final;
-
-  lookup_pin_ret lookup_pin(
-    op_context_t c,
-    laddr_t addr) final;
-
-  insert_ret insert(
-    op_context_t c,
-    laddr_t laddr,
-    lba_map_val_t val) final;
-
-  mutate_mapping_ret mutate_mapping(
-    op_context_t c,
-    laddr_t laddr,
-    mutate_func_t &&f) final;
-  mutate_mapping_ret mutate_mapping_internal(
-    op_context_t c,
-    laddr_t laddr,
-    bool is_root,
-    mutate_func_t &&f) final;
-
-  mutate_internal_address_ret mutate_internal_address(
-    op_context_t c,
-    depth_t depth,
-    laddr_t laddr,
-    paddr_t paddr) final;
-
-  find_hole_ret find_hole(
-    op_context_t c,
-    laddr_t min,
-    laddr_t max,
-    extent_len_t len) final;
-
-  scan_mappings_ret scan_mappings(
-    op_context_t c,
-    laddr_t begin,
-    laddr_t end,
-    scan_mappings_func_t &f) final;
-
-  scan_mapped_space_ret scan_mapped_space(
-    op_context_t c,
-    scan_mapped_space_func_t &f) final;
-
-  std::tuple<LBANodeRef, LBANodeRef, laddr_t>
-  make_split_children(op_context_t c) final {
-    auto left = c.cache.alloc_new_extent<LBAInternalNode>(
-      c.trans, LBA_BLOCK_SIZE);
-    auto right = c.cache.alloc_new_extent<LBAInternalNode>(
-      c.trans, LBA_BLOCK_SIZE);
-    auto pivot = split_into(*left, *right);
-    left->pin.set_range(left->get_meta());
-    right->pin.set_range(right->get_meta());
-    return std::make_tuple(
-      left,
-      right,
-      pivot);
-  }
-
-  LBANodeRef make_full_merge(
-    op_context_t c,
-    LBANodeRef &right) final {
-    auto replacement = c.cache.alloc_new_extent<LBAInternalNode>(
-      c.trans, LBA_BLOCK_SIZE);
-    replacement->merge_from(*this, *right->cast<LBAInternalNode>());
-    replacement->pin.set_range(replacement->get_meta());
-    return replacement;
-  }
-
-  std::tuple<LBANodeRef, LBANodeRef, laddr_t>
-  make_balanced(
-    op_context_t c,
-    LBANodeRef &_right,
-    bool prefer_left) final {
-    ceph_assert(_right->get_type() == TYPE);
-    auto &right = *_right->cast<LBAInternalNode>();
-    auto replacement_left = c.cache.alloc_new_extent<LBAInternalNode>(
-      c.trans, LBA_BLOCK_SIZE);
-    auto replacement_right = c.cache.alloc_new_extent<LBAInternalNode>(
-      c.trans, LBA_BLOCK_SIZE);
-
-    auto pivot = balance_into_new_nodes(
-      *this,
-      right,
-      prefer_left,
-      *replacement_left,
-      *replacement_right);
-
-    replacement_left->pin.set_range(replacement_left->get_meta());
-    replacement_right->pin.set_range(replacement_right->get_meta());
-    return std::make_tuple(
-      replacement_left,
-      replacement_right,
-      pivot);
-  }
-
-  /**
-   * Internal relative addresses on read or in memory prior to commit
-   * are either record or block relative depending on whether this
-   * physical node is is_initial_pending() or just is_pending().
-   *
-   * User passes appropriate base depending on lifecycle and
-   * resolve_relative_addrs fixes up relative internal references
-   * based on base.
-   */
-  void resolve_relative_addrs(paddr_t base) final;
-  void node_resolve_vals(iterator from, iterator to) const final {
-    if (is_initial_pending()) {
-      for (auto i = from; i != to; ++i) {
-       if (i->get_val().is_relative()) {
-         assert(i->get_val().is_block_relative());
-         i->set_val(get_paddr().add_relative(i->get_val()));
-       }
-      }
-    }
-  }
-  void node_unresolve_vals(iterator from, iterator to) const final {
-    if (is_initial_pending()) {
-      for (auto i = from; i != to; ++i) {
-       if (i->get_val().is_relative()) {
-         assert(i->get_val().is_record_relative());
-         i->set_val(i->get_val() - get_paddr());
-       }
-      }
-    }
-  }
-
-  extent_types_t get_type() const final {
-    return TYPE;
-  }
-
-  std::ostream &print_detail(std::ostream &out) const final;
-
-  ceph::bufferlist get_delta() final {
-    ceph::buffer::ptr bptr(delta_buffer.get_bytes());
-    delta_buffer.copy_out(bptr.c_str(), bptr.length());
-    ceph::bufferlist bl;
-    bl.push_back(bptr);
-    return bl;
-  }
-
-  void apply_delta_and_adjust_crc(
-    paddr_t base, const ceph::bufferlist &_bl) final {
-    assert(_bl.length());
-    ceph::bufferlist bl = _bl;
-    bl.rebuild();
-    delta_buffer_t buffer;
-    buffer.copy_in(bl.front().c_str(), bl.front().length());
-    buffer.replay(*this);
-    set_last_committed_crc(get_crc32c());
-    resolve_relative_addrs(base);
-  }
-
-  bool at_max_capacity() const final {
-    return get_size() == get_capacity();
-  }
-
-  bool at_min_capacity() const {
-    return get_size() == (get_capacity() / 2);
-  }
-
-  /// returns iterators containing [l, r)
-  std::pair<internal_iterator_t, internal_iterator_t> bound(
-    laddr_t l, laddr_t r) {
-    // TODO: inefficient
-    auto retl = begin();
-    for (; retl != end(); ++retl) {
-      if (retl->get_next_key_or_max() > l)
-       break;
-    }
-    auto retr = retl;
-    for (; retr != end(); ++retr) {
-      if (retr->get_key() >= r)
-       break;
-    }
-    return std::make_pair(retl, retr);
-  }
-
-  using split_iertr = base_iertr;
-  using split_ret = split_iertr::future<LBANodeRef>;
-  split_ret split_entry(
-    op_context_t c,
-    laddr_t addr,
-    internal_iterator_t,
-    LBANodeRef entry);
-
-  using merge_iertr = base_iertr;
-  using merge_ret = merge_iertr::future<LBANodeRef>;
-  merge_ret merge_entry(
-    op_context_t c,
-    laddr_t addr,
-    internal_iterator_t,
-    LBANodeRef entry,
-    bool is_root);
-
-  /// returns iterator for subtree containing laddr
-  internal_iterator_t get_containing_child(laddr_t laddr);
-};
-
-/**
- * LBALeafNode
- *
- * Abstracts operations on and layout of leaf nodes for the
- * LBA Tree.
- *
- * Layout (4k):
- *   size       : uint32_t[1]                4b
- *   (padding)  :                            4b
- *   meta       : lba_node_meta_le_t[3]      (1*24)b
- *   keys       : laddr_t[170]               (145*8)b
- *   values     : lba_map_val_t[170]         (145*20)b
- *                                           = 4092
- *
- * TODO: update FixedKVNodeLayout to handle the above calculation
- * TODO: the above alignment probably isn't portable without further work
- */
-constexpr size_t LEAF_NODE_CAPACITY = 145;
-
-/**
- * lba_map_val_le_t
- *
- * On disk layout for lba_map_val_t.
- */
-struct lba_map_val_le_t {
-  extent_len_le_t len = init_extent_len_le(0);
-  paddr_le_t paddr;
-  ceph_le32 refcount{0};
-  ceph_le32 checksum{0};
-
-  lba_map_val_le_t() = default;
-  lba_map_val_le_t(const lba_map_val_le_t &) = default;
-  explicit lba_map_val_le_t(const lba_map_val_t &val)
-    : len(init_extent_len_le(val.len)),
-      paddr(paddr_le_t(val.paddr)),
-      refcount(val.refcount),
-      checksum(val.checksum) {}
-
-  operator lba_map_val_t() const {
-    return lba_map_val_t{ len, paddr, refcount, checksum };
-  }
-};
-
-struct LBALeafNode
-  : LBANode,
-    common::FixedKVNodeLayout<
-      LEAF_NODE_CAPACITY,
-      lba_node_meta_t, lba_node_meta_le_t,
-      laddr_t, laddr_le_t,
-      lba_map_val_t, lba_map_val_le_t> {
-  using internal_iterator_t = const_iterator;
-  template <typename... T>
-  LBALeafNode(T&&... t) :
-    LBANode(std::forward<T>(t)...),
-    FixedKVNodeLayout(get_bptr().c_str()) {}
-
-  static constexpr extent_types_t TYPE = extent_types_t::LADDR_LEAF;
-
-  lba_node_meta_t get_node_meta() const final { return get_meta(); }
-
-  CachedExtentRef duplicate_for_write() final {
-    assert(delta_buffer.empty());
-    return CachedExtentRef(new LBALeafNode(*this));
-  };
-
-  delta_buffer_t delta_buffer;
-  delta_buffer_t *maybe_get_delta_buffer() {
-    return is_mutation_pending() ? &delta_buffer : nullptr;
-  }
-
-  lookup_ret lookup(op_context_t c, laddr_t addr, depth_t depth) final
-  {
-    return lookup_ret(
-      interruptible::ready_future_marker{},
-      this);
-  }
-
-  lookup_range_ret lookup_range(
-    op_context_t c,
-    laddr_t addr,
-    extent_len_t len) final;
-
-  lookup_pin_ret lookup_pin(
-    op_context_t c,
-    laddr_t addr) final;
-
-  insert_ret insert(
-    op_context_t c,
-    laddr_t laddr,
-    lba_map_val_t val) final;
-
-  mutate_mapping_ret mutate_mapping(
-    op_context_t c,
-    laddr_t laddr,
-    mutate_func_t &&f) final;
-  mutate_mapping_ret mutate_mapping_internal(
-    op_context_t c,
-    laddr_t laddr,
-    bool is_root,
-    mutate_func_t &&f) final;
-
-  mutate_internal_address_ret mutate_internal_address(
-    op_context_t c,
-    depth_t depth,
-    laddr_t laddr,
-    paddr_t paddr) final;
-
-  find_hole_ret find_hole(
-    op_context_t c,
-    laddr_t min,
-    laddr_t max,
-    extent_len_t len) final;
-
-  scan_mappings_ret scan_mappings(
-    op_context_t c,
-    laddr_t begin,
-    laddr_t end,
-    scan_mappings_func_t &f) final;
-
-  scan_mapped_space_ret scan_mapped_space(
-    op_context_t c,
-    scan_mapped_space_func_t &f) final;
-
-  std::tuple<LBANodeRef, LBANodeRef, laddr_t>
-  make_split_children(op_context_t c) final {
-    auto left = c.cache.alloc_new_extent<LBALeafNode>(
-      c.trans, LBA_BLOCK_SIZE);
-    auto right = c.cache.alloc_new_extent<LBALeafNode>(
-      c.trans, LBA_BLOCK_SIZE);
-    auto pivot = split_into(*left, *right);
-    left->pin.set_range(left->get_meta());
-    right->pin.set_range(right->get_meta());
-    return std::make_tuple(
-      left,
-      right,
-      pivot);
-  }
-
-  LBANodeRef make_full_merge(
-    op_context_t c,
-    LBANodeRef &right) final {
-    auto replacement = c.cache.alloc_new_extent<LBALeafNode>(
-      c.trans, LBA_BLOCK_SIZE);
-    replacement->merge_from(*this, *right->cast<LBALeafNode>());
-    replacement->pin.set_range(replacement->get_meta());
-    return replacement;
-  }
-
-  std::tuple<LBANodeRef, LBANodeRef, laddr_t>
-  make_balanced(
-    op_context_t c,
-    LBANodeRef &_right,
-    bool prefer_left) final {
-    ceph_assert(_right->get_type() == TYPE);
-    auto &right = *_right->cast<LBALeafNode>();
-    auto replacement_left = c.cache.alloc_new_extent<LBALeafNode>(
-      c.trans, LBA_BLOCK_SIZE);
-    auto replacement_right = c.cache.alloc_new_extent<LBALeafNode>(
-      c.trans, LBA_BLOCK_SIZE);
-
-    auto pivot = balance_into_new_nodes(
-      *this,
-      right,
-      prefer_left,
-      *replacement_left,
-      *replacement_right);
-
-    replacement_left->pin.set_range(replacement_left->get_meta());
-    replacement_right->pin.set_range(replacement_right->get_meta());
-    return std::make_tuple(
-      replacement_left,
-      replacement_right,
-      pivot);
-  }
-
-  // See LBAInternalNode, same concept
-  void resolve_relative_addrs(paddr_t base) final;
-  void node_resolve_vals(iterator from, iterator to) const final {
-    if (is_initial_pending()) {
-      for (auto i = from; i != to; ++i) {
-       auto val = i->get_val();
-       if (val.paddr.is_relative()) {
-         assert(val.paddr.is_block_relative());
-         val.paddr = get_paddr().add_relative(val.paddr);
-         i->set_val(val);
-       }
-      }
-    }
-  }
-  void node_unresolve_vals(iterator from, iterator to) const final {
-    if (is_initial_pending()) {
-      for (auto i = from; i != to; ++i) {
-       auto val = i->get_val();
-       if (val.paddr.is_relative()) {
-         auto val = i->get_val();
-         assert(val.paddr.is_record_relative());
-         val.paddr = val.paddr - get_paddr();
-         i->set_val(val);
-       }
-      }
-    }
-  }
-
-  ceph::bufferlist get_delta() final {
-    ceph::buffer::ptr bptr(delta_buffer.get_bytes());
-    delta_buffer.copy_out(bptr.c_str(), bptr.length());
-    ceph::bufferlist bl;
-    bl.push_back(bptr);
-    return bl;
-  }
-
-  void apply_delta_and_adjust_crc(
-    paddr_t base, const ceph::bufferlist &_bl) final {
-    assert(_bl.length());
-    ceph::bufferlist bl = _bl;
-    bl.rebuild();
-    delta_buffer_t buffer;
-    buffer.copy_in(bl.front().c_str(), bl.front().length());
-    buffer.replay(*this);
-    set_last_committed_crc(get_crc32c());
-    resolve_relative_addrs(base);
-  }
-
-  extent_types_t get_type() const final {
-    return TYPE;
-  }
-
-  std::ostream &print_detail(std::ostream &out) const final;
-
-  bool at_max_capacity() const final {
-    return get_size() == get_capacity();
-  }
-
-  bool at_min_capacity() const final {
-    return get_size() == (get_capacity() / 2);
-  }
-
-  /// returns iterators <lb, ub> containing addresses [l, r)
-  std::pair<internal_iterator_t, internal_iterator_t> bound(
-    laddr_t l, laddr_t r) {
-    // TODO: inefficient
-    auto retl = begin();
-    for (; retl != end(); ++retl) {
-      if (retl->get_key() >= l || (retl->get_key() + retl->get_val().len) > l)
-       break;
-    }
-    auto retr = retl;
-    for (; retr != end(); ++retr) {
-      if (retr->get_key() >= r)
-       break;
-    }
-    return std::make_pair(retl, retr);
-  }
-
-  std::pair<internal_iterator_t, internal_iterator_t>
-  get_leaf_entries(laddr_t addr, extent_len_t len);
-};
-using LBALeafNodeRef = TCachedExtentRef<LBALeafNode>;
-
-}
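
The layout comments in the deleted header above imply the following capacity arithmetic: 4 (size) + 4 (padding) + 24 (meta) + 254 * (8 + 8) = 4096 bytes for an internal node, and 4 + 4 + 24 + 145 * (8 + 20) = 4092 bytes for a leaf; the bracketed element counts quoted there ([255] and [170]) appear stale relative to the byte totals and the declared INTERNAL_NODE_CAPACITY / LEAF_NODE_CAPACITY of 254 and 145. A minimal compile-time check of that arithmetic, using the per-field byte sizes from the comments rather than sizeof() of the real on-disk structs:

    // Sanity check of the 4 KiB node layout arithmetic quoted in the deleted
    // lba_btree_node_impl.h comments.  Field sizes are the ones in those
    // comments, not sizeof() of the actual little-endian structs.
    #include <cstddef>

    constexpr std::size_t LBA_BLOCK_SIZE = 4096;
    constexpr std::size_t HEADER_BYTES = 4 /*size*/ + 4 /*padding*/ + 24 /*meta*/;

    // Internal node: laddr_le_t key (8 B) + paddr_le_t value (8 B) per entry.
    constexpr std::size_t INTERNAL_NODE_CAPACITY = 254;
    static_assert(HEADER_BYTES + INTERNAL_NODE_CAPACITY * (8 + 8) == LBA_BLOCK_SIZE);

    // Leaf node: laddr_le_t key (8 B) + lba_map_val_le_t value (20 B) per entry.
    constexpr std::size_t LEAF_NODE_CAPACITY = 145;
    static_assert(HEADER_BYTES + LEAF_NODE_CAPACITY * (8 + 20) == 4092);
    static_assert(HEADER_BYTES + LEAF_NODE_CAPACITY * (8 + 20) <= LBA_BLOCK_SIZE);
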
index 3ea9abf02f03206dabd8dcecfb719aea56cc87c9..5ecd1640e0f8f9d3d23fac376aecd2f6a9e4d43b 100644 (file)
@@ -187,34 +187,6 @@ struct btree_lba_manager_test :
     return ret;
   }
 
-  auto set_mapping(
-    test_transaction_t &t,
-    laddr_t addr,
-    size_t len,
-    paddr_t paddr) {
-    auto [b, e] = get_overlap(t, addr, len);
-    EXPECT_EQ(b, e);
-
-    auto ret = with_trans_intr(
-      *t.t,
-      [=](auto &t) {
-       return lba_manager->set_extent(t, addr, len, paddr);
-      }).unsafe_get0();
-    EXPECT_EQ(addr, ret->get_laddr());
-    EXPECT_EQ(len, ret->get_length());
-    EXPECT_EQ(paddr, ret->get_paddr());
-    t.mappings.emplace(
-      std::make_pair(
-       ret->get_laddr(),
-       test_extent_t{
-         ret->get_paddr(),
-         ret->get_length(),
-         1
-        }
-      ));
-    return ret;
-  }
-
   auto decref_mapping(
     test_transaction_t &t,
     laddr_t addr) {