]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
crimson/os/seastore: extract fixed kv btree implementation out of lba manager
author: Xuehan Xu <xxhdx1985126@gmail.com>
Fri, 28 Jan 2022 05:04:03 +0000 (13:04 +0800)
committer: Xuehan Xu <xxhdx1985126@gmail.com>
Sun, 13 Mar 2022 09:16:53 +0000 (17:16 +0800)
Basically, this PR moves the current LBABtree and lba_range_pin out of the LBA manager,
and renames LBABtree to FixedKVBtree. This is preparation for implementing backrefs.

Signed-off-by: Xuehan Xu <xxhdx1985126@gmail.com>
16 files changed:
src/crimson/os/seastore/CMakeLists.txt
src/crimson/os/seastore/btree/btree_range_pin.h [new file with mode: 0644]
src/crimson/os/seastore/btree/fixed_kv_btree.h [new file with mode: 0644]
src/crimson/os/seastore/cached_extent.cc
src/crimson/os/seastore/cached_extent.h
src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.cc
src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.h
src/crimson/os/seastore/lba_manager/btree/btree_range_pin.cc [deleted file]
src/crimson/os/seastore/lba_manager/btree/btree_range_pin.h [deleted file]
src/crimson/os/seastore/lba_manager/btree/lba_btree.cc [deleted file]
src/crimson/os/seastore/lba_manager/btree/lba_btree.h [deleted file]
src/crimson/os/seastore/lba_manager/btree/lba_btree_node.h
src/crimson/os/seastore/object_data_handler.cc
src/crimson/os/seastore/seastore_types.h
src/crimson/os/seastore/transaction_manager.cc
src/test/crimson/seastore/test_btree_lba_manager.cc

index cec75b10471d4e66c456b378633956405a10a9a9..96e9384b6c173c642801bce6ee6617f76483e09d 100644 (file)
@@ -11,8 +11,6 @@ set(crimson_seastore_srcs
   lba_manager.cc
   segment_cleaner.cc
   lba_manager/btree/btree_lba_manager.cc
-  lba_manager/btree/btree_range_pin.cc
-  lba_manager/btree/lba_btree.cc
   lba_manager/btree/lba_btree_node.cc
   omap_manager.cc
   omap_manager/btree/btree_omap_manager.cc
diff --git a/src/crimson/os/seastore/btree/btree_range_pin.h b/src/crimson/os/seastore/btree/btree_range_pin.h
new file mode 100644 (file)
index 0000000..4791a9b
--- /dev/null
@@ -0,0 +1,447 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include <limits>
+
+#include <boost/intrusive/set.hpp>
+
+#include "crimson/common/log.h"
+
+#include "crimson/os/seastore/cached_extent.h"
+#include "crimson/os/seastore/seastore_types.h"
+
+namespace crimson::os::seastore {
+
+template <typename bound_t>
+struct fixed_kv_node_meta_t {
+  bound_t begin = 0;
+  bound_t end = 0;
+  depth_t depth = 0;
+
+  bool is_parent_of(const fixed_kv_node_meta_t &other) const {
+    return (depth == other.depth + 1) &&
+      (begin <= other.begin) &&
+      (end > other.begin);
+  }
+
+  std::pair<fixed_kv_node_meta_t, fixed_kv_node_meta_t> split_into(bound_t pivot) const {
+    return std::make_pair(
+      fixed_kv_node_meta_t{begin, pivot, depth},
+      fixed_kv_node_meta_t{pivot, end, depth});
+  }
+
+  static fixed_kv_node_meta_t merge_from(
+    const fixed_kv_node_meta_t &lhs, const fixed_kv_node_meta_t &rhs) {
+    ceph_assert(lhs.depth == rhs.depth);
+    return fixed_kv_node_meta_t{lhs.begin, rhs.end, lhs.depth};
+  }
+
+  static std::pair<fixed_kv_node_meta_t, fixed_kv_node_meta_t>
+  rebalance(const fixed_kv_node_meta_t &lhs, const fixed_kv_node_meta_t &rhs, bound_t pivot) {
+    ceph_assert(lhs.depth == rhs.depth);
+    return std::make_pair(
+      fixed_kv_node_meta_t{lhs.begin, pivot, lhs.depth},
+      fixed_kv_node_meta_t{pivot, rhs.end, lhs.depth});
+  }
+
+  bool is_root() const {
+    return begin == 0 && end == L_ADDR_MAX;
+  }
+};
+
+/// debug formatter for fixed_kv_node_meta_t
+template <typename bound_t>
+inline std::ostream &operator<<(
+  std::ostream &lhs,
+  const fixed_kv_node_meta_t<bound_t> &rhs)
+{
+  // print the current type name (the struct was renamed from
+  // btree_node_meta_t when extracted out of the lba manager)
+  return lhs << "fixed_kv_node_meta_t("
+            << "begin=" << rhs.begin
+            << ", end=" << rhs.end
+            << ", depth=" << rhs.depth
+            << ")";
+}
+/**
+ * btree_range_pin_t
+ *
+ * Element tracked by btree_pin_set_t below.  Encapsulates the intrusive_set
+ * hook, the fixed_kv_node_meta_t representing the key range covered by a node,
+ * and extent and ref members intended to hold a reference when the extent
+ * should be pinned.
+ */
+template <typename T>
+class btree_pin_set_t;
+
+template <typename node_bound_t>
+class btree_range_pin_t : public boost::intrusive::set_base_hook<> {
+  friend class btree_pin_set_t<node_bound_t>;
+  fixed_kv_node_meta_t<node_bound_t> range;
+
+  btree_pin_set_t<node_bound_t> *pins = nullptr;
+
+  // We need to be able to remember extent without holding a reference,
+  // but we can do it more compactly -- TODO
+  CachedExtent *extent = nullptr;
+  CachedExtentRef ref;
+
+  using index_t = boost::intrusive::set<btree_range_pin_t>;
+
+  static auto get_tuple(const fixed_kv_node_meta_t<node_bound_t> &meta) {
+    return std::make_tuple(-meta.depth, meta.begin);
+  }
+
+  void acquire_ref() {
+    ref = CachedExtentRef(extent);
+  }
+
+  void drop_ref() {
+    ref.reset();
+  }
+
+public:
+  btree_range_pin_t() = default;
+  btree_range_pin_t(CachedExtent *extent)
+    : extent(extent) {}
+  btree_range_pin_t(const btree_range_pin_t &rhs, CachedExtent *extent)
+    : range(rhs.range), extent(extent) {}
+
+  bool has_ref() const {
+    return !!ref;
+  }
+
+  bool is_root() const {
+    return range.is_root();
+  }
+
+  void set_range(const fixed_kv_node_meta_t<node_bound_t> &nrange) {
+    range = nrange;
+  }
+  void set_extent(CachedExtent *nextent) {
+    ceph_assert(!extent);
+    extent = nextent;
+  }
+
+  CachedExtent &get_extent() {
+    assert(extent);
+    return *extent;
+  }
+
+  bool has_ref() {
+    return !!ref;
+  }
+
+  void take_pin(btree_range_pin_t &other)
+  {
+    ceph_assert(other.extent);
+    if (other.pins) {
+      other.pins->replace_pin(*this, other);
+      pins = other.pins;
+      other.pins = nullptr;
+
+      if (other.has_ref()) {
+       other.drop_ref();
+       acquire_ref();
+      }
+    }
+  }
+
+  friend bool operator<(
+    const btree_range_pin_t &lhs, const btree_range_pin_t &rhs) {
+    return get_tuple(lhs.range) < get_tuple(rhs.range);
+  }
+  friend bool operator>(
+    const btree_range_pin_t &lhs, const btree_range_pin_t &rhs) {
+    return get_tuple(lhs.range) > get_tuple(rhs.range);
+  }
+  friend bool operator==(
+    const btree_range_pin_t &lhs, const btree_range_pin_t &rhs) {
+    return get_tuple(lhs.range) == rhs.get_tuple(rhs.range);
+  }
+
+  struct meta_cmp_t {
+    bool operator()(
+      const btree_range_pin_t &lhs, const fixed_kv_node_meta_t<node_bound_t> &rhs) const {
+      return get_tuple(lhs.range) < get_tuple(rhs);
+    }
+    bool operator()(
+      const fixed_kv_node_meta_t<node_bound_t> &lhs, const btree_range_pin_t &rhs) const {
+      return get_tuple(lhs) < get_tuple(rhs.range);
+    }
+  };
+
+  friend std::ostream &operator<<(
+    std::ostream &lhs,
+    const btree_range_pin_t<node_bound_t> &rhs) {
+    return lhs << "btree_range_pin_t("
+              << "begin=" << rhs.range.begin
+              << ", end=" << rhs.range.end
+              << ", depth=" << rhs.range.depth
+              << ", extent=" << rhs.extent
+              << ")";
+  }
+
+  template <typename>
+  friend class BtreeNodePin;
+  ~btree_range_pin_t()
+  {
+    ceph_assert(!pins == !is_linked());
+    ceph_assert(!ref);
+    if (pins) {
+      crimson::get_logger(ceph_subsys_seastore_lba
+       ).debug("{}: removing {}", __func__, *this);
+      pins->remove_pin(*this, true);
+    }
+    extent = nullptr;
+  }
+
+};
+
+/**
+ * btree_pin_set_t
+ *
+ * Ensures that for every cached node, all parent btree nodes required
+ * to map it are present in cache.  Relocating these nodes can
+ * therefore be done without further reads or cache space.
+ *
+ * Contains a btree_range_pin_t for every clean or dirty btree node
+ * or LogicalCachedExtent instance in cache at any point in time.
+ * For any btree node, the contained btree_range_pin_t will hold
+ * a reference to that node pinning it in cache as long as that
+ * node has children in the set.  This invariant can be violated
+ * only by calling retire_extent and is repaired by calling
+ * check_parent synchronously after adding any new extents.
+ */
+template <typename node_bound_t>
+class btree_pin_set_t {
+  friend class btree_range_pin_t<node_bound_t>;
+  using pins_t = typename btree_range_pin_t<node_bound_t>::index_t;
+  // all pins, ordered by (-depth, begin); see btree_range_pin_t::get_tuple
+  pins_t pins;
+
+  /// Removes pin from set optionally checking whether parent has other children
+  void remove_pin(btree_range_pin_t<node_bound_t> &pin, bool do_check_parent)
+  {
+    crimson::get_logger(ceph_subsys_seastore_lba).debug("{}: {}", __func__, pin);
+    ceph_assert(pin.is_linked());
+    ceph_assert(pin.pins);
+    ceph_assert(!pin.ref);
+
+    pins.erase(pin);
+    pin.pins = nullptr;
+
+    if (do_check_parent) {
+      check_parent(pin);
+    }
+  }
+
+  /// Swaps 'to' into 'from's position in the index (used by take_pin)
+  void replace_pin(
+    btree_range_pin_t<node_bound_t> &to,
+    btree_range_pin_t<node_bound_t> &from)
+  {
+    pins.replace_node(pins.iterator_to(from), to);
+  }
+
+  /// Returns parent pin if exists
+  btree_range_pin_t<node_bound_t> *maybe_get_parent(
+    const fixed_kv_node_meta_t<node_bound_t> &meta)
+  {
+    // search at depth + 1: the parent is the last pin at that depth whose
+    // range starts at or before meta.begin
+    auto cmeta = meta;
+    cmeta.depth++;
+    auto iter = pins.upper_bound(
+      cmeta,
+      typename btree_range_pin_t<node_bound_t>::meta_cmp_t());
+    if (iter == pins.begin()) {
+      return nullptr;
+    } else {
+      --iter;
+      if (iter->range.is_parent_of(meta)) {
+       return &*iter;
+      } else {
+       return nullptr;
+      }
+    }
+  }
+
+  /// Returns earliest child pin if exist
+  const btree_range_pin_t<node_bound_t>
+  *maybe_get_first_child(const fixed_kv_node_meta_t<node_bound_t> &meta) const
+  {
+    // depth 0 pins (leaf mappings) cannot have children
+    if (meta.depth == 0) {
+      return nullptr;
+    }
+
+    // search one level down for the first pin starting within our range
+    auto cmeta = meta;
+    cmeta.depth--;
+
+    auto iter = pins.lower_bound(
+      cmeta,
+      typename btree_range_pin_t<node_bound_t>::meta_cmp_t());
+    if (iter == pins.end()) {
+      return nullptr;
+    } else if (meta.is_parent_of(iter->range)) {
+      return &*iter;
+    } else {
+      return nullptr;
+    }
+  }
+
+  /// Releases pin if it has no children
+  void release_if_no_children(btree_range_pin_t<node_bound_t> &pin)
+  {
+    ceph_assert(pin.is_linked());
+    if (maybe_get_first_child(pin.range) == nullptr) {
+      pin.drop_ref();
+    }
+  }
+
+public:
+  /// Adds pin to set, assumes set is consistent
+  void add_pin(btree_range_pin_t<node_bound_t> &pin)
+  {
+    ceph_assert(!pin.is_linked());
+    ceph_assert(!pin.pins);
+    ceph_assert(!pin.ref);
+
+    auto [prev, inserted] = pins.insert(pin);
+    if (!inserted) {
+      crimson::get_logger(ceph_subsys_seastore_lba).error(
+       "{}: unable to add {} ({}), found {} ({})",
+       __func__,
+       pin,
+       *(pin.extent),
+       *prev,
+       *(prev->extent));
+      ceph_assert(0 == "impossible");
+      return;
+    }
+    pin.pins = this;
+    // every non-root pin must have a resident parent; make the parent
+    // hold a ref so it stays cached while it has children
+    if (!pin.is_root()) {
+      auto *parent = maybe_get_parent(pin.range);
+      ceph_assert(parent);
+      if (!parent->has_ref()) {
+       crimson::get_logger(ceph_subsys_seastore_lba
+         ).debug("{}: acquiring parent {}", __func__,
+           static_cast<void*>(parent));
+       parent->acquire_ref();
+      } else {
+       crimson::get_logger(ceph_subsys_seastore_lba).debug(
+         "{}: parent has ref {}", __func__,
+         static_cast<void*>(parent));
+      }
+    }
+    // if this pin already has children in the set, it must pin itself too
+    if (maybe_get_first_child(pin.range) != nullptr) {
+      crimson::get_logger(ceph_subsys_seastore_lba).debug(
+       "{}: acquiring self {}", __func__, pin);
+      pin.acquire_ref();
+    }
+  }
+
+
+  /**
+   * retire/check_parent
+   *
+   * See BtreeLBAManager::complete_transaction.
+   * retire removes the specified pin from the set, but does not
+   * check parents.  After any new extents are added to the set,
+   * the caller is required to call check_parent to restore the
+   * invariant.
+   */
+  void retire(btree_range_pin_t<node_bound_t> &pin)
+  {
+    pin.drop_ref();
+    remove_pin(pin, false);
+  }
+
+  void check_parent(btree_range_pin_t<node_bound_t> &pin)
+  {
+    auto parent = maybe_get_parent(pin.range);
+    if (parent) {
+      crimson::get_logger(ceph_subsys_seastore_lba
+       ).debug("{}: releasing parent {}", __func__, *parent);
+      release_if_no_children(*parent);
+    }
+  }
+
+  /// Invokes f on each pin in index order
+  template <typename F>
+  void scan(F &&f) {
+    for (auto &i : pins) {
+      std::invoke(f, i);
+    }
+  }
+
+  /// all pins must have been removed before the set is destroyed
+  /// (btree_range_pin_t unlinks itself in its destructor)
+  ~btree_pin_set_t() {
+    ceph_assert(pins.empty());
+  }
+};
+
+template <typename key_t>
+class BtreeNodePin : public PhysicalNodePin<key_t> {
+
+  /**
+   * parent
+   *
+   * populated until link_extent is called to ensure cache residence
+   * until add_pin is called.
+   */
+  CachedExtentRef parent;
+
+  paddr_t paddr;
+  btree_range_pin_t<key_t> pin;
+
+public:
+  BtreeNodePin() = default;
+
+  BtreeNodePin(
+    CachedExtentRef parent,
+    paddr_t paddr,
+    fixed_kv_node_meta_t<key_t> &&meta)
+    : parent(parent), paddr(paddr) {
+    pin.set_range(std::move(meta));
+  }
+
+  btree_range_pin_t<key_t>& get_range_pin() {
+    return pin;
+  }
+
+  CachedExtentRef get_parent() {
+    return parent;
+  }
+
+  void set_parent(CachedExtentRef pin) {
+    parent = pin;
+  }
+
+  void link_extent(LogicalCachedExtent *ref) final {
+    pin.set_extent(ref);
+  }
+
+  extent_len_t get_length() const final {
+    ceph_assert(pin.range.end > pin.range.begin);
+    return pin.range.end - pin.range.begin;
+  }
+
+  paddr_t get_paddr() const final {
+    return paddr;
+  }
+
+  key_t get_key() const final {
+    return pin.range.begin;
+  }
+
+  PhysicalNodePinRef<key_t> duplicate() const final {
+    auto ret = std::unique_ptr<BtreeNodePin<key_t>>(
+      new BtreeNodePin<key_t>);
+    ret->pin.set_range(pin.range);
+    ret->paddr = paddr;
+    ret->parent = parent;
+    return ret;
+  }
+
+  void take_pin(PhysicalNodePin<key_t> &opin) final {
+    pin.take_pin(static_cast<BtreeNodePin<key_t>&>(opin).pin);
+  }
+
+  bool has_been_invalidated() const final {
+    return parent->has_been_invalidated();
+  }
+};
+
+}
diff --git a/src/crimson/os/seastore/btree/fixed_kv_btree.h b/src/crimson/os/seastore/btree/fixed_kv_btree.h
new file mode 100644 (file)
index 0000000..1892992
--- /dev/null
@@ -0,0 +1,1680 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-
+// vim: ts=8 sw=2 smarttab expandtab
+
+#pragma once
+
+#include <boost/container/static_vector.hpp>
+#include <sys/mman.h>
+#include <memory>
+#include <string.h>
+
+#include "crimson/os/seastore/logging.h"
+
+#include "crimson/os/seastore/lba_manager.h"
+#include "crimson/os/seastore/seastore_types.h"
+#include "crimson/os/seastore/btree/btree_range_pin.h"
+
+namespace crimson::os::seastore {
+
+/**
+ * op_context_t
+ *
+ * Per-operation context threaded through FixedKVBtree operations: the
+ * cache and transaction to operate within, plus an optional pin set to
+ * register pins with (may be null).
+ */
+template <typename node_key_t>
+struct op_context_t {
+  Cache &cache;
+  Transaction &trans;
+  btree_pin_set_t<node_key_t> *pins = nullptr;
+};
+
+template <typename T>
+Transaction::tree_stats_t& get_tree_stats(Transaction &t);
+
+template <
+  typename node_key_t,
+  typename node_val_t,
+  typename internal_node_t,
+  typename leaf_node_t,
+  size_t node_size>
+class FixedKVBtree {
+  static constexpr size_t MAX_DEPTH = 16;
+  using self_type = FixedKVBtree<
+    node_key_t,
+    node_val_t,
+    internal_node_t,
+    leaf_node_t,
+    node_size>;
+public:
+  using InternalNodeRef = TCachedExtentRef<internal_node_t>;
+  using LeafNodeRef = TCachedExtentRef<leaf_node_t>;
+
+  using base_ertr = crimson::errorator<
+    crimson::ct_error::input_output_error>;
+  using base_iertr = trans_iertr<base_ertr>;
+
+  class iterator;
+  using iterator_fut = base_iertr::future<iterator>;
+
+  using mapped_space_visitor_t = std::function<
+    void(paddr_t, extent_len_t)>;
+
+  class iterator {
+  public:
+    iterator(const iterator &rhs) noexcept :
+      internal(rhs.internal), leaf(rhs.leaf) {}
+    iterator(iterator &&rhs) noexcept :
+      internal(std::move(rhs.internal)), leaf(std::move(rhs.leaf)) {}
+
+    iterator &operator=(const iterator &) = default;
+    iterator &operator=(iterator &&) = default;
+
+    iterator_fut next(
+      op_context_t<node_key_t> c,
+      mapped_space_visitor_t *visitor=nullptr) const
+    {
+      assert_valid();
+      assert(!is_end());
+
+      auto ret = *this;
+      ret.leaf.pos++;
+      if (ret.at_boundary()) {
+        return seastar::do_with(
+          ret,
+          [c, visitor](auto &ret) mutable {
+            return ret.handle_boundary(
+              c, visitor
+            ).si_then([&ret] {
+              return std::move(ret);
+            });
+          });
+      } else {
+        return iterator_fut(
+          interruptible::ready_future_marker{},
+          ret);
+      }
+
+    }
+
+    iterator_fut prev(op_context_t<node_key_t> c) const
+    {
+      assert_valid();
+      assert(!is_begin());
+
+      auto ret = *this;
+
+      if (ret.leaf.pos > 0) {
+        ret.leaf.pos--;
+        return iterator_fut(
+          interruptible::ready_future_marker{},
+          ret);
+      }
+
+      depth_t depth_with_space = 2;
+      for (; depth_with_space <= get_depth(); ++depth_with_space) {
+        if (ret.get_internal(depth_with_space).pos > 0) {
+          break;
+        }
+      }
+
+      assert(depth_with_space <= ret.get_depth()); // must not be begin()
+      return seastar::do_with(
+        std::move(ret),
+        [](const internal_node_t &internal) { return --internal.end(); },
+        [](const leaf_node_t &leaf) { return --leaf.end(); },
+        [c, depth_with_space](auto &ret, auto &li, auto &ll) {
+          for (depth_t depth = 2; depth < depth_with_space; ++depth) {
+            ret.get_internal(depth).reset();
+          }
+          ret.leaf.reset();
+          ret.get_internal(depth_with_space).pos--;
+          // note, cannot result in at_boundary() by construction
+          return lookup_depth_range(
+            c, ret, depth_with_space - 1, 0, li, ll, nullptr
+          ).si_then([&ret] {
+            assert(!ret.at_boundary());
+            return std::move(ret);
+          });
+        });
+    }
+
+    void assert_valid() const {
+      assert(leaf.node);
+      assert(leaf.pos <= leaf.node->get_size());
+
+      for (auto &i: internal) {
+       (void)i;
+       assert(i.node);
+       assert(i.pos < i.node->get_size());
+      }
+    }
+
+    depth_t get_depth() const {
+      return internal.size() + 1;
+    }
+
+    auto &get_internal(depth_t depth) {
+      assert(depth > 1);
+      assert((depth - 2) < internal.size());
+      return internal[depth - 2];
+    }
+
+    const auto &get_internal(depth_t depth) const {
+      assert(depth > 1);
+      assert((depth - 2) < internal.size());
+      return internal[depth - 2];
+    }
+
+    node_key_t get_key() const {
+      assert(!is_end());
+      return leaf.node->iter_idx(leaf.pos).get_key();
+    }
+    node_val_t get_val() const {
+      assert(!is_end());
+      auto ret = leaf.node->iter_idx(leaf.pos).get_val();
+      ret.paddr = ret.paddr.maybe_relative_to(leaf.node->get_paddr());
+      return ret;
+    }
+
+    bool is_end() const {
+      // external methods may only resolve at a boundary if at end
+      return at_boundary();
+    }
+
+    bool is_begin() const {
+      for (auto &i: internal) {
+       if (i.pos != 0)
+         return false;
+      }
+      return leaf.pos == 0;
+    }
+
+    PhysicalNodePinRef<node_key_t> get_pin() const {
+      assert(!is_end());
+      auto val = get_val();
+      auto key = get_key();
+      return std::make_unique<BtreeNodePin<node_key_t>>(
+       leaf.node,
+       val.paddr,
+       fixed_kv_node_meta_t<node_key_t>{ key, key + val.len, 0 });
+    }
+
+    typename leaf_node_t::Ref get_leaf_node() {
+      return leaf.node;
+    }
+
+  private:
+    iterator() noexcept {}
+    iterator(depth_t depth) noexcept : internal(depth - 1) {}
+
+    friend class FixedKVBtree;
+    static constexpr uint16_t INVALID = std::numeric_limits<uint16_t>::max();
+    template <typename NodeType>
+    struct node_position_t {
+      typename NodeType::Ref node;
+      uint16_t pos = INVALID;
+
+      void reset() {
+       *this = node_position_t{};
+      }
+
+      auto get_iter() {
+       assert(pos != INVALID);
+       assert(pos < node->get_size());
+       return node->iter_idx(pos);
+      }
+    };
+    boost::container::static_vector<
+      node_position_t<internal_node_t>, MAX_DEPTH> internal;
+    node_position_t<leaf_node_t> leaf;
+
+    bool at_boundary() const {
+      assert(leaf.pos <= leaf.node->get_size());
+      return leaf.pos == leaf.node->get_size();
+    }
+
+    using handle_boundary_ertr = base_iertr;
+    using handle_boundary_ret = handle_boundary_ertr::future<>;
+    handle_boundary_ret handle_boundary(
+      op_context_t<node_key_t> c,
+      mapped_space_visitor_t *visitor)
+    {
+      assert(at_boundary());
+      depth_t depth_with_space = 2;
+      for (; depth_with_space <= get_depth(); ++depth_with_space) {
+        if ((get_internal(depth_with_space).pos + 1) <
+            get_internal(depth_with_space).node->get_size()) {
+          break;
+        }
+      }
+
+      if (depth_with_space <= get_depth()) {
+        return seastar::do_with(
+          [](const internal_node_t &internal) { return internal.begin(); },
+          [](const leaf_node_t &leaf) { return leaf.begin(); },
+          [this, c, depth_with_space, visitor](auto &li, auto &ll) {
+            for (depth_t depth = 2; depth < depth_with_space; ++depth) {
+              get_internal(depth).reset();
+            }
+            leaf.reset();
+            get_internal(depth_with_space).pos++;
+            // note, cannot result in at_boundary() by construction
+            return lookup_depth_range(
+              c, *this, depth_with_space - 1, 0, li, ll, visitor
+            );
+          });
+      } else {
+        // end
+        return seastar::now();
+      }
+    }
+
+    depth_t check_split() const {
+      if (!leaf.node->at_max_capacity()) {
+       return 0;
+      }
+      for (depth_t split_from = 1; split_from < get_depth(); ++split_from) {
+       if (!get_internal(split_from + 1).node->at_max_capacity())
+         return split_from;
+      }
+      return get_depth();
+    }
+
+    depth_t check_merge() const {
+      if (!leaf.node->below_min_capacity()) {
+       return 0;
+      }
+      for (depth_t merge_from = 1; merge_from < get_depth(); ++merge_from) {
+       if (!get_internal(merge_from + 1).node->below_min_capacity())
+         return merge_from;
+      }
+      return get_depth();
+    }
+  };
+
+  FixedKVBtree(phy_tree_root_t root) : root(root) {}
+
+  bool is_root_dirty() const {
+    return root_dirty;
+  }
+  phy_tree_root_t get_root_undirty() {
+    ceph_assert(root_dirty);
+    root_dirty = false;
+    return root;
+  }
+
+  /// mkfs
+  using mkfs_ret = phy_tree_root_t;
+  static mkfs_ret mkfs(op_context_t<node_key_t> c) {
+    auto root_leaf = c.cache.template alloc_new_extent<leaf_node_t>(
+      c.trans,
+      node_size);
+    root_leaf->set_size(0);
+    fixed_kv_node_meta_t<node_key_t> meta{0, L_ADDR_MAX, 1};
+    root_leaf->set_meta(meta);
+    root_leaf->pin.set_range(meta);
+    c.trans.get_lba_tree_stats().depth = 1u;
+    return phy_tree_root_t{root_leaf->get_paddr(), 1u};
+  }
+
+  /**
+   * lower_bound
+   *
+   * @param c [in] context
+   * @param addr [in] addr
+   * @return least iterator >= key
+   */
+  iterator_fut lower_bound(
+    op_context_t<node_key_t> c,
+    node_key_t addr,
+    mapped_space_visitor_t *visitor=nullptr) const
+  {
+    LOG_PREFIX(FixedKVBtree::lower_bound);
+    return lookup(
+      c,
+      [addr](const internal_node_t &internal) {
+        assert(internal.get_size() > 0);
+        auto iter = internal.upper_bound(addr);
+        assert(iter != internal.begin());
+        --iter;
+        return iter;
+      },
+      [FNAME, c, addr](const leaf_node_t &leaf) {
+        auto ret = leaf.lower_bound(addr);
+        SUBDEBUGT(
+          seastore_lba_details,
+          "leaf addr {}, got ret offset {}, size {}, end {}",
+          c.trans,
+          addr,
+          ret.get_offset(),
+          leaf.get_size(),
+          ret == leaf.end());
+        return ret;
+      },
+      visitor
+    ).si_then([FNAME, c](auto &&ret) {
+      SUBDEBUGT(
+        seastore_lba_details,
+        "ret.leaf.pos {}",
+        c.trans,
+        ret.leaf.pos);
+      ret.assert_valid();
+      return std::move(ret);
+    });
+  }
+
+
+  /**
+   * upper_bound
+   *
+   * @param c [in] context
+   * @param addr [in] addr
+   * @return least iterator > key
+   */
+  iterator_fut upper_bound(
+    op_context_t<node_key_t> c,
+    node_key_t addr
+  ) const {
+    return lower_bound(
+      c, addr
+    ).si_then([c, addr](auto iter) {
+      if (!iter.is_end() && iter.get_key() == addr) {
+       return iter.next(c);
+      } else {
+       return iterator_fut(
+         interruptible::ready_future_marker{},
+         iter);
+      }
+    });
+  }
+
+  /**
+   * upper_bound_right
+   *
+   * @param c [in] context
+   * @param addr [in] addr
+   * @return least iterator i s.t. i.get_key() + i.get_val().len > key
+   */
+  iterator_fut upper_bound_right(
+    op_context_t<node_key_t> c,
+    node_key_t addr) const
+  {
+    return lower_bound(
+      c, addr
+    ).si_then([c, addr](auto iter) {
+      if (iter.is_begin()) {
+       return iterator_fut(
+         interruptible::ready_future_marker{},
+         iter);
+      } else {
+       return iter.prev(
+         c
+       ).si_then([iter, addr](auto prev) {
+         if ((prev.get_key() + prev.get_val().len) > addr) {
+           return iterator_fut(
+             interruptible::ready_future_marker{},
+             prev);
+         } else {
+           return iterator_fut(
+             interruptible::ready_future_marker{},
+             iter);
+         }
+       });
+      }
+    });
+  }
+
+  iterator_fut begin(op_context_t<node_key_t> c) const {
+    return lower_bound(c, 0);
+  }
+  iterator_fut end(op_context_t<node_key_t> c) const {
+    return upper_bound(c, L_ADDR_MAX);
+  }
+
+  using iterate_repeat_ret_inner = base_iertr::future<
+    seastar::stop_iteration>;
+  template <typename F>
+  static base_iertr::future<> iterate_repeat(
+    op_context_t<node_key_t> c,
+    iterator_fut &&iter_fut,
+    F &&f,
+    mapped_space_visitor_t *visitor=nullptr) {
+    return std::move(
+      iter_fut
+    ).si_then([c, visitor, f=std::forward<F>(f)](auto iter) {
+      return seastar::do_with(
+       iter,
+       std::move(f),
+       [c, visitor](auto &pos, auto &f) {
+         return trans_intr::repeat(
+           [c, visitor, &f, &pos] {
+             return f(
+               pos
+             ).si_then([c, visitor, &pos](auto done) {
+               if (done == seastar::stop_iteration::yes) {
+                 return iterate_repeat_ret_inner(
+                   interruptible::ready_future_marker{},
+                   seastar::stop_iteration::yes);
+               } else {
+                 ceph_assert(!pos.is_end());
+                 return pos.next(
+                   c, visitor
+                 ).si_then([&pos](auto next) {
+                   pos = next;
+                   return iterate_repeat_ret_inner(
+                     interruptible::ready_future_marker{},
+                     seastar::stop_iteration::no);
+                 });
+               }
+             });
+           });
+       });
+    });
+  }
+
+  /**
+   * insert
+   *
+   * Inserts val at laddr with iter as a hint.  If element at laddr already
+   * exists returns iterator to that element unchanged and returns false.
+   *
+   * Invalidates all outstanding iterators for this tree on this transaction.
+   *
+   * @param c [in] op context
+   * @param iter [in] hint, insertion constant if immediately prior to iter
+   * @param laddr [in] addr at which to insert
+   * @param val [in] val to insert
+   * @return pair<iter, bool> where iter points to element at addr, bool true
+   *         iff element at laddr did not exist.
+   */
+  using insert_iertr = base_iertr;
+  using insert_ret = insert_iertr::future<std::pair<iterator, bool>>;
+  insert_ret insert(
+    op_context_t<node_key_t> c,
+    iterator iter,
+    node_key_t laddr,
+    node_val_t val
+  ) {
+    LOG_PREFIX(FixedKVBtree::insert);
+    SUBDEBUGT(
+      seastore_lba_details,
+      "inserting laddr {} at iter {}",
+      c.trans,
+      laddr,
+      iter.is_end() ? L_ADDR_MAX : iter.get_key());
+    return seastar::do_with(
+      iter,
+      [this, c, laddr, val](auto &ret) {
+        return find_insertion(
+          c, laddr, ret
+        ).si_then([this, c, laddr, val, &ret] {
+          if (!ret.at_boundary() && ret.get_key() == laddr) {
+            return insert_ret(
+              interruptible::ready_future_marker{},
+              std::make_pair(ret, false));
+          } else {
+            ++(c.trans.get_lba_tree_stats().num_inserts);
+            return handle_split(
+              c, ret
+            ).si_then([c, laddr, val, &ret] {
+              if (!ret.leaf.node->is_pending()) {
+                CachedExtentRef mut = c.cache.duplicate_for_write(
+                  c.trans, ret.leaf.node
+                );
+                ret.leaf.node = mut->cast<leaf_node_t>();
+              }
+              auto iter = typename leaf_node_t::const_iterator(
+                  ret.leaf.node.get(), ret.leaf.pos);
+              assert(iter == ret.leaf.node->lower_bound(laddr));
+              assert(iter == ret.leaf.node->end() || iter->get_key() > laddr);
+              assert(laddr >= ret.leaf.node->get_meta().begin &&
+                     laddr < ret.leaf.node->get_meta().end);
+              ret.leaf.node->insert(iter, laddr, val);
+              return insert_ret(
+                interruptible::ready_future_marker{},
+                std::make_pair(ret, true));
+            });
+          }
+        });
+      });
+  }
+
+  insert_ret insert(
+    op_context_t<node_key_t> c,
+    node_key_t laddr,
+    node_val_t val) {
+    return lower_bound(
+      c, laddr
+    ).si_then([this, c, laddr, val](auto iter) {
+      return this->insert(c, iter, laddr, val);
+    });
+  }
+
+  /**
+   * update
+   *
+   * Invalidates all outstanding iterators for this tree on this transaction.
+   *
+   * @param c [in] op context
+   * @param iter [in] iterator to element to update, must not be end
+   * @param val [in] val with which to update
+   * @return iterator to newly updated element
+   */
+  using update_iertr = base_iertr;
+  using update_ret = update_iertr::future<iterator>;
+  update_ret update(
+    op_context_t<node_key_t> c,
+    iterator iter,
+    node_val_t val)
+  {
+    LOG_PREFIX(FixedKVBtree::update);
+    SUBDEBUGT(
+      seastore_lba_details,
+      "update element at {}",
+      c.trans,
+      iter.is_end() ? L_ADDR_MAX : iter.get_key());
+    if (!iter.leaf.node->is_pending()) {
+      CachedExtentRef mut = c.cache.duplicate_for_write(
+        c.trans, iter.leaf.node
+      );
+      iter.leaf.node = mut->cast<leaf_node_t>();
+    }
+    iter.leaf.node->update(
+      iter.leaf.node->iter_idx(iter.leaf.pos),
+      val);
+    return update_ret(
+      interruptible::ready_future_marker{},
+      iter);
+  }
+
+
+  /**
+   * remove
+   *
+   * Invalidates all outstanding iterators for this tree on this transaction.
+   *
+   * @param c [in] op context
+   * @param iter [in] iterator to element to remove, must not be end
+   */
+  using remove_iertr = base_iertr;
+  using remove_ret = remove_iertr::future<>;
+  remove_ret remove(
+    op_context_t<node_key_t> c,
+    iterator iter)
+  {
+    LOG_PREFIX(FixedKVBtree::remove);
+    SUBDEBUGT(
+      seastore_lba_details,
+      "remove element at {}",
+      c.trans,
+      iter.is_end() ? L_ADDR_MAX : iter.get_key());
+    assert(!iter.is_end());
+    ++(c.trans.get_lba_tree_stats().num_erases);
+    return seastar::do_with(
+      iter,
+      [this, c](auto &ret) {
+        if (!ret.leaf.node->is_pending()) {
+          CachedExtentRef mut = c.cache.duplicate_for_write(
+            c.trans, ret.leaf.node
+          );
+          ret.leaf.node = mut->cast<leaf_node_t>();
+        }
+        ret.leaf.node->remove(
+          ret.leaf.node->iter_idx(ret.leaf.pos));
+
+        return handle_merge(
+          c, ret
+        );
+      });
+  }
+    
+  /**
+   * init_cached_extent
+   *
+   * Checks whether e is live (reachable from fixed kv tree) and drops or initializes
+   * accordingly. 
+   *
+   * Returns if e is live.
+   */
+  using init_cached_extent_iertr = base_iertr;
+  using init_cached_extent_ret = init_cached_extent_iertr::future<bool>;
+  init_cached_extent_ret init_cached_extent(
+    op_context_t<node_key_t> c,
+    CachedExtentRef e)
+  {
+    assert(!e->is_logical());
+    LOG_PREFIX(FixedKVBtree::init_cached_extent);
+    SUBDEBUGT(seastore_lba_details, "extent {}", c.trans, *e);
+    if (e->get_type() == internal_node_t::TYPE) {
+      auto eint = e->cast<internal_node_t>();
+      return lower_bound(
+        c, eint->get_node_meta().begin
+      ).si_then([e, c, eint](auto iter) {
+        // Note, this check is valid even if iter.is_end()
+        LOG_PREFIX(FixedKVBtree::init_cached_extent);
+        depth_t cand_depth = eint->get_node_meta().depth;
+        if (cand_depth <= iter.get_depth() &&
+            &*iter.get_internal(cand_depth).node == &*eint) {
+          SUBDEBUGT(
+            seastore_lba_details,
+            "extent {} is live",
+            c.trans,
+            *eint);
+          return true;
+        } else {
+          SUBDEBUGT(
+            seastore_lba_details,
+            "extent {} is not live",
+            c.trans,
+            *eint);
+          return false;
+        }
+      });
+    } else if (e->get_type() == leaf_node_t::TYPE) {
+      auto eleaf = e->cast<leaf_node_t>();
+      return lower_bound(
+        c, eleaf->get_node_meta().begin
+      ).si_then([c, e, eleaf](auto iter) {
+        // Note, this check is valid even if iter.is_end()
+        LOG_PREFIX(FixedKVBtree::init_cached_extent);
+        if (iter.leaf.node == &*eleaf) {
+          SUBDEBUGT(
+            seastore_lba_details,
+            "extent {} is live",
+            c.trans,
+            *eleaf);
+          return true;
+        } else {
+          SUBDEBUGT(
+            seastore_lba_details,
+            "extent {} is not live",
+            c.trans,
+            *eleaf);
+          return false;
+        }
+      });
+    } else {
+      SUBDEBUGT(
+        seastore_lba_details,
+        "found other extent {} type {}",
+        c.trans,
+        *e,
+        e->get_type());
+      return init_cached_extent_ret(
+        interruptible::ready_future_marker{},
+        true);
+    }
+  }
+
+  /// get_leaf_if_live: get leaf node at laddr/addr if still live
+  using get_leaf_if_live_iertr = base_iertr;
+  using get_leaf_if_live_ret = get_leaf_if_live_iertr::future<CachedExtentRef>;
+  get_leaf_if_live_ret get_leaf_if_live(
+    op_context_t<node_key_t> c,
+    paddr_t addr,
+    node_key_t laddr,
+    seastore_off_t len)
+  {
+    LOG_PREFIX(FixedKVBtree::get_leaf_if_live);
+    return lower_bound(
+      c, laddr
+    ).si_then([FNAME, c, addr, laddr, len](auto iter) {
+      if (iter.leaf.node->get_paddr() == addr) {
+        SUBDEBUGT(
+          seastore_lba_details,
+          "extent laddr {} addr {}~{} found: {}",
+          c.trans,
+          laddr,
+          addr,
+          len,
+          *iter.leaf.node);
+        return CachedExtentRef(iter.leaf.node);
+      } else {
+        SUBDEBUGT(
+          seastore_lba_details,
+          "extent laddr {} addr {}~{} is not live, does not match node {}",
+          c.trans,
+          laddr,
+          addr,
+          len,
+          *iter.leaf.node);
+        return CachedExtentRef();
+      }
+    });
+  }
+
+
+  /// get_internal_if_live: get internal node at laddr/addr if still live
+  using get_internal_if_live_iertr = base_iertr;
+  using get_internal_if_live_ret = get_internal_if_live_iertr::future<CachedExtentRef>;
+  get_internal_if_live_ret get_internal_if_live(
+    op_context_t<node_key_t> c,
+    paddr_t addr,
+    node_key_t laddr,
+    seastore_off_t len)
+  {
+    LOG_PREFIX(FixedKVBtree::get_internal_if_live);
+    return lower_bound(
+      c, laddr
+    ).si_then([FNAME, c, addr, laddr, len](auto iter) {
+      for (depth_t d = 2; d <= iter.get_depth(); ++d) {
+        CachedExtent &node = *iter.get_internal(d).node;
+        auto internal_node = node.cast<internal_node_t>();
+        if (internal_node->get_paddr() == addr) {
+          SUBDEBUGT(
+            seastore_lba_details,
+            "extent laddr {} addr {}~{} found: {}",
+            c.trans,
+            laddr,
+            addr,
+            len,
+            *internal_node);
+          assert(internal_node->get_node_meta().begin == laddr);
+          return CachedExtentRef(internal_node);
+        }
+      }
+      SUBDEBUGT(
+        seastore_lba_details,
+        "extent laddr {} addr {}~{} is not live, no matching internal node",
+        c.trans,
+        laddr,
+        addr,
+        len);
+      return CachedExtentRef();
+    });
+  }
+
+
+  /**
+   * rewrite_extent
+   *
+   * Rewrites a fresh copy of extent into transaction and updates internal
+   * references.
+   */
+  using rewrite_extent_iertr = base_iertr;
+  using rewrite_extent_ret = rewrite_extent_iertr::future<>;
+  rewrite_extent_ret rewrite_extent(
+    op_context_t<node_key_t> c,
+    CachedExtentRef e) {
+    LOG_PREFIX(FixedKVBtree::rewrite_extent);
+    assert(e->get_type() == extent_types_t::LADDR_INTERNAL ||
+           e->get_type() == extent_types_t::LADDR_LEAF);
+    
+    auto do_rewrite = [&](auto &fixed_kv_extent) {
+      auto n_fixed_kv_extent = c.cache.template alloc_new_extent<
+        std::remove_reference_t<decltype(fixed_kv_extent)>
+        >(
+        c.trans,
+        fixed_kv_extent.get_length());
+      fixed_kv_extent.get_bptr().copy_out(
+        0,
+        fixed_kv_extent.get_length(),
+        n_fixed_kv_extent->get_bptr().c_str());
+      n_fixed_kv_extent->pin.set_range(n_fixed_kv_extent->get_node_meta());
+      
+      /* This is a bit underhanded.  Any relative addrs here must necessarily
+       * be record relative as we are rewriting a dirty extent.  Thus, we
+       * are using resolve_relative_addrs with a (likely negative) block
+       * relative offset to correct them to block-relative offsets adjusted
+       * for our new transaction location.
+       *
+   * Upon commit, these now block relative addresses will be interpreted
+       * against the real final address.
+       */
+      n_fixed_kv_extent->resolve_relative_addrs(
+        make_record_relative_paddr(0) - n_fixed_kv_extent->get_paddr());
+      
+      SUBDEBUGT(
+        seastore_lba_details,
+        "rewriting {} into {}",
+        c.trans,
+        fixed_kv_extent,
+        *n_fixed_kv_extent);
+      
+      return update_internal_mapping(
+        c,
+        n_fixed_kv_extent->get_node_meta().depth,
+        n_fixed_kv_extent->get_node_meta().begin,
+        e->get_paddr(),
+        n_fixed_kv_extent->get_paddr()
+      ).si_then([c, e] {
+        c.cache.retire_extent(c.trans, e);
+      });
+    };
+    
+    CachedExtentRef n_fixed_kv_extent;
+    if (e->get_type() == internal_node_t::TYPE) {
+      auto lint = e->cast<internal_node_t>();
+      return do_rewrite(*lint);
+    } else {
+      assert(e->get_type() == leaf_node_t::TYPE);
+      auto lleaf = e->cast<leaf_node_t>();
+      return do_rewrite(*lleaf);
+    }
+  }
+
+  using update_internal_mapping_iertr = base_iertr;
+  using update_internal_mapping_ret = update_internal_mapping_iertr::future<>;
+  update_internal_mapping_ret update_internal_mapping(
+    op_context_t<node_key_t> c,
+    depth_t depth,
+    node_key_t laddr,
+    paddr_t old_addr,
+    paddr_t new_addr)
+  {
+    LOG_PREFIX(FixedKVBtree::update_internal_mapping);
+    SUBDEBUGT(
+      seastore_lba_details,
+      "updating laddr {} at depth {} from {} to {}",
+      c.trans,
+      laddr,
+      depth,
+      old_addr,
+      new_addr);
+
+    return lower_bound(
+      c, laddr
+    ).si_then([=](auto iter) {
+      assert(iter.get_depth() >= depth);
+      if (depth == iter.get_depth()) {
+        SUBDEBUGT(seastore_lba_details, "update at root", c.trans);
+
+        if (laddr != 0) {
+          SUBERRORT(
+            seastore_lba_details,
+            "updating root laddr {} at depth {} from {} to {},"
+            " laddr is not 0, root addr {}",
+            c.trans,
+            laddr,
+            depth,
+            old_addr,
+            new_addr,
+            root.get_location());
+          ceph_assert(0 == "impossible");
+        }
+
+        if (root.get_location() != old_addr) {
+          SUBERRORT(
+            seastore_lba_details,
+            "updating root laddr {} at depth {} from {} to {},"
+            "root addr {} does not match",
+            c.trans,
+            laddr,
+            depth,
+            old_addr,
+            new_addr,
+            root.get_location());
+          ceph_assert(0 == "impossible");
+        }
+
+        root.set_location(new_addr);
+        root_dirty = true;
+      } else {
+        auto &parent = iter.get_internal(depth + 1);
+        assert(parent.node);
+        assert(parent.pos < parent.node->get_size());
+        auto piter = parent.node->iter_idx(parent.pos);
+
+        if (piter->get_key() != laddr) {
+          SUBERRORT(
+            seastore_lba_details,
+            "updating laddr {} at depth {} from {} to {},"
+            "node {} pos {} val pivot addr {} does not match",
+            c.trans,
+            laddr,
+            depth,
+            old_addr,
+            new_addr,
+            *(parent.node),
+            parent.pos,
+            piter->get_key());
+          ceph_assert(0 == "impossible");
+        }
+
+
+        if (piter->get_val() != old_addr) {
+          SUBERRORT(
+            seastore_lba_details,
+            "updating laddr {} at depth {} from {} to {},"
+            "node {} pos {} val addr {} does not match",
+            c.trans,
+            laddr,
+            depth,
+            old_addr,
+            new_addr,
+            *(parent.node),
+            parent.pos,
+            piter->get_val());
+          ceph_assert(0 == "impossible");
+        }
+
+        CachedExtentRef mut = c.cache.duplicate_for_write(
+          c.trans,
+          parent.node
+        );
+        typename internal_node_t::Ref mparent = mut->cast<internal_node_t>();
+        mparent->update(piter, new_addr);
+
+        /* Note, iter is now invalid as we didn't update either the parent
+         * node reference to the new mutable instance nor did we update the
+         * child pointer to the new node.  Not a problem as we'll now just
+         * destruct it.
+         */
+      }
+      return seastar::now();
+    });
+  }
+
+
+private:
+  phy_tree_root_t root;
+  bool root_dirty = false;
+
+  using get_internal_node_iertr = base_iertr;
+  using get_internal_node_ret = get_internal_node_iertr::future<InternalNodeRef>;
+  static get_internal_node_ret get_internal_node(
+    op_context_t<node_key_t> c,
+    depth_t depth,
+    paddr_t offset,
+    node_key_t begin,
+    node_key_t end)
+  {
+    LOG_PREFIX(FixedKVBtree::get_internal_node);
+    SUBDEBUGT(
+      seastore_lba_details,
+      "reading internal at offset {}, depth {}, begin {}, end {}",
+      c.trans,
+      offset,
+      depth,
+      begin,
+      end);
+    assert(depth > 1);
+    auto init_internal = [c, depth, begin, end](internal_node_t &node) {
+      assert(!node.is_pending());
+      assert(!node.pin.is_linked());
+      node.pin.set_range(fixed_kv_node_meta_t<node_key_t>{begin, end, depth});
+      if (c.pins) {
+        c.pins->add_pin(node.pin);
+      }
+    };
+    return c.cache.template get_extent<internal_node_t>(
+      c.trans,
+      offset,
+      node_size,
+      init_internal
+    ).si_then([FNAME, c, offset, init_internal, depth, begin, end](
+                typename internal_node_t::Ref ret) {
+      SUBDEBUGT(
+        seastore_lba_details,
+        "read internal at offset {} {}",
+        c.trans,
+        offset,
+        *ret);
+      // This can only happen during init_cached_extent
+      if (c.pins && !ret->is_pending() && !ret->pin.is_linked()) {
+        assert(ret->is_dirty());
+        init_internal(*ret);
+      }
+      auto meta = ret->get_meta();
+      if (ret->get_size()) {
+        ceph_assert(meta.begin <= ret->begin()->get_key());
+        ceph_assert(meta.end > (ret->end() - 1)->get_key());
+      }
+      ceph_assert(depth == meta.depth);
+      ceph_assert(begin == meta.begin);
+      ceph_assert(end == meta.end);
+      return get_internal_node_ret(
+        interruptible::ready_future_marker{},
+        ret);
+    });
+  }
+
+
+  using get_leaf_node_iertr = base_iertr;
+  using get_leaf_node_ret = get_leaf_node_iertr::future<LeafNodeRef>;
+  static get_leaf_node_ret get_leaf_node(
+    op_context_t<node_key_t> c,
+    paddr_t offset,
+    node_key_t begin,
+    node_key_t end)
+  {
+    LOG_PREFIX(FixedKVBtree::get_leaf_node);
+    SUBDEBUGT(
+      seastore_lba_details,
+      "reading leaf at offset {}, begin {}, end {}",
+      c.trans,
+      offset,
+      begin,
+      end);
+    auto init_leaf = [c, begin, end](leaf_node_t &node) {
+      assert(!node.is_pending());
+      assert(!node.pin.is_linked());
+      node.pin.set_range(fixed_kv_node_meta_t<node_key_t>{begin, end, 1});
+      if (c.pins) {
+        c.pins->add_pin(node.pin);
+      }
+    };
+    return c.cache.template get_extent<leaf_node_t>(
+      c.trans,
+      offset,
+      node_size,
+      init_leaf
+    ).si_then([FNAME, c, offset, init_leaf, begin, end]
+      (typename leaf_node_t::Ref ret) {
+      SUBDEBUGT(
+        seastore_lba_details,
+        "read leaf at offset {} {}",
+        c.trans,
+        offset,
+        *ret);
+      // This can only happen during init_cached_extent
+      if (c.pins && !ret->is_pending() && !ret->pin.is_linked()) {
+        assert(ret->is_dirty());
+        init_leaf(*ret);
+      }
+      auto meta = ret->get_meta();
+      if (ret->get_size()) {
+        ceph_assert(meta.begin <= ret->begin()->get_key());
+        ceph_assert(meta.end > (ret->end() - 1)->get_key());
+      }
+      ceph_assert(1 == meta.depth);
+      ceph_assert(begin == meta.begin);
+      ceph_assert(end == meta.end);
+      return get_leaf_node_ret(
+        interruptible::ready_future_marker{},
+        ret);
+    });
+  }
+
+  using lookup_root_iertr = base_iertr;
+  using lookup_root_ret = lookup_root_iertr::future<>;
+  lookup_root_ret lookup_root(
+    op_context_t<node_key_t> c,
+    iterator &iter,
+    mapped_space_visitor_t *visitor) const {
+    if (root.get_depth() > 1) {
+      return get_internal_node(
+       c,
+       root.get_depth(),
+       root.get_location(),
+       0,
+       L_ADDR_MAX
+      ).si_then([this, visitor, &iter](InternalNodeRef root_node) {
+       iter.get_internal(root.get_depth()).node = root_node;
+       if (visitor) (*visitor)(root_node->get_paddr(), root_node->get_length());
+       return lookup_root_iertr::now();
+      });
+    } else {
+      return get_leaf_node(
+       c,
+       root.get_location(),
+       0,
+       L_ADDR_MAX
+      ).si_then([visitor, &iter](LeafNodeRef root_node) {
+       iter.leaf.node = root_node;
+       if (visitor) (*visitor)(root_node->get_paddr(), root_node->get_length());
+       return lookup_root_iertr::now();
+      });
+    }
+  }
+
+  using lookup_internal_level_iertr = base_iertr;
+  using lookup_internal_level_ret = lookup_internal_level_iertr::future<>;
+  template <typename F>
+  static lookup_internal_level_ret lookup_internal_level(
+    op_context_t<node_key_t> c,
+    depth_t depth,
+    iterator &iter,
+    F &f,
+    mapped_space_visitor_t *visitor
+  ) {
+    assert(depth > 1);
+    auto &parent_entry = iter.get_internal(depth + 1);
+    auto parent = parent_entry.node;
+    auto node_iter = parent->iter_idx(parent_entry.pos);
+    auto next_iter = node_iter + 1;
+    auto begin = node_iter->get_key();
+    auto end = next_iter == parent->end()
+      ? parent->get_node_meta().end
+      : next_iter->get_key();
+    return get_internal_node(
+      c,
+      depth,
+      node_iter->get_val().maybe_relative_to(parent->get_paddr()),
+      begin,
+      end
+    ).si_then([depth, visitor, &iter, &f](InternalNodeRef node) {
+      auto &entry = iter.get_internal(depth);
+      entry.node = node;
+      auto node_iter = f(*node);
+      assert(node_iter != node->end());
+      entry.pos = node_iter->get_offset();
+      if (visitor) (*visitor)(node->get_paddr(), node->get_length());
+      return seastar::now();
+    });
+  }
+
+  using lookup_leaf_iertr = base_iertr;
+  using lookup_leaf_ret = lookup_leaf_iertr::future<>;
+  template <typename F>
+  static lookup_internal_level_ret lookup_leaf(
+    op_context_t<node_key_t> c,
+    iterator &iter,
+    F &f,
+    mapped_space_visitor_t *visitor
+  ) {
+    auto &parent_entry = iter.get_internal(2);
+    auto parent = parent_entry.node;
+    assert(parent);
+    auto node_iter = parent->iter_idx(parent_entry.pos);
+    auto next_iter = node_iter + 1;
+    auto begin = node_iter->get_key();
+    auto end = next_iter == parent->end()
+      ? parent->get_node_meta().end
+      : next_iter->get_key();
+
+    return get_leaf_node(
+      c,
+      node_iter->get_val().maybe_relative_to(parent->get_paddr()),
+      begin,
+      end
+    ).si_then([visitor, &iter, &f](LeafNodeRef node) {
+      iter.leaf.node = node;
+      auto node_iter = f(*node);
+      iter.leaf.pos = node_iter->get_offset();
+      if (visitor) (*visitor)(node->get_paddr(), node->get_length());
+      return seastar::now();
+    });
+  }
+
+  /**
+   * lookup_depth_range
+   *
+   * Performs node lookups on depths [from, to) using li and ll to
+   * specify the target at each level.  Note, may leave the iterator
+   * at_boundary(); call handle_boundary() prior to returning out
+   * of FixedKVBtree.
+   */
+  using lookup_depth_range_iertr = base_iertr;
+  using lookup_depth_range_ret = lookup_depth_range_iertr::future<>;
+  template <typename LI, typename LL>
+  static lookup_depth_range_ret lookup_depth_range(
+    op_context_t<node_key_t> c, ///< [in] context
+    iterator &iter, ///< [in,out] iterator to populate
+    depth_t from,   ///< [in] from inclusive
+    depth_t to,     ///< [in] to exclusive, (to <= from, to == from is a noop)
+    LI &li,         ///< [in] internal->iterator
+    LL &ll,         ///< [in] leaf->iterator
+    mapped_space_visitor_t *visitor ///< [in] mapped space visitor
+  ) {
+    LOG_PREFIX(FixedKVBtree::lookup_depth_range);
+    SUBDEBUGT(seastore_lba_details, "{} -> {}", c.trans, from, to);
+    return seastar::do_with(
+      from,
+      [c, to, visitor, &iter, &li, &ll](auto &d) {
+       return trans_intr::repeat(
+         [c, to, visitor, &iter, &li, &ll, &d] {
+           if (d > to) {
+             return [&] {
+               if (d > 1) {
+                 return lookup_internal_level(
+                   c,
+                   d,
+                   iter,
+                   li,
+                   visitor);
+               } else {
+                 assert(d == 1);
+                 return lookup_leaf(
+                   c,
+                   iter,
+                   ll,
+                   visitor);
+               }
+             }().si_then([&d] {
+               --d;
+               return lookup_depth_range_iertr::make_ready_future<
+                 seastar::stop_iteration
+                 >(seastar::stop_iteration::no);
+             });
+           } else {
+             return lookup_depth_range_iertr::make_ready_future<
+               seastar::stop_iteration
+               >(seastar::stop_iteration::yes);
+           }
+         });
+      });
+  }
+
+  using lookup_iertr = base_iertr;
+  using lookup_ret = lookup_iertr::future<iterator>;
+  template <typename LI, typename LL>
+  lookup_ret lookup(
+    op_context_t<node_key_t> c,
+    LI &&lookup_internal,
+    LL &&lookup_leaf,
+    mapped_space_visitor_t *visitor
+  ) const {
+    LOG_PREFIX(FixedKVBtree::lookup);
+    return seastar::do_with(
+      iterator{root.get_depth()},
+      std::forward<LI>(lookup_internal),
+      std::forward<LL>(lookup_leaf),
+      [FNAME, this, visitor, c](auto &iter, auto &li, auto &ll) {
+       return lookup_root(
+         c, iter, visitor
+       ).si_then([FNAME, this, visitor, c, &iter, &li, &ll] {
+         if (iter.get_depth() > 1) {
+           auto &root_entry = *(iter.internal.rbegin());
+           root_entry.pos = li(*(root_entry.node)).get_offset();
+         } else {
+           auto &root_entry = iter.leaf;
+           auto riter = ll(*(root_entry.node));
+           root_entry.pos = riter->get_offset();
+         }
+         SUBDEBUGT(seastore_lba_details, "got root, depth {}", c.trans, root.get_depth());
+         return lookup_depth_range(
+           c,
+           iter,
+           root.get_depth() - 1,
+           0,
+           li,
+           ll,
+           visitor
+         ).si_then([c, visitor, &iter] {
+           if (iter.at_boundary()) {
+             return iter.handle_boundary(c, visitor);
+           } else {
+             return lookup_iertr::now();
+           }
+         });
+       }).si_then([&iter] {
+         return std::move(iter);
+       });
+      });
+  }
+
+  /**
+   * find_insertion
+   *
+   * Prepare iter for insertion.  iter should begin pointing at
+   * the valid insertion point (lower_bound(laddr)).
+   *
+   * Upon completion, iter will point at the
+   * position at which laddr should be inserted.  iter may, upon completion,
+   * point at the end of a leaf other than the end leaf if that's the correct
+   * insertion point.
+   */
+  using find_insertion_iertr = base_iertr;
+  using find_insertion_ret = find_insertion_iertr::future<>;
+  static find_insertion_ret find_insertion(
+    op_context_t<node_key_t> c,
+    node_key_t laddr,
+    iterator &iter)
+  {
+    assert(iter.is_end() || iter.get_key() >= laddr);
+    if (!iter.is_end() && iter.get_key() == laddr) {
+      return seastar::now();
+    } else if (iter.leaf.node->get_node_meta().begin <= laddr) {
+#ifndef NDEBUG
+      auto p = iter;
+      if (p.leaf.pos > 0) {
+        --p.leaf.pos;
+        assert(p.get_key() < laddr);
+      }
+#endif
+      return seastar::now();
+    } else {
+      assert(iter.leaf.pos == 0);
+      return iter.prev(
+        c
+      ).si_then([laddr, &iter](auto p) {
+        boost::ignore_unused(laddr); // avoid clang warning
+        assert(p.leaf.node->get_node_meta().begin <= laddr);
+        assert(p.get_key() < laddr);
+        // Note, this is specifically allowed to violate the iterator
+        // invariant that pos is a valid index for the node in the event
+        // that the insertion point is at the end of a node.
+        p.leaf.pos++;
+        assert(p.at_boundary());
+        iter = p;
+        return seastar::now();
+      });
+    }
+  }
+
+  /**
+   * handle_split
+   *
+   * Split nodes in iter as needed for insertion. First, scan iter from leaf
+   * to find first non-full level.  Then, split from there towards leaf.
+   *
+   * Upon completion, iter will point at the newly split insertion point.  As
+   * with find_insertion, iter's leaf pointer may be end without iter being
+   * end.
+   */
+  using handle_split_iertr = base_iertr;
+  using handle_split_ret = handle_split_iertr::future<>;
+  handle_split_ret handle_split(
+    op_context_t<node_key_t> c,
+    iterator &iter)
+  {
+    LOG_PREFIX(FixedKVBtree::handle_split);
+
+    depth_t split_from = iter.check_split();
+
+    SUBDEBUGT(seastore_lba_details, "split_from {}, depth {}", c.trans, split_from, iter.get_depth());
+
+    if (split_from == iter.get_depth()) {
+      auto nroot = c.cache.template alloc_new_extent<internal_node_t>(
+        c.trans, node_size);
+      fixed_kv_node_meta_t<node_key_t> meta{0, L_ADDR_MAX, iter.get_depth() + 1};
+      nroot->set_meta(meta);
+      nroot->pin.set_range(meta);
+      nroot->journal_insert(
+        nroot->begin(),
+        L_ADDR_MIN,
+        root.get_location(),
+        nullptr);
+      iter.internal.push_back({nroot, 0});
+
+      root.set_location(nroot->get_paddr());
+      root.set_depth(iter.get_depth());
+      c.trans.get_lba_tree_stats().depth = iter.get_depth();
+      root_dirty = true;
+    }
+
+    /* pos may be either node_position_t<leaf_node_t> or
+     * node_position_t<internal_node_t> */
+    auto split_level = [&](auto &parent_pos, auto &pos) {
+      LOG_PREFIX(FixedKVBtree::handle_split);
+      auto [left, right, pivot] = pos.node->make_split_children(c);
+
+      auto parent_node = parent_pos.node;
+      auto parent_iter = parent_pos.get_iter();
+
+      parent_node->update(
+        parent_iter,
+        left->get_paddr());
+      parent_node->insert(
+        parent_iter + 1,
+        pivot,
+        right->get_paddr());
+
+      SUBDEBUGT(
+        seastore_lba_details,
+        "splitted {} into left: {}, right: {}",
+        c.trans,
+        *pos.node,
+        *left,
+        *right);
+      c.cache.retire_extent(c.trans, pos.node);
+
+      return std::make_pair(left, right);
+    };
+
+    for (; split_from > 0; --split_from) {
+      auto &parent_pos = iter.get_internal(split_from + 1);
+      if (!parent_pos.node->is_pending()) {
+        parent_pos.node = c.cache.duplicate_for_write(
+          c.trans, parent_pos.node
+        )->template cast<internal_node_t>();
+      }
+
+      if (split_from > 1) {
+        auto &pos = iter.get_internal(split_from);
+        SUBDEBUGT(
+          seastore_lba_details,
+          "splitting internal {} at depth {}, parent: {} at pos: {}",
+          c.trans,
+          *pos.node,
+          split_from,
+          *parent_pos.node,
+          parent_pos.pos);
+        auto [left, right] = split_level(parent_pos, pos);
+
+        if (pos.pos < left->get_size()) {
+          pos.node = left;
+        } else {
+          pos.node = right;
+          pos.pos -= left->get_size();
+
+          parent_pos.pos += 1;
+        }
+      } else {
+        auto &pos = iter.leaf;
+        SUBDEBUGT(
+          seastore_lba_details,
+          "splitting leaf {}, parent: {} at pos: {}",
+          c.trans,
+          *pos.node,
+          *parent_pos.node,
+          parent_pos.pos);
+        auto [left, right] = split_level(parent_pos, pos);
+
+        /* right->get_node_meta().begin == pivot == right->begin()->get_key()
+         * Thus, if pos.pos == left->get_size(), we want iter to point to
+         * left with pos.pos at the end rather than right with pos.pos = 0
+         * since the insertion would be to the left of the first element
+         * of right and thus necessarily less than right->get_node_meta().begin.
+         */
+        if (pos.pos <= left->get_size()) {
+          pos.node = left;
+        } else {
+          pos.node = right;
+          pos.pos -= left->get_size();
+
+          parent_pos.pos += 1;
+        }
+      }
+    }
+
+    return seastar::now();
+  }
+
+
+  using handle_merge_iertr = base_iertr;
+  using handle_merge_ret = handle_merge_iertr::future<>;
+  handle_merge_ret handle_merge(
+    op_context_t<node_key_t> c,
+    iterator &iter)
+  {
+    LOG_PREFIX(FixedKVBtree::handle_merge);
+    if (iter.get_depth() == 1 ||
+        !iter.leaf.node->below_min_capacity()) {
+      SUBDEBUGT(
+        seastore_lba_details,
+        "no need to merge leaf, leaf size {}, depth {}",
+        c.trans,
+        iter.leaf.node->get_size(),
+        iter.get_depth());
+      return seastar::now();
+    }
+
+    return seastar::do_with(
+      depth_t{1},
+      [FNAME, this, c, &iter](auto &to_merge) {
+        return trans_intr::repeat(
+          [FNAME, this, c, &iter, &to_merge] {
+            SUBDEBUGT(
+              seastore_lba_details,
+              "merging depth {}",
+              c.trans,
+              to_merge);
+            auto &parent_pos = iter.get_internal(to_merge + 1);
+            auto merge_fut = handle_merge_iertr::now();
+            if (to_merge > 1) {
+              auto &pos = iter.get_internal(to_merge);
+              merge_fut = merge_level(c, to_merge, parent_pos, pos);
+            } else {
+              auto &pos = iter.leaf;
+              merge_fut = merge_level(c, to_merge, parent_pos, pos);
+            }
+
+            return merge_fut.si_then([FNAME, this, c, &iter, &to_merge] {
+              ++to_merge;
+              auto &pos = iter.get_internal(to_merge);
+              if (to_merge == iter.get_depth()) {
+                if (pos.node->get_size() == 1) {
+                  SUBDEBUGT(seastore_lba_details, "collapsing root", c.trans);
+                  c.cache.retire_extent(c.trans, pos.node);
+                  assert(pos.pos == 0);
+                  auto node_iter = pos.get_iter();
+                  root.set_location(
+                    node_iter->get_val().maybe_relative_to(pos.node->get_paddr()));
+                  iter.internal.pop_back();
+                  root.set_depth(iter.get_depth());
+                  get_tree_stats<self_type>(c.trans).depth = iter.get_depth();
+                  root_dirty = true;
+                } else {
+                  SUBDEBUGT(seastore_lba_details, "no need to collapse root", c.trans);
+                }
+                return seastar::stop_iteration::yes;
+              } else if (pos.node->below_min_capacity()) {
+                SUBDEBUGT(
+                  seastore_lba_details,
+                  "continuing, next node {} depth {} at min",
+                  c.trans,
+                  *pos.node,
+                  to_merge);
+                return seastar::stop_iteration::no;
+              } else {
+                SUBDEBUGT(
+                  seastore_lba_details,
+                  "complete, next node {} depth {} not min",
+                  c.trans,
+                  *pos.node,
+                  to_merge);
+                return seastar::stop_iteration::yes;
+              }
+            });
+          });
+      });
+  }
+
+  template <typename T>
+  using node_position_t = typename iterator::template node_position_t<T>;
+
+  template <typename NodeType,
+            std::enable_if_t<std::is_same_v<NodeType, leaf_node_t>, int> = 0>
+  base_iertr::future<typename NodeType::Ref> get_node(
+    op_context_t<node_key_t> c,
+    depth_t depth,
+    paddr_t addr,
+    laddr_t begin,
+    laddr_t end) {
+    assert(depth == 1);
+    return get_leaf_node(c, addr, begin, end);
+  }
+
+  template <typename NodeType,
+            std::enable_if_t<std::is_same_v<NodeType, internal_node_t>, int> = 0>
+  base_iertr::future<typename NodeType::Ref> get_node(
+    op_context_t<node_key_t> c,
+    depth_t depth,
+    paddr_t addr,
+    laddr_t begin,
+    laddr_t end) {
+    return get_internal_node(c, depth, addr, begin, end);
+  }
+
+  template <typename NodeType>
+  handle_merge_ret merge_level(
+    op_context_t<node_key_t> c,
+    depth_t depth,
+    node_position_t<internal_node_t> &parent_pos,
+    node_position_t<NodeType> &pos)
+  {
+    LOG_PREFIX(FixedKVBtree::merge_level);
+    if (!parent_pos.node->is_pending()) {
+      parent_pos.node = c.cache.duplicate_for_write(
+        c.trans, parent_pos.node
+      )->template cast<internal_node_t>();
+    }
+
+    auto iter = parent_pos.get_iter();
+    assert(iter.get_offset() < parent_pos.node->get_size());
+    bool donor_is_left = ((iter.get_offset() + 1) == parent_pos.node->get_size());
+    auto donor_iter = donor_is_left ? (iter - 1) : (iter + 1);
+    auto next_iter = donor_iter + 1;
+    auto begin = donor_iter->get_key();
+    auto end = next_iter == parent_pos.node->end()
+      ? parent_pos.node->get_node_meta().end
+      : next_iter->get_key();
+    
+    SUBDEBUGT(seastore_lba_details, "parent: {}, node: {}", c.trans, *parent_pos.node, *pos.node);
+    return get_node<NodeType>(
+      c,
+      depth,
+      donor_iter.get_val().maybe_relative_to(parent_pos.node->get_paddr()),
+      begin,
+      end
+    ).si_then([c, iter, donor_iter, donor_is_left, &parent_pos, &pos](
+                typename NodeType::Ref donor) {
+      LOG_PREFIX(FixedKVBtree::merge_level);
+      auto [l, r] = donor_is_left ?
+        std::make_pair(donor, pos.node) : std::make_pair(pos.node, donor);
+
+      auto [liter, riter] = donor_is_left ?
+        std::make_pair(donor_iter, iter) : std::make_pair(iter, donor_iter);
+
+      if (donor->at_min_capacity()) {
+        auto replacement = l->make_full_merge(c, r);
+
+        parent_pos.node->update(
+          liter,
+          replacement->get_paddr());
+        parent_pos.node->remove(riter);
+
+        pos.node = replacement;
+        if (donor_is_left) {
+          pos.pos += r->get_size();
+          parent_pos.pos--;
+        }
+
+        SUBDEBUGT(seastore_lba_details, "l: {}, r: {}, replacement: {}", c.trans, *l, *r, *replacement);
+        c.cache.retire_extent(c.trans, l);
+        c.cache.retire_extent(c.trans, r);
+      } else {
+        LOG_PREFIX(FixedKVBtree::merge_level);
+        auto [replacement_l, replacement_r, pivot] =
+          l->make_balanced(
+            c,
+            r,
+            !donor_is_left);
+
+        parent_pos.node->update(
+          liter,
+          replacement_l->get_paddr());
+        parent_pos.node->replace(
+          riter,
+          pivot,
+          replacement_r->get_paddr());
+
+        if (donor_is_left) {
+          assert(parent_pos.pos > 0);
+          parent_pos.pos--;
+        }
+
+        auto orig_position = donor_is_left ?
+          l->get_size() + pos.pos :
+          pos.pos;
+        if (orig_position < replacement_l->get_size()) {
+          pos.node = replacement_l;
+          pos.pos = orig_position;
+        } else {
+          parent_pos.pos++;
+          pos.node = replacement_r;
+          pos.pos = orig_position - replacement_l->get_size();
+        }
+
+        SUBDEBUGT(
+          seastore_lba_details,
+          "l: {}, r: {}, replacement_l: {}, replacement_r: {}",
+          c.trans, *l, *r, *replacement_l, *replacement_r);
+        c.cache.retire_extent(c.trans, l);
+        c.cache.retire_extent(c.trans, r);
+      }
+
+      return seastar::now();
+    });
+  }
+};
+
+}
+
index 08c9424960416d8c87f59622fdaacbda07726b8a..42fbc6c5e93b3f835461498c773fbe9370405723 100644 (file)
@@ -81,7 +81,7 @@ std::ostream &LogicalCachedExtent::print_detail(std::ostream &out) const
 
 std::ostream &operator<<(std::ostream &out, const LBAPin &rhs)
 {
-  return out << "LBAPin(" << rhs.get_laddr() << "~" << rhs.get_length()
+  return out << "LBAPin(" << rhs.get_key() << "~" << rhs.get_length()
             << "->" << rhs.get_paddr();
 }
 
index f1063c1a02c181729969b8661f02aadce53c563a..a0f8686ee97ddb9f869d8d2409a1f9c5330e8fea 100644 (file)
@@ -666,20 +666,30 @@ private:
 };
 
 class LogicalCachedExtent;
-class LBAPin;
-using LBAPinRef = std::unique_ptr<LBAPin>;
-class LBAPin {
+
+template <typename key_t>
+class PhysicalNodePin;
+
+template <typename key_t>
+using PhysicalNodePinRef = std::unique_ptr<PhysicalNodePin<key_t>>;
+
+template <typename key_t>
+class PhysicalNodePin {
 public:
   virtual void link_extent(LogicalCachedExtent *ref) = 0;
-  virtual void take_pin(LBAPin &pin) = 0;
+  virtual void take_pin(PhysicalNodePin<key_t> &pin) = 0;
   virtual extent_len_t get_length() const = 0;
   virtual paddr_t get_paddr() const = 0;
-  virtual laddr_t get_laddr() const = 0;
-  virtual LBAPinRef duplicate() const = 0;
+  virtual key_t get_key() const = 0;
+  virtual PhysicalNodePinRef<key_t> duplicate() const = 0;
   virtual bool has_been_invalidated() const = 0;
 
-  virtual ~LBAPin() {}
+  virtual ~PhysicalNodePin() {}
 };
+
+using LBAPin = PhysicalNodePin<laddr_t>;
+using LBAPinRef = PhysicalNodePinRef<laddr_t>;
+
 std::ostream &operator<<(std::ostream &out, const LBAPin &rhs);
 
 using lba_pin_list_t = std::list<LBAPinRef>;
@@ -756,7 +766,7 @@ public:
   void set_pin(LBAPinRef &&npin) {
     assert(!pin);
     pin = std::move(npin);
-    laddr = pin->get_laddr();
+    laddr = pin->get_key();
     pin->link_extent(this);
   }
 
index 0c4d77dae62fcaefac862dd69f91053ba6150c33..c43fa4470bd4433120872fa3b85672c8760f084a 100644 (file)
@@ -9,7 +9,6 @@
 #include "include/buffer.h"
 #include "crimson/os/seastore/lba_manager/btree/btree_lba_manager.h"
 #include "crimson/os/seastore/lba_manager/btree/lba_btree_node.h"
-#include "crimson/os/seastore/lba_manager/btree/lba_btree.h"
 #include "crimson/os/seastore/logging.h"
 
 SET_SUBSYS(seastore_lba);
@@ -20,6 +19,15 @@ SET_SUBSYS(seastore_lba);
  * - TRACE: read operations, DEBUG details
  */
 
+namespace crimson::os::seastore {
+
+template<>
+Transaction::tree_stats_t& get_tree_stats<
+  crimson::os::seastore::lba_manager::btree::LBABtree>(Transaction &t) {
+  return t.get_lba_tree_stats();
+}
+}
+
 namespace crimson::os::seastore::lba_manager::btree {
 
 BtreeLBAManager::mkfs_ret BtreeLBAManager::mkfs(
@@ -210,13 +218,13 @@ static bool is_lba_node(const CachedExtent &e)
   return is_lba_node(e.get_type());
 }
 
-btree_range_pin_t &BtreeLBAManager::get_pin(CachedExtent &e)
+btree_range_pin_t<laddr_t> &BtreeLBAManager::get_pin(CachedExtent &e)
 {
   if (is_lba_node(e)) {
     return e.cast<LBANode>()->pin;
   } else if (e.is_logical()) {
     return static_cast<BtreeLBAPin &>(
-      e.cast<LogicalCachedExtent>()->get_pin()).pin;
+      e.cast<LogicalCachedExtent>()->get_pin()).get_range_pin();
   } else {
     ceph_abort_msg("impossible");
   }
@@ -280,23 +288,57 @@ void BtreeLBAManager::complete_transaction(
   }
 }
 
+BtreeLBAManager::base_iertr::future<> _init_cached_extent(
+  op_context_t<laddr_t> c,
+  const CachedExtentRef &e,
+  LBABtree &btree,
+  bool &ret)
+{
+  if (e->is_logical()) {
+    auto logn = e->cast<LogicalCachedExtent>();
+    return btree.lower_bound(
+      c,
+      logn->get_laddr()
+    ).si_then([e, c, logn, &ret](auto iter) {
+      LOG_PREFIX(BtreeLBAManager::init_cached_extent);
+      if (!iter.is_end() &&
+         iter.get_key() == logn->get_laddr() &&
+         iter.get_val().paddr == logn->get_paddr()) {
+       logn->set_pin(iter.get_pin());
+       ceph_assert(iter.get_val().len == e->get_length());
+       if (c.pins) {
+         c.pins->add_pin(
+           static_cast<BtreeLBAPin&>(logn->get_pin()).get_range_pin());
+       }
+       DEBUGT("logical extent {} live", c.trans, *logn);
+       ret = true;
+      } else {
+       DEBUGT("logical extent {} not live", c.trans, *logn);
+       ret = false;
+      }
+    });
+  } else {
+    return btree.init_cached_extent(c, e
+    ).si_then([&ret](bool is_alive) {
+      ret = is_alive;
+    });
+  }
+}
+
 BtreeLBAManager::init_cached_extent_ret BtreeLBAManager::init_cached_extent(
   Transaction &t,
   CachedExtentRef e)
 {
   LOG_PREFIX(BtreeLBAManager::init_cached_extent);
   TRACET("{}", t, *e);
-  return seastar::do_with(bool(), [this, e, FNAME, &t](bool& ret) {
+  return seastar::do_with(bool(), [this, e, &t](bool &ret) {
     auto c = get_context(t);
-    return with_btree(c, [c, e, &ret](auto &btree) {
-      return btree.init_cached_extent(c, e
-      ).si_then([&ret](bool is_alive) {
-        ret = is_alive;
-      });
-    }).si_then([&ret, e, FNAME, c] {
-      DEBUGT("is_alive={} -- {}", c.trans, ret, *e);
-      return ret;
-    });
+    return with_btree(c, [c, e, &ret](auto &btree)
+      -> base_iertr::future<> {
+      LOG_PREFIX(BtreeLBAManager::init_cached_extent);
+      DEBUGT("extent {}", c.trans, *e);
+      return _init_cached_extent(c, e, btree, ret);
+    }).si_then([&ret] { return ret; });
   });
 }
 
@@ -380,7 +422,7 @@ BtreeLBAManager::rewrite_extent_ret BtreeLBAManager::rewrite_extent(
     return with_btree(
       c,
       [c, extent](auto &btree) mutable {
-       return btree.rewrite_lba_extent(c, extent);
+       return btree.rewrite_extent(c, extent);
       });
   } else {
     DEBUGT("skip non lba extent -- {}", t, *extent);
index b02a84949e03de5910a71a691501fbefdbd5a734..5cf8c5a05bc573eede93c47d6e654a7b35c27468 100644 (file)
 #include "common/interval_map.h"
 #include "crimson/osd/exceptions.h"
 
+#include "crimson/os/seastore/btree/fixed_kv_btree.h"
 #include "crimson/os/seastore/seastore_types.h"
 #include "crimson/os/seastore/lba_manager.h"
 #include "crimson/os/seastore/cache.h"
 #include "crimson/os/seastore/segment_manager.h"
 
 #include "crimson/os/seastore/lba_manager/btree/lba_btree_node.h"
-#include "crimson/os/seastore/lba_manager/btree/lba_btree.h"
+#include "crimson/os/seastore/btree/btree_range_pin.h"
 
 namespace crimson::os::seastore::lba_manager::btree {
 
+using LBABtree = FixedKVBtree<laddr_t, lba_map_val_t, LBAInternalNode, LBALeafNode, LBA_BLOCK_SIZE>;
+
+using BtreeLBAPin = BtreeNodePin<laddr_t>;
+
 /**
  * BtreeLBAManager
  *
@@ -84,6 +89,14 @@ public:
   void complete_transaction(
     Transaction &t) final;
 
+  /**
+   * init_cached_extent
+   *
+   * Checks whether e is live (reachable from lba tree) and drops or initializes
+   * accordingly.
+   *
+   * Returns if e is live.
+   */
   init_cached_extent_ret init_cached_extent(
     Transaction &t,
     CachedExtentRef e) final;
@@ -117,8 +130,8 @@ public:
 
   void add_pin(LBAPin &pin) final {
     auto *bpin = reinterpret_cast<BtreeLBAPin*>(&pin);
-    pin_set.add_pin(bpin->pin);
-    bpin->parent = nullptr;
+    pin_set.add_pin(bpin->get_range_pin());
+    bpin->set_parent(nullptr);
   }
 
   ~BtreeLBAManager();
@@ -126,24 +139,24 @@ private:
   SegmentManager &segment_manager;
   Cache &cache;
 
-  btree_pin_set_t pin_set;
+  btree_pin_set_t<laddr_t> pin_set;
 
   struct {
     uint64_t num_alloc_extents = 0;
     uint64_t num_alloc_extents_iter_nexts = 0;
   } stats;
 
-  op_context_t get_context(Transaction &t) {
-    return op_context_t{cache, t, &pin_set};
+  op_context_t<laddr_t> get_context(Transaction &t) {
+    return op_context_t<laddr_t>{cache, t, &pin_set};
   }
 
-  static btree_range_pin_t &get_pin(CachedExtent &e);
+  static btree_range_pin_t<laddr_t> &get_pin(CachedExtent &e);
 
   seastar::metrics::metric_group metrics;
   void register_metrics();
   template <typename F, typename... Args>
   auto with_btree(
-    op_context_t c,
+    op_context_t<laddr_t> c,
     F &&f) {
     return cache.get_root(
       c.trans
@@ -168,7 +181,7 @@ private:
 
   template <typename State, typename F>
   auto with_btree_state(
-    op_context_t c,
+    op_context_t<laddr_t> c,
     State &&init,
     F &&f) {
     return seastar::do_with(
@@ -185,14 +198,14 @@ private:
 
   template <typename State, typename F>
   auto with_btree_state(
-    op_context_t c,
+    op_context_t<laddr_t> c,
     F &&f) {
     return with_btree_state<State, F>(c, State{}, std::forward<F>(f));
   }
 
   template <typename Ret, typename F>
   auto with_btree_ret(
-    op_context_t c,
+    op_context_t<laddr_t> c,
     F &&f) {
     return with_btree_state<Ret>(
       c,
diff --git a/src/crimson/os/seastore/lba_manager/btree/btree_range_pin.cc b/src/crimson/os/seastore/lba_manager/btree/btree_range_pin.cc
deleted file mode 100644 (file)
index 21c4279..0000000
+++ /dev/null
@@ -1,155 +0,0 @@
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab
-
-#include "crimson/os/seastore/lba_manager/btree/btree_range_pin.h"
-#include "crimson/os/seastore/logging.h"
-
-SET_SUBSYS(seastore_lba);
-
-namespace crimson::os::seastore::lba_manager::btree {
-
-void btree_range_pin_t::take_pin(btree_range_pin_t &other)
-{
-  ceph_assert(other.extent);
-  if (other.pins) {
-    other.pins->replace_pin(*this, other);
-    pins = other.pins;
-    other.pins = nullptr;
-
-    if (other.has_ref()) {
-      other.drop_ref();
-      acquire_ref();
-    }
-  }
-}
-
-btree_range_pin_t::~btree_range_pin_t()
-{
-  LOG_PREFIX(btree_range_pin_t::~btree_range_pin_t);
-  ceph_assert(!pins == !is_linked());
-  ceph_assert(!ref);
-  if (pins) {
-    TRACE("removing {}", *this);
-    pins->remove_pin(*this, true);
-  }
-  extent = nullptr;
-}
-
-void btree_pin_set_t::replace_pin(btree_range_pin_t &to, btree_range_pin_t &from)
-{
-  pins.replace_node(pins.iterator_to(from), to);
-}
-
-void btree_pin_set_t::remove_pin(btree_range_pin_t &pin, bool do_check_parent)
-{
-  LOG_PREFIX(btree_pin_set_t::remove_pin);
-  TRACE("{}", pin);
-  ceph_assert(pin.is_linked());
-  ceph_assert(pin.pins);
-  ceph_assert(!pin.ref);
-
-  pins.erase(pin);
-  pin.pins = nullptr;
-
-  if (do_check_parent) {
-    check_parent(pin);
-  }
-}
-
-btree_range_pin_t *btree_pin_set_t::maybe_get_parent(
-  const lba_node_meta_t &meta)
-{
-  auto cmeta = meta;
-  cmeta.depth++;
-  auto iter = pins.upper_bound(cmeta, btree_range_pin_t::meta_cmp_t());
-  if (iter == pins.begin()) {
-    return nullptr;
-  } else {
-    --iter;
-    if (iter->range.is_parent_of(meta)) {
-      return &*iter;
-    } else {
-      return nullptr;
-    }
-  }
-}
-
-const btree_range_pin_t *btree_pin_set_t::maybe_get_first_child(
-  const lba_node_meta_t &meta) const
-{
-  if (meta.depth == 0) {
-    return nullptr;
-  }
-
-  auto cmeta = meta;
-  cmeta.depth--;
-
-  auto iter = pins.lower_bound(cmeta, btree_range_pin_t::meta_cmp_t());
-  if (iter == pins.end()) {
-    return nullptr;
-  } else if (meta.is_parent_of(iter->range)) {
-    return &*iter;
-  } else {
-    return nullptr;
-  }
-}
-
-void btree_pin_set_t::release_if_no_children(btree_range_pin_t &pin)
-{
-  ceph_assert(pin.is_linked());
-  if (maybe_get_first_child(pin.range) == nullptr) {
-    pin.drop_ref();
-  }
-}
-
-void btree_pin_set_t::add_pin(btree_range_pin_t &pin)
-{
-  LOG_PREFIX(btree_pin_set_t::add_pin);
-  ceph_assert(!pin.is_linked());
-  ceph_assert(!pin.pins);
-  ceph_assert(!pin.ref);
-
-  auto [prev, inserted] = pins.insert(pin);
-  if (!inserted) {
-    ERROR("unable to add {} ({}), found {} ({})",
-      pin,
-      *(pin.extent),
-      *prev,
-      *(prev->extent));
-    ceph_assert(0 == "impossible");
-    return;
-  }
-  pin.pins = this;
-  if (!pin.is_root()) {
-    auto *parent = maybe_get_parent(pin.range);
-    ceph_assert(parent);
-    if (!parent->has_ref()) {
-      TRACE("acquiring parent {}", static_cast<void*>(parent));
-      parent->acquire_ref();
-    } else {
-      TRACE("parent has ref {}", static_cast<void*>(parent));
-    }
-  }
-  if (maybe_get_first_child(pin.range) != nullptr) {
-    TRACE("acquiring self {}", pin);
-    pin.acquire_ref();
-  }
-}
-
-void btree_pin_set_t::retire(btree_range_pin_t &pin)
-{
-  pin.drop_ref();
-  remove_pin(pin, false);
-}
-
-void btree_pin_set_t::check_parent(btree_range_pin_t &pin)
-{
-  LOG_PREFIX(btree_pin_set_t::check_parent);
-  auto parent = maybe_get_parent(pin.range);
-  if (parent) {
-    TRACE("releasing parent {}", *parent);
-    release_if_no_children(*parent);
-  }
-}
-
-}
diff --git a/src/crimson/os/seastore/lba_manager/btree/btree_range_pin.h b/src/crimson/os/seastore/lba_manager/btree/btree_range_pin.h
deleted file mode 100644 (file)
index b80e748..0000000
+++ /dev/null
@@ -1,292 +0,0 @@
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab
-
-#pragma once
-
-#include <boost/intrusive/set.hpp>
-
-#include "crimson/os/seastore/cached_extent.h"
-#include "crimson/os/seastore/seastore_types.h"
-
-namespace crimson::os::seastore::lba_manager::btree {
-
-class LBANode;
-using LBANodeRef = TCachedExtentRef<LBANode>;
-
-struct lba_node_meta_t {
-  laddr_t begin = 0;
-  laddr_t end = 0;
-  depth_t depth = 0;
-
-  bool is_parent_of(const lba_node_meta_t &other) const {
-    return (depth == other.depth + 1) &&
-      (begin <= other.begin) &&
-      (end > other.begin);
-  }
-
-  std::pair<lba_node_meta_t, lba_node_meta_t> split_into(laddr_t pivot) const {
-    return std::make_pair(
-      lba_node_meta_t{begin, pivot, depth},
-      lba_node_meta_t{pivot, end, depth});
-  }
-
-  static lba_node_meta_t merge_from(
-    const lba_node_meta_t &lhs, const lba_node_meta_t &rhs) {
-    ceph_assert(lhs.depth == rhs.depth);
-    return lba_node_meta_t{lhs.begin, rhs.end, lhs.depth};
-  }
-
-  static std::pair<lba_node_meta_t, lba_node_meta_t>
-  rebalance(const lba_node_meta_t &lhs, const lba_node_meta_t &rhs, laddr_t pivot) {
-    ceph_assert(lhs.depth == rhs.depth);
-    return std::make_pair(
-      lba_node_meta_t{lhs.begin, pivot, lhs.depth},
-      lba_node_meta_t{pivot, rhs.end, lhs.depth});
-  }
-
-  bool is_root() const {
-    return begin == 0 && end == L_ADDR_MAX;
-  }
-};
-
-inline std::ostream &operator<<(
-  std::ostream &lhs,
-  const lba_node_meta_t &rhs)
-{
-  return lhs << "btree_node_meta_t("
-            << "begin=" << rhs.begin
-            << ", end=" << rhs.end
-            << ", depth=" << rhs.depth
-            << ")";
-}
-
-/**
- * btree_range_pin_t
- *
- * Element tracked by btree_pin_set_t below.  Encapsulates the intrusive_set
- * hook, the lba_node_meta_t representing the lba range covered by a node,
- * and extent and ref members intended to hold a reference when the extent
- * should be pinned.
- */
-class btree_pin_set_t;
-class btree_range_pin_t : public boost::intrusive::set_base_hook<> {
-  friend class btree_pin_set_t;
-  lba_node_meta_t range;
-
-  btree_pin_set_t *pins = nullptr;
-
-  // We need to be able to remember extent without holding a reference,
-  // but we can do it more compactly -- TODO
-  CachedExtent *extent = nullptr;
-  CachedExtentRef ref;
-
-  using index_t = boost::intrusive::set<btree_range_pin_t>;
-
-  static auto get_tuple(const lba_node_meta_t &meta) {
-    return std::make_tuple(-meta.depth, meta.begin);
-  }
-
-  void acquire_ref() {
-    ref = CachedExtentRef(extent);
-  }
-
-  void drop_ref() {
-    ref.reset();
-  }
-
-public:
-  btree_range_pin_t() = default;
-  btree_range_pin_t(CachedExtent *extent)
-    : extent(extent) {}
-  btree_range_pin_t(const btree_range_pin_t &rhs, CachedExtent *extent)
-    : range(rhs.range), extent(extent) {}
-
-  bool has_ref() const {
-    return !!ref;
-  }
-
-  bool is_root() const {
-    return range.is_root();
-  }
-
-  void set_range(const lba_node_meta_t &nrange) {
-    range = nrange;
-  }
-  void set_extent(CachedExtent *nextent) {
-    ceph_assert(!extent);
-    extent = nextent;
-  }
-
-  CachedExtent &get_extent() {
-    assert(extent);
-    return *extent;
-  }
-
-  bool has_ref() {
-    return !!ref;
-  }
-
-  void take_pin(btree_range_pin_t &other);
-
-  friend bool operator<(
-    const btree_range_pin_t &lhs, const btree_range_pin_t &rhs) {
-    return get_tuple(lhs.range) < get_tuple(rhs.range);
-  }
-  friend bool operator>(
-    const btree_range_pin_t &lhs, const btree_range_pin_t &rhs) {
-    return get_tuple(lhs.range) > get_tuple(rhs.range);
-  }
-  friend bool operator==(
-    const btree_range_pin_t &lhs, const btree_range_pin_t &rhs) {
-    return get_tuple(lhs.range) == rhs.get_tuple(rhs.range);
-  }
-
-  struct meta_cmp_t {
-    bool operator()(
-      const btree_range_pin_t &lhs, const lba_node_meta_t &rhs) const {
-      return get_tuple(lhs.range) < get_tuple(rhs);
-    }
-    bool operator()(
-      const lba_node_meta_t &lhs, const btree_range_pin_t &rhs) const {
-      return get_tuple(lhs) < get_tuple(rhs.range);
-    }
-  };
-
-  friend std::ostream &operator<<(
-    std::ostream &lhs,
-    const btree_range_pin_t &rhs) {
-    return lhs << "btree_range_pin_t("
-              << "begin=" << rhs.range.begin
-              << ", end=" << rhs.range.end
-              << ", depth=" << rhs.range.depth
-              << ", extent=" << rhs.extent
-              << ")";
-  }
-
-  friend class BtreeLBAPin;
-  ~btree_range_pin_t();
-};
-
-/**
- * btree_pin_set_t
- *
- * Ensures that for every cached node, all parent LBANodes required
- * to map it are present in cache.  Relocating these nodes can
- * therefore be done without further reads or cache space.
- *
- * Contains a btree_range_pin_t for every clean or dirty LBANode
- * or LogicalCachedExtent instance in cache at any point in time.
- * For any LBANode, the contained btree_range_pin_t will hold
- * a reference to that node pinning it in cache as long as that
- * node has children in the set.  This invariant can be violated
- * only by calling retire_extent and is repaired by calling
- * check_parent synchronously after adding any new extents.
- */
-class btree_pin_set_t {
-  friend class btree_range_pin_t;
-  using pins_t = btree_range_pin_t::index_t;
-  pins_t pins;
-
-  /// Removes pin from set optionally checking whether parent has other children
-  void remove_pin(btree_range_pin_t &pin, bool check_parent);
-
-  void replace_pin(btree_range_pin_t &to, btree_range_pin_t &from);
-
-  /// Returns parent pin if exists
-  btree_range_pin_t *maybe_get_parent(const lba_node_meta_t &pin);
-
-  /// Returns earliest child pin if exist
-  const btree_range_pin_t *maybe_get_first_child(const lba_node_meta_t &pin) const;
-
-  /// Releases pin if it has no children
-  void release_if_no_children(btree_range_pin_t &pin);
-
-public:
-  /// Adds pin to set, assumes set is consistent
-  void add_pin(btree_range_pin_t &pin);
-
-  /**
-   * retire/check_parent
-   *
-   * See BtreeLBAManager::complete_transaction.
-   * retire removes the specified pin from the set, but does not
-   * check parents.  After any new extents are added to the set,
-   * the caller is required to call check_parent to restore the
-   * invariant.
-   */
-  void retire(btree_range_pin_t &pin);
-  void check_parent(btree_range_pin_t &pin);
-
-  template <typename F>
-  void scan(F &&f) {
-    for (auto &i : pins) {
-      std::invoke(f, i);
-    }
-  }
-
-  ~btree_pin_set_t() {
-    ceph_assert(pins.empty());
-  }
-};
-
-class BtreeLBAPin : public LBAPin {
-  friend class BtreeLBAManager;
-  friend class LBABtree;
-
-  /**
-   * parent
-   *
-   * populated until link_extent is called to ensure cache residence
-   * until add_pin is called.
-   */
-  CachedExtentRef parent;
-
-  paddr_t paddr;
-  btree_range_pin_t pin;
-
-public:
-  BtreeLBAPin() = default;
-
-  BtreeLBAPin(
-    CachedExtentRef parent,
-    paddr_t paddr,
-    lba_node_meta_t &&meta)
-    : parent(parent), paddr(paddr) {
-    pin.set_range(std::move(meta));
-  }
-
-  void link_extent(LogicalCachedExtent *ref) final {
-    pin.set_extent(ref);
-  }
-
-  extent_len_t get_length() const final {
-    ceph_assert(pin.range.end > pin.range.begin);
-    return pin.range.end - pin.range.begin;
-  }
-
-  paddr_t get_paddr() const final {
-    return paddr;
-  }
-
-  laddr_t get_laddr() const final {
-    return pin.range.begin;
-  }
-
-  LBAPinRef duplicate() const final {
-    auto ret = std::unique_ptr<BtreeLBAPin>(new BtreeLBAPin);
-    ret->pin.set_range(pin.range);
-    ret->paddr = paddr;
-    ret->parent = parent;
-    return ret;
-  }
-
-  void take_pin(LBAPin &opin) final {
-    pin.take_pin(static_cast<BtreeLBAPin&>(opin).pin);
-  }
-
-  bool has_been_invalidated() const final {
-    return parent->has_been_invalidated();
-  }
-};
-
-}
diff --git a/src/crimson/os/seastore/lba_manager/btree/lba_btree.cc b/src/crimson/os/seastore/lba_manager/btree/lba_btree.cc
deleted file mode 100644 (file)
index f1f1eee..0000000
+++ /dev/null
@@ -1,1022 +0,0 @@
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab
-
-#include "crimson/os/seastore/lba_manager/btree/lba_btree.h"
-
-SET_SUBSYS(seastore_lba_details);
-
-namespace crimson::os::seastore::lba_manager::btree {
-
-LBABtree::mkfs_ret LBABtree::mkfs(op_context_t c)
-{
-  auto root_leaf = c.cache.alloc_new_extent<LBALeafNode>(
-    c.trans,
-    LBA_BLOCK_SIZE);
-  root_leaf->set_size(0);
-  lba_node_meta_t meta{0, L_ADDR_MAX, 1};
-  root_leaf->set_meta(meta);
-  root_leaf->pin.set_range(meta);
-  c.trans.get_lba_tree_stats().depth = 1u;
-  return lba_root_t{root_leaf->get_paddr(), 1u};
-}
-
-LBABtree::iterator::handle_boundary_ret LBABtree::iterator::handle_boundary(
-  op_context_t c,
-  mapped_space_visitor_t *visitor)
-{
-  assert(at_boundary());
-  depth_t depth_with_space = 2;
-  for (; depth_with_space <= get_depth(); ++depth_with_space) {
-    if ((get_internal(depth_with_space).pos + 1) <
-       get_internal(depth_with_space).node->get_size()) {
-      break;
-    }
-  }
-
-  if (depth_with_space <= get_depth()) {
-    return seastar::do_with(
-      [](const LBAInternalNode &internal) { return internal.begin(); },
-      [](const LBALeafNode &leaf) { return leaf.begin(); },
-      [this, c, depth_with_space, visitor](auto &li, auto &ll) {
-       for (depth_t depth = 2; depth < depth_with_space; ++depth) {
-         get_internal(depth).reset();
-       }
-       leaf.reset();
-       get_internal(depth_with_space).pos++;
-       // note, cannot result in at_boundary() by construction
-       return lookup_depth_range(
-         c, *this, depth_with_space - 1, 0, li, ll, visitor
-       );
-      });
-  } else {
-    // end
-    return seastar::now();
-  }
-}
-
-LBABtree::iterator_fut LBABtree::iterator::next(
-  op_context_t c,
-  mapped_space_visitor_t *visitor) const
-{
-  assert_valid();
-  assert(!is_end());
-
-  auto ret = *this;
-  ret.leaf.pos++;
-  if (ret.at_boundary()) {
-    return seastar::do_with(
-      ret,
-      [c, visitor](auto &ret) mutable {
-       return ret.handle_boundary(
-         c, visitor
-       ).si_then([&ret] {
-         return std::move(ret);
-       });
-      });
-  } else {
-    return iterator_fut(
-      interruptible::ready_future_marker{},
-      ret);
-  }
-
-}
-
-LBABtree::iterator_fut LBABtree::iterator::prev(op_context_t c) const
-{
-  assert_valid();
-  assert(!is_begin());
-
-  auto ret = *this;
-
-  if (ret.leaf.pos > 0) {
-    ret.leaf.pos--;
-    return iterator_fut(
-      interruptible::ready_future_marker{},
-      ret);
-  }
-
-  depth_t depth_with_space = 2;
-  for (; depth_with_space <= get_depth(); ++depth_with_space) {
-    if (ret.get_internal(depth_with_space).pos > 0) {
-      break;
-    }
-  }
-
-  assert(depth_with_space <= ret.get_depth()); // must not be begin()
-  return seastar::do_with(
-    std::move(ret),
-    [](const LBAInternalNode &internal) { return --internal.end(); },
-    [](const LBALeafNode &leaf) { return --leaf.end(); },
-    [c, depth_with_space](auto &ret, auto &li, auto &ll) {
-      for (depth_t depth = 2; depth < depth_with_space; ++depth) {
-       ret.get_internal(depth).reset();
-      }
-      ret.leaf.reset();
-      ret.get_internal(depth_with_space).pos--;
-      // note, cannot result in at_boundary() by construction
-      return lookup_depth_range(
-       c, ret, depth_with_space - 1, 0, li, ll, nullptr
-      ).si_then([&ret] {
-       assert(!ret.at_boundary());
-       return std::move(ret);
-      });
-    });
-}
-
-LBABtree::iterator_fut LBABtree::lower_bound(
-  op_context_t c,
-  laddr_t addr,
-  mapped_space_visitor_t *visitor) const
-{
-  LOG_PREFIX(LBATree::lower_bound);
-  return lookup(
-    c,
-    [addr](const LBAInternalNode &internal) {
-      assert(internal.get_size() > 0);
-      auto iter = internal.upper_bound(addr);
-      assert(iter != internal.begin());
-      --iter;
-      return iter;
-    },
-    [FNAME, c, addr](const LBALeafNode &leaf) {
-      auto ret = leaf.lower_bound(addr);
-      DEBUGT(
-       "leaf addr {}, got ret offset {}, size {}, end {}",
-       c.trans,
-       addr,
-       ret.get_offset(),
-       leaf.get_size(),
-       ret == leaf.end());
-      return ret;
-    },
-    visitor
-  ).si_then([FNAME, c](auto &&ret) {
-    DEBUGT(
-      "ret.leaf.pos {}",
-      c.trans,
-      ret.leaf.pos);
-    ret.assert_valid();
-    return std::move(ret);
-  });
-}
-
-LBABtree::insert_ret LBABtree::insert(
-  op_context_t c,
-  iterator iter,
-  laddr_t laddr,
-  lba_map_val_t val)
-{
-  LOG_PREFIX(LBATree::insert);
-  DEBUGT(
-    "inserting laddr {} at iter {}",
-    c.trans,
-    laddr,
-    iter.is_end() ? L_ADDR_MAX : iter.get_key());
-  return seastar::do_with(
-    iter,
-    [this, c, laddr, val](auto &ret) {
-      return find_insertion(
-       c, laddr, ret
-      ).si_then([this, c, laddr, val, &ret] {
-       if (!ret.at_boundary() && ret.get_key() == laddr) {
-         return insert_ret(
-           interruptible::ready_future_marker{},
-           std::make_pair(ret, false));
-       } else {
-         ++(c.trans.get_lba_tree_stats().num_inserts);
-         return handle_split(
-           c, ret
-         ).si_then([c, laddr, val, &ret] {
-           if (!ret.leaf.node->is_pending()) {
-             CachedExtentRef mut = c.cache.duplicate_for_write(
-               c.trans, ret.leaf.node
-             );
-             ret.leaf.node = mut->cast<LBALeafNode>();
-           }
-           auto iter = LBALeafNode::const_iterator(
-               ret.leaf.node.get(), ret.leaf.pos);
-           assert(iter == ret.leaf.node->lower_bound(laddr));
-           assert(iter == ret.leaf.node->end() || iter->get_key() > laddr);
-           assert(laddr >= ret.leaf.node->get_meta().begin &&
-                  laddr < ret.leaf.node->get_meta().end);
-           ret.leaf.node->insert(iter, laddr, val);
-           return insert_ret(
-             interruptible::ready_future_marker{},
-             std::make_pair(ret, true));
-         });
-       }
-      });
-    });
-}
-
-LBABtree::update_ret LBABtree::update(
-  op_context_t c,
-  iterator iter,
-  lba_map_val_t val)
-{
-  LOG_PREFIX(LBATree::update);
-  DEBUGT(
-    "update element at {}",
-    c.trans,
-    iter.is_end() ? L_ADDR_MAX : iter.get_key());
-  if (!iter.leaf.node->is_pending()) {
-    CachedExtentRef mut = c.cache.duplicate_for_write(
-      c.trans, iter.leaf.node
-    );
-    iter.leaf.node = mut->cast<LBALeafNode>();
-  }
-  iter.leaf.node->update(
-    iter.leaf.node->iter_idx(iter.leaf.pos),
-    val);
-  return update_ret(
-    interruptible::ready_future_marker{},
-    iter);
-}
-
-LBABtree::remove_ret LBABtree::remove(
-  op_context_t c,
-  iterator iter)
-{
-  LOG_PREFIX(LBATree::remove);
-  DEBUGT(
-    "remove element at {}",
-    c.trans,
-    iter.is_end() ? L_ADDR_MAX : iter.get_key());
-  assert(!iter.is_end());
-  ++(c.trans.get_lba_tree_stats().num_erases);
-  return seastar::do_with(
-    iter,
-    [this, c](auto &ret) {
-      if (!ret.leaf.node->is_pending()) {
-       CachedExtentRef mut = c.cache.duplicate_for_write(
-         c.trans, ret.leaf.node
-       );
-       ret.leaf.node = mut->cast<LBALeafNode>();
-      }
-      ret.leaf.node->remove(
-       ret.leaf.node->iter_idx(ret.leaf.pos));
-
-      return handle_merge(
-       c, ret
-      );
-    });
-}
-
-LBABtree::init_cached_extent_ret LBABtree::init_cached_extent(
-  op_context_t c,
-  CachedExtentRef e)
-{
-  LOG_PREFIX(LBATree::init_cached_extent);
-  DEBUGT("extent {}", c.trans, *e);
-  if (e->is_logical()) {
-    auto logn = e->cast<LogicalCachedExtent>();
-    return lower_bound(
-      c,
-      logn->get_laddr()
-    ).si_then([FNAME, e, c, logn](auto iter) {
-      if (!iter.is_end() &&
-         iter.get_key() == logn->get_laddr() &&
-         iter.get_val().paddr == logn->get_paddr()) {
-       logn->set_pin(iter.get_pin());
-       ceph_assert(iter.get_val().len == e->get_length());
-       if (c.pins) {
-         c.pins->add_pin(
-           static_cast<BtreeLBAPin&>(logn->get_pin()).pin);
-       }
-       DEBUGT("logical extent {} live", c.trans, *logn);
-       return true;
-      } else {
-       DEBUGT("logical extent {} not live", c.trans, *logn);
-       return false;
-      }
-    });
-  } else if (e->get_type() == extent_types_t::LADDR_INTERNAL) {
-    auto eint = e->cast<LBAInternalNode>();
-    return lower_bound(
-      c, eint->get_node_meta().begin
-    ).si_then([FNAME, e, c, eint](auto iter) {
-      // Note, this check is valid even if iter.is_end()
-      depth_t cand_depth = eint->get_node_meta().depth;
-      if (cand_depth <= iter.get_depth() &&
-         &*iter.get_internal(cand_depth).node == &*eint) {
-       DEBUGT("extent {} is live", c.trans, *eint);
-       return true;
-      } else {
-       DEBUGT("extent {} is not live", c.trans, *eint);
-       return false;
-      }
-    });
-  } else if (e->get_type() == extent_types_t::LADDR_LEAF) {
-    auto eleaf = e->cast<LBALeafNode>();
-    return lower_bound(
-      c, eleaf->get_node_meta().begin
-    ).si_then([FNAME, c, e, eleaf](auto iter) {
-      // Note, this check is valid even if iter.is_end()
-      if (iter.leaf.node == &*eleaf) {
-       DEBUGT("extent {} is live", c.trans, *eleaf);
-       return true;
-      } else {
-       DEBUGT("extent {} is not live", c.trans, *eleaf);
-       return false;
-      }
-    });
-  } else {
-    DEBUGT(
-      "found other extent {} type {}",
-      c.trans,
-      *e,
-      e->get_type());
-    return init_cached_extent_ret(
-      interruptible::ready_future_marker{},
-      true);
-  }
-}
-
-LBABtree::get_internal_if_live_ret
-LBABtree::get_internal_if_live(
-  op_context_t c,
-  paddr_t addr,
-  laddr_t laddr,
-  seastore_off_t len)
-{
-  LOG_PREFIX(LBABtree::get_internal_if_live);
-  return lower_bound(
-    c, laddr
-  ).si_then([FNAME, c, addr, laddr, len](auto iter) {
-    for (depth_t d = 2; d <= iter.get_depth(); ++d) {
-      CachedExtent &node = *iter.get_internal(d).node;
-      auto internal_node = node.cast<LBAInternalNode>();
-      if (internal_node->get_paddr() == addr) {
-       DEBUGT(
-         "extent laddr {} addr {}~{} found: {}",
-         c.trans,
-         laddr,
-         addr,
-         len,
-         *internal_node);
-       assert(internal_node->get_node_meta().begin == laddr);
-       return CachedExtentRef(internal_node);
-      }
-    }
-    DEBUGT(
-      "extent laddr {} addr {}~{} is not live, no matching internal node",
-      c.trans,
-      laddr,
-      addr,
-      len);
-    return CachedExtentRef();
-  });
-}
-
-LBABtree::get_leaf_if_live_ret
-LBABtree::get_leaf_if_live(
-  op_context_t c,
-  paddr_t addr,
-  laddr_t laddr,
-  seastore_off_t len)
-{
-  LOG_PREFIX(LBABtree::get_leaf_if_live);
-  return lower_bound(
-    c, laddr
-  ).si_then([FNAME, c, addr, laddr, len](auto iter) {
-    if (iter.leaf.node->get_paddr() == addr) {
-      DEBUGT(
-       "extent laddr {} addr {}~{} found: {}",
-       c.trans,
-       laddr,
-       addr,
-       len,
-       *iter.leaf.node);
-      return CachedExtentRef(iter.leaf.node);
-    } else {
-      DEBUGT(
-       "extent laddr {} addr {}~{} is not live, does not match node {}",
-       c.trans,
-       laddr,
-       addr,
-       len,
-       *iter.leaf.node);
-      return CachedExtentRef();
-    }
-  });
-}
-
-
-LBABtree::rewrite_lba_extent_ret LBABtree::rewrite_lba_extent(
-  op_context_t c,
-  CachedExtentRef e)
-{
-  LOG_PREFIX(LBABtree::rewrite_lba_extent);
-  assert(e->get_type() == extent_types_t::LADDR_INTERNAL ||
-        e->get_type() == extent_types_t::LADDR_LEAF);
-
-  auto do_rewrite = [&](auto &lba_extent) {
-    auto nlba_extent = c.cache.alloc_new_extent<
-      std::remove_reference_t<decltype(lba_extent)>
-      >(
-      c.trans,
-      lba_extent.get_length());
-    lba_extent.get_bptr().copy_out(
-      0,
-      lba_extent.get_length(),
-      nlba_extent->get_bptr().c_str());
-    nlba_extent->pin.set_range(nlba_extent->get_node_meta());
-    nlba_extent->set_last_modified(lba_extent.get_last_modified());
-
-    /* This is a bit underhanded.  Any relative addrs here must necessarily
-     * be record relative as we are rewriting a dirty extent.  Thus, we
-     * are using resolve_relative_addrs with a (likely negative) block
-     * relative offset to correct them to block-relative offsets adjusted
-     * for our new transaction location.
-     *
-     * Upon commit, these now block relative addresses will be interpretted
-     * against the real final address.
-     */
-    nlba_extent->resolve_relative_addrs(
-      make_record_relative_paddr(0) - nlba_extent->get_paddr());
-
-    DEBUGT(
-      "rewriting {} into {}",
-      c.trans,
-      lba_extent,
-      *nlba_extent);
-
-    return update_internal_mapping(
-      c,
-      nlba_extent->get_node_meta().depth,
-      nlba_extent->get_node_meta().begin,
-      e->get_paddr(),
-      nlba_extent->get_paddr()
-    ).si_then([c, e] {
-      c.cache.retire_extent(c.trans, e);
-    });
-  };
-
-  CachedExtentRef nlba_extent;
-  if (e->get_type() == extent_types_t::LADDR_INTERNAL) {
-    auto lint = e->cast<LBAInternalNode>();
-    return do_rewrite(*lint);
-  } else {
-    assert(e->get_type() == extent_types_t::LADDR_LEAF);
-    auto lleaf = e->cast<LBALeafNode>();
-    return do_rewrite(*lleaf);
-  }
-}
-
-LBABtree::get_internal_node_ret LBABtree::get_internal_node(
-  op_context_t c,
-  depth_t depth,
-  paddr_t offset,
-  laddr_t begin,
-  laddr_t end)
-{
-  LOG_PREFIX(LBATree::get_internal_node);
-  DEBUGT(
-    "reading internal at offset {}, depth {}, begin {}, end {}",
-    c.trans,
-    offset,
-    depth,
-    begin,
-    end);
-  assert(depth > 1);
-  auto init_internal = [c, depth, begin, end](LBAInternalNode &node) {
-    assert(!node.is_pending());
-    assert(!node.pin.is_linked());
-    node.pin.set_range(lba_node_meta_t{begin, end, depth});
-    if (c.pins) {
-      c.pins->add_pin(node.pin);
-    }
-  };
-  return c.cache.get_extent<LBAInternalNode>(
-    c.trans,
-    offset,
-    LBA_BLOCK_SIZE,
-    init_internal
-  ).si_then([FNAME, c, offset, init_internal, depth, begin, end](
-             LBAInternalNodeRef ret) {
-    DEBUGT(
-      "read internal at offset {} {}",
-      c.trans,
-      offset,
-      *ret);
-    // This can only happen during init_cached_extent
-    if (c.pins && !ret->is_pending() && !ret->pin.is_linked()) {
-      assert(ret->is_dirty());
-      init_internal(*ret);
-    }
-    auto meta = ret->get_meta();
-    if (ret->get_size()) {
-      ceph_assert(meta.begin <= ret->begin()->get_key());
-      ceph_assert(meta.end > (ret->end() - 1)->get_key());
-    }
-    ceph_assert(depth == meta.depth);
-    ceph_assert(begin == meta.begin);
-    ceph_assert(end == meta.end);
-    return get_internal_node_ret(
-      interruptible::ready_future_marker{},
-      ret);
-  });
-}
-
-LBABtree::get_leaf_node_ret LBABtree::get_leaf_node(
-  op_context_t c,
-  paddr_t offset,
-  laddr_t begin,
-  laddr_t end)
-{
-  LOG_PREFIX(LBATree::get_leaf_node);
-  DEBUGT(
-    "reading leaf at offset {}, begin {}, end {}",
-    c.trans,
-    offset,
-    begin,
-    end);
-  auto init_leaf = [c, begin, end](LBALeafNode &node) {
-    assert(!node.is_pending());
-    assert(!node.pin.is_linked());
-    node.pin.set_range(lba_node_meta_t{begin, end, 1});
-    if (c.pins) {
-      c.pins->add_pin(node.pin);
-    }
-  };
-  return c.cache.get_extent<LBALeafNode>(
-    c.trans,
-    offset,
-    LBA_BLOCK_SIZE,
-    init_leaf
-  ).si_then([FNAME, c, offset, init_leaf, begin, end](LBALeafNodeRef ret) {
-    DEBUGT(
-      "read leaf at offset {} {}",
-      c.trans,
-      offset,
-      *ret);
-    // This can only happen during init_cached_extent
-    if (c.pins && !ret->is_pending() && !ret->pin.is_linked()) {
-      assert(ret->is_dirty());
-      init_leaf(*ret);
-    }
-    auto meta = ret->get_meta();
-    if (ret->get_size()) {
-      ceph_assert(meta.begin <= ret->begin()->get_key());
-      ceph_assert(meta.end > (ret->end() - 1)->get_key());
-    }
-    ceph_assert(1 == meta.depth);
-    ceph_assert(begin == meta.begin);
-    ceph_assert(end == meta.end);
-    return get_leaf_node_ret(
-      interruptible::ready_future_marker{},
-      ret);
-  });
-}
-
-LBABtree::find_insertion_ret LBABtree::find_insertion(
-  op_context_t c,
-  laddr_t laddr,
-  iterator &iter)
-{
-  assert(iter.is_end() || iter.get_key() >= laddr);
-  if (!iter.is_end() && iter.get_key() == laddr) {
-    return seastar::now();
-  } else if (iter.leaf.node->get_node_meta().begin <= laddr) {
-#ifndef NDEBUG
-    auto p = iter;
-    if (p.leaf.pos > 0) {
-      --p.leaf.pos;
-      assert(p.get_key() < laddr);
-    }
-#endif
-    return seastar::now();
-  } else {
-    assert(iter.leaf.pos == 0);
-    return iter.prev(
-      c
-    ).si_then([laddr, &iter](auto p) {
-      boost::ignore_unused(laddr); // avoid clang warning;
-      assert(p.leaf.node->get_node_meta().begin <= laddr);
-      assert(p.get_key() < laddr);
-      // Note, this is specifically allowed to violate the iterator
-      // invariant that pos is a valid index for the node in the event
-      // that the insertion point is at the end of a node.
-      p.leaf.pos++;
-      assert(p.at_boundary());
-      iter = p;
-      return seastar::now();
-    });
-  }
-}
-
-LBABtree::handle_split_ret LBABtree::handle_split(
-  op_context_t c,
-  iterator &iter)
-{
-  LOG_PREFIX(LBATree::handle_split);
-
-  depth_t split_from = iter.check_split();
-
-  DEBUGT("split_from {}, depth {}", c.trans, split_from, iter.get_depth());
-
-  if (split_from == iter.get_depth()) {
-    auto nroot = c.cache.alloc_new_extent<LBAInternalNode>(
-      c.trans, LBA_BLOCK_SIZE);
-    lba_node_meta_t meta{0, L_ADDR_MAX, iter.get_depth() + 1};
-    nroot->set_meta(meta);
-    nroot->pin.set_range(meta);
-    nroot->journal_insert(
-      std::cbegin(*nroot),
-      L_ADDR_MIN,
-      root.get_location(),
-      nullptr);
-    iter.internal.push_back({nroot, 0});
-
-    root.set_location(nroot->get_paddr());
-    root.set_depth(iter.get_depth());
-    c.trans.get_lba_tree_stats().depth = iter.get_depth();
-    root_dirty = true;
-  }
-
-  /* pos may be either node_position_t<LBALeafNode> or
-   * node_position_t<LBAInternalNode> */
-  auto split_level = [&, FNAME](auto &parent_pos, auto &pos) {
-    auto [left, right, pivot] = pos.node->make_split_children(c);
-
-    auto parent_node = parent_pos.node;
-    auto parent_iter = parent_pos.get_iter();
-
-    parent_node->update(
-      parent_iter,
-      left->get_paddr());
-    parent_node->insert(
-      parent_iter + 1,
-      pivot,
-      right->get_paddr());
-
-    DEBUGT("splitted {} into left: {}, right: {}",
-      c.trans,
-      *pos.node,
-      *left,
-      *right);
-    c.cache.retire_extent(c.trans, pos.node);
-
-    return std::make_pair(left, right);
-  };
-
-  for (; split_from > 0; --split_from) {
-    auto &parent_pos = iter.get_internal(split_from + 1);
-    if (!parent_pos.node->is_pending()) {
-      parent_pos.node = c.cache.duplicate_for_write(
-       c.trans, parent_pos.node
-      )->cast<LBAInternalNode>();
-    }
-
-    if (split_from > 1) {
-      auto &pos = iter.get_internal(split_from);
-      DEBUGT("splitting internal {} at depth {}, parent: {} at pos: {}",
-       c.trans,
-       *pos.node,
-       split_from,
-       *parent_pos.node,
-       parent_pos.pos);
-      auto [left, right] = split_level(parent_pos, pos);
-
-      if (pos.pos < left->get_size()) {
-       pos.node = left;
-      } else {
-       pos.node = right;
-       pos.pos -= left->get_size();
-
-       parent_pos.pos += 1;
-      }
-    } else {
-      auto &pos = iter.leaf;
-      DEBUGT("splitting leaf {}, parent: {} at pos: {}",
-       c.trans,
-       *pos.node,
-       *parent_pos.node,
-       parent_pos.pos);
-      auto [left, right] = split_level(parent_pos, pos);
-
-      /* right->get_node_meta().begin == pivot == right->begin()->get_key()
-       * Thus, if pos.pos == left->get_size(), we want iter to point to
-       * left with pos.pos at the end rather than right with pos.pos = 0
-       * since the insertion would be to the left of the first element
-       * of right and thus necessarily less than right->get_node_meta().begin.
-       */
-      if (pos.pos <= left->get_size()) {
-       pos.node = left;
-      } else {
-       pos.node = right;
-       pos.pos -= left->get_size();
-
-       parent_pos.pos += 1;
-      }
-    }
-  }
-
-  return seastar::now();
-}
-
-template <typename NodeType>
-LBABtree::base_iertr::future<typename NodeType::Ref> get_node(
-  op_context_t c,
-  depth_t depth,
-  paddr_t addr,
-  laddr_t begin,
-  laddr_t end);
-
-template <>
-LBABtree::base_iertr::future<LBALeafNodeRef> get_node<LBALeafNode>(
-  op_context_t c,
-  depth_t depth,
-  paddr_t addr,
-  laddr_t begin,
-  laddr_t end) {
-  assert(depth == 1);
-  return LBABtree::get_leaf_node(c, addr, begin, end);
-}
-
-template <>
-LBABtree::base_iertr::future<LBAInternalNodeRef> get_node<LBAInternalNode>(
-  op_context_t c,
-  depth_t depth,
-  paddr_t addr,
-  laddr_t begin,
-  laddr_t end) {
-  return LBABtree::get_internal_node(c, depth, addr, begin, end);
-}
-
-template <typename NodeType>
-LBABtree::handle_merge_ret merge_level(
-  op_context_t c,
-  depth_t depth,
-  LBABtree::node_position_t<LBAInternalNode> &parent_pos,
-  LBABtree::node_position_t<NodeType> &pos)
-{
-  LOG_PREFIX(LBABtree::merge_level);
-  if (!parent_pos.node->is_pending()) {
-    parent_pos.node = c.cache.duplicate_for_write(
-      c.trans, parent_pos.node
-    )->cast<LBAInternalNode>();
-  }
-
-  auto iter = parent_pos.get_iter();
-  assert(iter.get_offset() < parent_pos.node->get_size());
-  bool donor_is_left = ((iter.get_offset() + 1) == parent_pos.node->get_size());
-  auto donor_iter = donor_is_left ? (iter - 1) : (iter + 1);
-  auto next_iter = donor_iter + 1;
-  auto begin = donor_iter->get_key();
-  auto end = next_iter == parent_pos.node->end()
-    ? parent_pos.node->get_node_meta().end
-    : next_iter->get_key();
-  
-  DEBUGT("parent: {}, node: {}", c.trans, *parent_pos.node, *pos.node);
-  return get_node<NodeType>(
-    c,
-    depth,
-    donor_iter.get_val().maybe_relative_to(parent_pos.node->get_paddr()),
-    begin,
-    end
-  ).si_then([FNAME, c, iter, donor_iter, donor_is_left, &parent_pos, &pos](
-             typename NodeType::Ref donor) {
-    auto [l, r] = donor_is_left ?
-      std::make_pair(donor, pos.node) : std::make_pair(pos.node, donor);
-
-    auto [liter, riter] = donor_is_left ?
-      std::make_pair(donor_iter, iter) : std::make_pair(iter, donor_iter);
-
-    if (donor->at_min_capacity()) {
-      auto replacement = l->make_full_merge(c, r);
-
-      parent_pos.node->update(
-       liter,
-       replacement->get_paddr());
-      parent_pos.node->remove(riter);
-
-      pos.node = replacement;
-      if (donor_is_left) {
-       pos.pos += r->get_size();
-       parent_pos.pos--;
-      }
-
-      DEBUGT("l: {}, r: {}, replacement: {}", c.trans, *l, *r, *replacement);
-      c.cache.retire_extent(c.trans, l);
-      c.cache.retire_extent(c.trans, r);
-    } else {
-      auto [replacement_l, replacement_r, pivot] =
-       l->make_balanced(
-         c,
-         r,
-         !donor_is_left);
-
-      parent_pos.node->update(
-       liter,
-       replacement_l->get_paddr());
-      parent_pos.node->replace(
-       riter,
-       pivot,
-       replacement_r->get_paddr());
-
-      if (donor_is_left) {
-       assert(parent_pos.pos > 0);
-       parent_pos.pos--;
-      }
-
-      auto orig_position = donor_is_left ?
-       l->get_size() + pos.pos :
-       pos.pos;
-      if (orig_position < replacement_l->get_size()) {
-       pos.node = replacement_l;
-       pos.pos = orig_position;
-      } else {
-       parent_pos.pos++;
-       pos.node = replacement_r;
-       pos.pos = orig_position - replacement_l->get_size();
-      }
-
-      DEBUGT("l: {}, r: {}, replacement_l: {}, replacement_r: {}",
-       c.trans, *l, *r, *replacement_l, *replacement_r);
-      c.cache.retire_extent(c.trans, l);
-      c.cache.retire_extent(c.trans, r);
-    }
-
-    return seastar::now();
-  });
-}
-
-LBABtree::handle_merge_ret LBABtree::handle_merge(
-  op_context_t c,
-  iterator &iter)
-{
-  LOG_PREFIX(LBATree::handle_merge);
-  if (iter.get_depth() == 1 ||
-      !iter.leaf.node->below_min_capacity()) {
-    DEBUGT(
-      "no need to merge leaf, leaf size {}, depth {}",
-      c.trans,
-      iter.leaf.node->get_size(),
-      iter.get_depth());
-    return seastar::now();
-  }
-
-  return seastar::do_with(
-    depth_t{1},
-    [FNAME, this, c, &iter](auto &to_merge) {
-      return trans_intr::repeat(
-       [FNAME, this, c, &iter, &to_merge] {
-         DEBUGT(
-           "merging depth {}",
-           c.trans,
-           to_merge);
-         auto &parent_pos = iter.get_internal(to_merge + 1);
-         auto merge_fut = handle_merge_iertr::now();
-         if (to_merge > 1) {
-           auto &pos = iter.get_internal(to_merge);
-           merge_fut = merge_level(c, to_merge, parent_pos, pos);
-         } else {
-           auto &pos = iter.leaf;
-           merge_fut = merge_level(c, to_merge, parent_pos, pos);
-         }
-
-         return merge_fut.si_then([FNAME, this, c, &iter, &to_merge] {
-           ++to_merge;
-           auto &pos = iter.get_internal(to_merge);
-           if (to_merge == iter.get_depth()) {
-             if (pos.node->get_size() == 1) {
-               DEBUGT("collapsing root", c.trans);
-               c.cache.retire_extent(c.trans, pos.node);
-               assert(pos.pos == 0);
-               auto node_iter = pos.get_iter();
-               root.set_location(
-                 node_iter->get_val().maybe_relative_to(pos.node->get_paddr()));
-               iter.internal.pop_back();
-               root.set_depth(iter.get_depth());
-               c.trans.get_lba_tree_stats().depth = iter.get_depth();
-               root_dirty = true;
-             } else {
-               DEBUGT("no need to collapse root", c.trans);
-             }
-             return seastar::stop_iteration::yes;
-           } else if (pos.node->below_min_capacity()) {
-             DEBUGT(
-               "continuing, next node {} depth {} at min",
-               c.trans,
-               *pos.node,
-               to_merge);
-             return seastar::stop_iteration::no;
-           } else {
-             DEBUGT(
-               "complete, next node {} depth {} not min",
-               c.trans,
-               *pos.node,
-               to_merge);
-             return seastar::stop_iteration::yes;
-           }
-         });
-       });
-    });
-}
-
-LBABtree::update_internal_mapping_ret LBABtree::update_internal_mapping(
-  op_context_t c,
-  depth_t depth,
-  laddr_t laddr,
-  paddr_t old_addr,
-  paddr_t new_addr)
-{
-  LOG_PREFIX(LBATree::update_internal_mapping);
-  DEBUGT(
-    "updating laddr {} at depth {} from {} to {}",
-    c.trans,
-    laddr,
-    depth,
-    old_addr,
-    new_addr);
-
-  return lower_bound(
-    c, laddr
-  ).si_then([=](auto iter) {
-    assert(iter.get_depth() >= depth);
-    if (depth == iter.get_depth()) {
-      DEBUGT("update at root", c.trans);
-
-      if (laddr != 0) {
-       ERRORT(
-         "updating root laddr {} at depth {} from {} to {},"
-         "laddr is not 0",
-         c.trans,
-         laddr,
-         depth,
-         old_addr,
-         new_addr,
-         root.get_location());
-       ceph_assert(0 == "impossible");
-      }
-
-      if (root.get_location() != old_addr) {
-       ERRORT(
-         "updating root laddr {} at depth {} from {} to {},"
-         "root addr {} does not match",
-         c.trans,
-         laddr,
-         depth,
-         old_addr,
-         new_addr,
-         root.get_location());
-       ceph_assert(0 == "impossible");
-      }
-
-      root.set_location(new_addr);
-      root_dirty = true;
-    } else {
-      auto &parent = iter.get_internal(depth + 1);
-      assert(parent.node);
-      assert(parent.pos < parent.node->get_size());
-      auto piter = parent.node->iter_idx(parent.pos);
-
-      if (piter->get_key() != laddr) {
-       ERRORT(
-         "updating laddr {} at depth {} from {} to {},"
-         "node {} pos {} val pivot addr {} does not match",
-         c.trans,
-         laddr,
-         depth,
-         old_addr,
-         new_addr,
-         *(parent.node),
-         parent.pos,
-         piter->get_key());
-       ceph_assert(0 == "impossible");
-      }
-
-
-      if (piter->get_val() != old_addr) {
-       ERRORT(
-         "updating laddr {} at depth {} from {} to {},"
-         "node {} pos {} val addr {} does not match",
-         c.trans,
-         laddr,
-         depth,
-         old_addr,
-         new_addr,
-         *(parent.node),
-         parent.pos,
-         piter->get_val());
-       ceph_assert(0 == "impossible");
-      }
-
-      CachedExtentRef mut = c.cache.duplicate_for_write(
-       c.trans,
-       parent.node
-      );
-      LBAInternalNodeRef mparent = mut->cast<LBAInternalNode>();
-      mparent->update(piter, new_addr);
-
-      /* Note, iter is now invalid as we didn't udpate either the parent
-       * node reference to the new mutable instance nor did we update the
-       * child pointer to the new node.  Not a problem as we'll now just
-       * destruct it.
-       */
-    }
-    return seastar::now();
-  });
-}
-}
diff --git a/src/crimson/os/seastore/lba_manager/btree/lba_btree.h b/src/crimson/os/seastore/lba_manager/btree/lba_btree.h
deleted file mode 100644 (file)
index 0ba4592..0000000
+++ /dev/null
@@ -1,702 +0,0 @@
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab
-
-#pragma once
-
-#include <boost/container/static_vector.hpp>
-#include <sys/mman.h>
-#include <memory>
-#include <string.h>
-
-#include "crimson/os/seastore/lba_manager.h"
-#include "crimson/os/seastore/logging.h"
-#include "crimson/os/seastore/seastore_types.h"
-#include "crimson/os/seastore/lba_manager/btree/lba_btree_node.h"
-
-namespace crimson::os::seastore::lba_manager::btree {
-
-
-class LBABtree {
-  static constexpr size_t MAX_DEPTH = 16;
-public:
-  using base_iertr = LBAManager::base_iertr;
-
-  class iterator;
-  using iterator_fut = base_iertr::future<iterator>;
-
-  using mapped_space_visitor_t = LBAManager::scan_mapped_space_func_t;
-
-  class iterator {
-  public:
-    iterator(const iterator &rhs) noexcept :
-      internal(rhs.internal), leaf(rhs.leaf) {}
-    iterator(iterator &&rhs) noexcept :
-      internal(std::move(rhs.internal)), leaf(std::move(rhs.leaf)) {}
-
-    iterator &operator=(const iterator &) = default;
-    iterator &operator=(iterator &&) = default;
-
-    iterator_fut next(
-      op_context_t c,
-      mapped_space_visitor_t *visit=nullptr) const;
-
-    iterator_fut prev(op_context_t c) const;
-
-    void assert_valid() const {
-      assert(leaf.node);
-      assert(leaf.pos <= leaf.node->get_size());
-
-      for (auto &i: internal) {
-       (void)i;
-       assert(i.node);
-       assert(i.pos < i.node->get_size());
-      }
-    }
-
-    depth_t get_depth() const {
-      return internal.size() + 1;
-    }
-
-    auto &get_internal(depth_t depth) {
-      assert(depth > 1);
-      assert((depth - 2) < internal.size());
-      return internal[depth - 2];
-    }
-
-    const auto &get_internal(depth_t depth) const {
-      assert(depth > 1);
-      assert((depth - 2) < internal.size());
-      return internal[depth - 2];
-    }
-
-    laddr_t get_key() const {
-      assert(!is_end());
-      return leaf.node->iter_idx(leaf.pos).get_key();
-    }
-    lba_map_val_t get_val() const {
-      assert(!is_end());
-      auto ret = leaf.node->iter_idx(leaf.pos).get_val();
-      ret.paddr = ret.paddr.maybe_relative_to(leaf.node->get_paddr());
-      return ret;
-    }
-
-    bool is_end() const {
-      // external methods may only resolve at a boundary if at end
-      return at_boundary();
-    }
-
-    bool is_begin() const {
-      for (auto &i: internal) {
-       if (i.pos != 0)
-         return false;
-      }
-      return leaf.pos == 0;
-    }
-
-    LBAPinRef get_pin() const {
-      assert(!is_end());
-      auto val = get_val();
-      auto key = get_key();
-      return std::make_unique<BtreeLBAPin>(
-       leaf.node,
-       val.paddr,
-       lba_node_meta_t{ key, key + val.len, 0 });
-    }
-
-  private:
-    iterator() noexcept {}
-    iterator(depth_t depth) noexcept : internal(depth - 1) {}
-
-    friend class LBABtree;
-    static constexpr uint16_t INVALID = std::numeric_limits<uint16_t>::max();
-    template <typename NodeType>
-    struct node_position_t {
-      typename NodeType::Ref node;
-      uint16_t pos = INVALID;
-
-      void reset() {
-       *this = node_position_t{};
-      }
-
-      auto get_iter() {
-       assert(pos != INVALID);
-       assert(pos < node->get_size());
-       return node->iter_idx(pos);
-      }
-    };
-    boost::container::static_vector<
-      node_position_t<LBAInternalNode>, MAX_DEPTH> internal;
-    node_position_t<LBALeafNode> leaf;
-
-    bool at_boundary() const {
-      assert(leaf.pos <= leaf.node->get_size());
-      return leaf.pos == leaf.node->get_size();
-    }
-
-    using handle_boundary_ertr = base_iertr;
-    using handle_boundary_ret = handle_boundary_ertr::future<>;
-    handle_boundary_ret handle_boundary(
-      op_context_t c,
-      mapped_space_visitor_t *visitor);
-
-    depth_t check_split() const {
-      if (!leaf.node->at_max_capacity()) {
-       return 0;
-      }
-      for (depth_t split_from = 1; split_from < get_depth(); ++split_from) {
-       if (!get_internal(split_from + 1).node->at_max_capacity())
-         return split_from;
-      }
-      return get_depth();
-    }
-
-    depth_t check_merge() const {
-      if (!leaf.node->below_min_capacity()) {
-       return 0;
-      }
-      for (depth_t merge_from = 1; merge_from < get_depth(); ++merge_from) {
-       if (!get_internal(merge_from + 1).node->below_min_capacity())
-         return merge_from;
-      }
-      return get_depth();
-    }
-  };
-
-  LBABtree(lba_root_t root) : root(root) {}
-
-  bool is_root_dirty() const {
-    return root_dirty;
-  }
-  lba_root_t get_root_undirty() {
-    ceph_assert(root_dirty);
-    root_dirty = false;
-    return root;
-  }
-
-  /// mkfs
-  using mkfs_ret = lba_root_t;
-  static mkfs_ret mkfs(op_context_t c);
-
-  /**
-   * lower_bound
-   *
-   * @param c [in] context
-   * @param addr [in] ddr
-   * @return least iterator >= key
-   */
-  iterator_fut lower_bound(
-    op_context_t c,
-    laddr_t addr,
-    mapped_space_visitor_t *visit=nullptr) const;
-
-  /**
-   * upper_bound
-   *
-   * @param c [in] context
-   * @param addr [in] ddr
-   * @return least iterator > key
-   */
-  iterator_fut upper_bound(
-    op_context_t c,
-    laddr_t addr
-  ) const {
-    return lower_bound(
-      c, addr
-    ).si_then([c, addr](auto iter) {
-      if (!iter.is_end() && iter.get_key() == addr) {
-       return iter.next(c);
-      } else {
-       return iterator_fut(
-         interruptible::ready_future_marker{},
-         iter);
-      }
-    });
-  }
-
-  /**
-   * upper_bound_right
-   *
-   * @param c [in] context
-   * @param addr [in] addr
-   * @return least iterator i s.t. i.get_key() + i.get_val().len > key
-   */
-  iterator_fut upper_bound_right(
-    op_context_t c,
-    laddr_t addr) const
-  {
-    return lower_bound(
-      c, addr
-    ).si_then([c, addr](auto iter) {
-      if (iter.is_begin()) {
-       return iterator_fut(
-         interruptible::ready_future_marker{},
-         iter);
-      } else {
-       return iter.prev(
-         c
-       ).si_then([iter, addr](auto prev) {
-         if ((prev.get_key() + prev.get_val().len) > addr) {
-           return iterator_fut(
-             interruptible::ready_future_marker{},
-             prev);
-         } else {
-           return iterator_fut(
-             interruptible::ready_future_marker{},
-             iter);
-         }
-       });
-      }
-    });
-  }
-
-  iterator_fut begin(op_context_t c) const {
-    return lower_bound(c, 0);
-  }
-  iterator_fut end(op_context_t c) const {
-    return upper_bound(c, L_ADDR_MAX);
-  }
-
-  using iterate_repeat_ret_inner = base_iertr::future<
-    seastar::stop_iteration>;
-  template <typename F>
-  static base_iertr::future<> iterate_repeat(
-    op_context_t c,
-    iterator_fut &&iter_fut,
-    F &&f,
-    mapped_space_visitor_t *visitor=nullptr) {
-    return std::move(
-      iter_fut
-    ).si_then([c, visitor, f=std::forward<F>(f)](auto iter) {
-      return seastar::do_with(
-       iter,
-       std::move(f),
-       [c, visitor](auto &pos, auto &f) {
-         return trans_intr::repeat(
-           [c, visitor, &f, &pos] {
-             return f(
-               pos
-             ).si_then([c, visitor, &pos](auto done) {
-               if (done == seastar::stop_iteration::yes) {
-                 return iterate_repeat_ret_inner(
-                   interruptible::ready_future_marker{},
-                   seastar::stop_iteration::yes);
-               } else {
-                 ceph_assert(!pos.is_end());
-                 return pos.next(
-                   c, visitor
-                 ).si_then([&pos](auto next) {
-                   pos = next;
-                   return iterate_repeat_ret_inner(
-                     interruptible::ready_future_marker{},
-                     seastar::stop_iteration::no);
-                 });
-               }
-             });
-           });
-       });
-    });
-  }
-
-  /**
-   * insert
-   *
-   * Inserts val at laddr with iter as a hint.  If element at laddr already
-   * exists returns iterator to that element unchanged and returns false.
-   *
-   * Invalidates all outstanding iterators for this tree on this transaction.
-   *
-   * @param c [in] op context
-   * @param iter [in] hint, insertion constant if immediately prior to iter
-   * @param laddr [in] addr at which to insert
-   * @param val [in] val to insert
-   * @return pair<iter, bool> where iter points to element at addr, bool true
-   *         iff element at laddr did not exist.
-   */
-  using insert_iertr = base_iertr;
-  using insert_ret = insert_iertr::future<std::pair<iterator, bool>>;
-  insert_ret insert(
-    op_context_t c,
-    iterator iter,
-    laddr_t laddr,
-    lba_map_val_t val
-  );
-  insert_ret insert(
-    op_context_t c,
-    laddr_t laddr,
-    lba_map_val_t val) {
-    return lower_bound(
-      c, laddr
-    ).si_then([this, c, laddr, val](auto iter) {
-      return insert(c, iter, laddr, val);
-    });
-  }
-
-  /**
-   * update
-   *
-   * Invalidates all outstanding iterators for this tree on this transaction.
-   *
-   * @param c [in] op context
-   * @param iter [in] iterator to element to update, must not be end
-   * @param val [in] val with which to update
-   * @return iterator to newly updated element
-   */
-  using update_iertr = base_iertr;
-  using update_ret = update_iertr::future<iterator>;
-  update_ret update(
-    op_context_t c,
-    iterator iter,
-    lba_map_val_t val);
-
-  /**
-   * remove
-   *
-   * Invalidates all outstanding iterators for this tree on this transaction.
-   *
-   * @param c [in] op context
-   * @param iter [in] iterator to element to remove, must not be end
-   */
-  using remove_iertr = base_iertr;
-  using remove_ret = remove_iertr::future<>;
-  remove_ret remove(
-    op_context_t c,
-    iterator iter);
-
-  /**
-   * init_cached_extent
-   *
-   * Checks whether e is live (reachable from lba tree) and drops or initializes
-   * accordingly.
-   *
-   * Returns if e is live.
-   */
-  using init_cached_extent_iertr = base_iertr;
-  using init_cached_extent_ret = init_cached_extent_iertr::future<bool>;
-  init_cached_extent_ret init_cached_extent(op_context_t c, CachedExtentRef e);
-
-  /// get_leaf_if_live: get leaf node at laddr/addr if still live
-  using get_leaf_if_live_iertr = base_iertr;
-  using get_leaf_if_live_ret = get_leaf_if_live_iertr::future<CachedExtentRef>;
-  get_leaf_if_live_ret get_leaf_if_live(
-    op_context_t c,
-    paddr_t addr,
-    laddr_t laddr,
-    seastore_off_t len);
-
-  /// get_internal_if_live: get internal node at laddr/addr if still live
-  using get_internal_if_live_iertr = base_iertr;
-  using get_internal_if_live_ret = get_internal_if_live_iertr::future<CachedExtentRef>;
-  get_internal_if_live_ret get_internal_if_live(
-    op_context_t c,
-    paddr_t addr,
-    laddr_t laddr,
-    seastore_off_t len);
-
-  /**
-   * rewrite_lba_extent
-   *
-   * Rewrites a fresh copy of extent into transaction and updates internal
-   * references.
-   */
-  using rewrite_lba_extent_iertr = base_iertr;
-  using rewrite_lba_extent_ret = rewrite_lba_extent_iertr::future<>;
-  rewrite_lba_extent_ret rewrite_lba_extent(op_context_t c, CachedExtentRef e);
-
-private:
-  lba_root_t root;
-  bool root_dirty = false;
-
-  using get_internal_node_iertr = base_iertr;
-  using get_internal_node_ret = get_internal_node_iertr::future<LBAInternalNodeRef>;
-  static get_internal_node_ret get_internal_node(
-    op_context_t c,
-    depth_t depth,
-    paddr_t offset,
-    laddr_t begin,
-    laddr_t end);
-
-  using get_leaf_node_iertr = base_iertr;
-  using get_leaf_node_ret = get_leaf_node_iertr::future<LBALeafNodeRef>;
-  static get_leaf_node_ret get_leaf_node(
-    op_context_t c,
-    paddr_t offset,
-    laddr_t begin,
-    laddr_t end);
-
-  using lookup_root_iertr = base_iertr;
-  using lookup_root_ret = lookup_root_iertr::future<>;
-  lookup_root_ret lookup_root(
-    op_context_t c,
-    iterator &iter,
-    mapped_space_visitor_t *visitor) const {
-    if (root.get_depth() > 1) {
-      return get_internal_node(
-       c,
-       root.get_depth(),
-       root.get_location(),
-       0,
-       L_ADDR_MAX
-      ).si_then([this, visitor, &iter](LBAInternalNodeRef root_node) {
-       iter.get_internal(root.get_depth()).node = root_node;
-       if (visitor) (*visitor)(root_node->get_paddr(), root_node->get_length());
-       return lookup_root_iertr::now();
-      });
-    } else {
-      return get_leaf_node(
-       c,
-       root.get_location(),
-       0,
-       L_ADDR_MAX
-      ).si_then([visitor, &iter](LBALeafNodeRef root_node) {
-       iter.leaf.node = root_node;
-       if (visitor) (*visitor)(root_node->get_paddr(), root_node->get_length());
-       return lookup_root_iertr::now();
-      });
-    }
-  }
-
-  using lookup_internal_level_iertr = base_iertr;
-  using lookup_internal_level_ret = lookup_internal_level_iertr::future<>;
-  template <typename F>
-  static lookup_internal_level_ret lookup_internal_level(
-    op_context_t c,
-    depth_t depth,
-    iterator &iter,
-    F &f,
-    mapped_space_visitor_t *visitor
-  ) {
-    assert(depth > 1);
-    auto &parent_entry = iter.get_internal(depth + 1);
-    auto parent = parent_entry.node;
-    auto node_iter = parent->iter_idx(parent_entry.pos);
-    auto next_iter = node_iter + 1;
-    auto begin = node_iter->get_key();
-    auto end = next_iter == parent->end()
-      ? parent->get_node_meta().end
-      : next_iter->get_key();
-    return get_internal_node(
-      c,
-      depth,
-      node_iter->get_val().maybe_relative_to(parent->get_paddr()),
-      begin,
-      end
-    ).si_then([depth, visitor, &iter, &f](LBAInternalNodeRef node) {
-      auto &entry = iter.get_internal(depth);
-      entry.node = node;
-      auto node_iter = f(*node);
-      assert(node_iter != node->end());
-      entry.pos = node_iter->get_offset();
-      if (visitor) (*visitor)(node->get_paddr(), node->get_length());
-      return seastar::now();
-    });
-  }
-
-  using lookup_leaf_iertr = base_iertr;
-  using lookup_leaf_ret = lookup_leaf_iertr::future<>;
-  template <typename F>
-  static lookup_internal_level_ret lookup_leaf(
-    op_context_t c,
-    iterator &iter,
-    F &f,
-    mapped_space_visitor_t *visitor
-  ) {
-    auto &parent_entry = iter.get_internal(2);
-    auto parent = parent_entry.node;
-    assert(parent);
-    auto node_iter = parent->iter_idx(parent_entry.pos);
-    auto next_iter = node_iter + 1;
-    auto begin = node_iter->get_key();
-    auto end = next_iter == parent->end()
-      ? parent->get_node_meta().end
-      : next_iter->get_key();
-
-    return get_leaf_node(
-      c,
-      node_iter->get_val().maybe_relative_to(parent->get_paddr()),
-      begin,
-      end
-    ).si_then([visitor, &iter, &f](LBALeafNodeRef node) {
-      iter.leaf.node = node;
-      auto node_iter = f(*node);
-      iter.leaf.pos = node_iter->get_offset();
-      if (visitor) (*visitor)(node->get_paddr(), node->get_length());
-      return seastar::now();
-    });
-  }
-
-  /**
-   * lookup_depth_range
-   *
-   * Performs node lookups on depths [from, to) using li and ll to
-   * specific target at each level.  Note, may leave the iterator
-   * at_boundary(), call handle_boundary() prior to returning out
-   * lf LBABtree.
-   */
-  using lookup_depth_range_iertr = base_iertr;
-  using lookup_depth_range_ret = lookup_depth_range_iertr::future<>;
-  template <typename LI, typename LL>
-  static lookup_depth_range_ret lookup_depth_range(
-    op_context_t c, ///< [in] context
-    iterator &iter, ///< [in,out] iterator to populate
-    depth_t from,   ///< [in] from inclusive
-    depth_t to,     ///< [in] to exclusive, (to <= from, to == from is a noop)
-    LI &li,         ///< [in] internal->iterator
-    LL &ll,         ///< [in] leaf->iterator
-    mapped_space_visitor_t *visitor ///< [in] mapped space visitor
-  ) {
-    LOG_PREFIX(LBATree::lookup_depth_range);
-    SUBDEBUGT(seastore_lba_details, "{} -> {}", c.trans, from, to);
-    return seastar::do_with(
-      from,
-      [c, to, visitor, &iter, &li, &ll](auto &d) {
-       return trans_intr::repeat(
-         [c, to, visitor, &iter, &li, &ll, &d] {
-           if (d > to) {
-             return [&] {
-               if (d > 1) {
-                 return lookup_internal_level(
-                   c,
-                   d,
-                   iter,
-                   li,
-                   visitor);
-               } else {
-                 assert(d == 1);
-                 return lookup_leaf(
-                   c,
-                   iter,
-                   ll,
-                   visitor);
-               }
-             }().si_then([&d] {
-               --d;
-               return lookup_depth_range_iertr::make_ready_future<
-                 seastar::stop_iteration
-                 >(seastar::stop_iteration::no);
-             });
-           } else {
-             return lookup_depth_range_iertr::make_ready_future<
-               seastar::stop_iteration
-               >(seastar::stop_iteration::yes);
-           }
-         });
-      });
-  }
-
-  using lookup_iertr = base_iertr;
-  using lookup_ret = lookup_iertr::future<iterator>;
-  template <typename LI, typename LL>
-  lookup_ret lookup(
-    op_context_t c,
-    LI &&lookup_internal,
-    LL &&lookup_leaf,
-    mapped_space_visitor_t *visitor
-  ) const {
-    LOG_PREFIX(LBATree::lookup);
-    return seastar::do_with(
-      iterator{root.get_depth()},
-      std::forward<LI>(lookup_internal),
-      std::forward<LL>(lookup_leaf),
-      [FNAME, this, visitor, c](auto &iter, auto &li, auto &ll) {
-       return lookup_root(
-         c, iter, visitor
-       ).si_then([FNAME, this, visitor, c, &iter, &li, &ll] {
-         if (iter.get_depth() > 1) {
-           auto &root_entry = *(iter.internal.rbegin());
-           root_entry.pos = li(*(root_entry.node)).get_offset();
-         } else {
-           auto &root_entry = iter.leaf;
-           auto riter = ll(*(root_entry.node));
-           root_entry.pos = riter->get_offset();
-         }
-         SUBDEBUGT(seastore_lba_details, "got root, depth {}", c.trans, root.get_depth());
-         return lookup_depth_range(
-           c,
-           iter,
-           root.get_depth() - 1,
-           0,
-           li,
-           ll,
-           visitor
-         ).si_then([c, visitor, &iter] {
-           if (iter.at_boundary()) {
-             return iter.handle_boundary(c, visitor);
-           } else {
-             return lookup_iertr::now();
-           }
-         });
-       }).si_then([&iter] {
-         return std::move(iter);
-       });
-      });
-  }
-
-  /**
-   * handle_split
-   *
-   * Prepare iter for insertion.  iter should begin pointing at
-   * the valid insertion point (lower_bound(laddr)).
-   *
-   * Upon completion, iter will point at the
-   * position at which laddr should be inserted.  iter may, upon completion,
-   * point at the end of a leaf other than the end leaf if that's the correct
-   * insertion point.
-   */
-  using find_insertion_iertr = base_iertr;
-  using find_insertion_ret = find_insertion_iertr::future<>;
-  static find_insertion_ret find_insertion(
-    op_context_t c,
-    laddr_t laddr,
-    iterator &iter);
-
-  /**
-   * handle_split
-   *
-   * Split nodes in iter as needed for insertion. First, scan iter from leaf
-   * to find first non-full level.  Then, split from there towards leaf.
-   *
-   * Upon completion, iter will point at the newly split insertion point.  As
-   * with find_insertion, iter's leaf pointer may be end without iter being
-   * end.
-   */
-  using handle_split_iertr = base_iertr;
-  using handle_split_ret = handle_split_iertr::future<>;
-  handle_split_ret handle_split(
-    op_context_t c,
-    iterator &iter);
-
-  using handle_merge_iertr = base_iertr;
-  using handle_merge_ret = handle_merge_iertr::future<>;
-  handle_merge_ret handle_merge(
-    op_context_t c,
-    iterator &iter);
-
-  using update_internal_mapping_iertr = base_iertr;
-  using update_internal_mapping_ret = update_internal_mapping_iertr::future<>;
-  update_internal_mapping_ret update_internal_mapping(
-    op_context_t c,
-    depth_t depth,
-    laddr_t laddr,
-    paddr_t old_addr,
-    paddr_t new_addr);
-
-  template <typename T>
-  using node_position_t = iterator::node_position_t<T>;
-
-  template <typename NodeType>
-  friend base_iertr::future<typename NodeType::Ref> get_node(
-    op_context_t c,
-    depth_t depth,
-    paddr_t addr,
-    laddr_t begin,
-    laddr_t end);
-
-  template <typename NodeType>
-  friend handle_merge_ret merge_level(
-    op_context_t c,
-    depth_t depth,
-    node_position_t<LBAInternalNode> &parent_pos,
-    node_position_t<NodeType> &pos);
-};
-
-}
index 683efbed46a7efde2a74c50c01c7c48c5ae38a59..8b2530e7c91e4221acb07e015933316769677e1a 100644 (file)
 #include "crimson/os/seastore/seastore_types.h"
 #include "crimson/os/seastore/cache.h"
 #include "crimson/os/seastore/cached_extent.h"
-#include "crimson/os/seastore/lba_manager/btree/lba_btree_node.h"
-#include "crimson/os/seastore/lba_manager/btree/btree_range_pin.h"
+
+#include "crimson/os/seastore/btree/btree_range_pin.h"
+#include "crimson/os/seastore/btree/fixed_kv_btree.h"
 
 namespace crimson::os::seastore::lba_manager::btree {
 
 using base_iertr = LBAManager::base_iertr;
 
-struct op_context_t {
-  Cache &cache;
-  Transaction &trans;
-  btree_pin_set_t *pins = nullptr;
-};
-
 /**
  * lba_map_val_t
  *
@@ -57,15 +52,12 @@ WRITE_EQ_OPERATORS_4(
 
 std::ostream& operator<<(std::ostream& out, const lba_map_val_t&);
 
-class BtreeLBAPin;
-using BtreeLBAPinRef = std::unique_ptr<BtreeLBAPin>;
-
 constexpr size_t LBA_BLOCK_SIZE = 4096;
 
 /**
  * lba_node_meta_le_t
  *
- * On disk layout for lba_node_meta_t
+ * On disk layout for fixed_kv_node_meta_t
  */
 struct lba_node_meta_le_t {
   laddr_le_t begin = laddr_le_t(0);
@@ -74,13 +66,13 @@ struct lba_node_meta_le_t {
 
   lba_node_meta_le_t() = default;
   lba_node_meta_le_t(const lba_node_meta_le_t &) = default;
-  explicit lba_node_meta_le_t(const lba_node_meta_t &val)
+  explicit lba_node_meta_le_t(const fixed_kv_node_meta_t<laddr_t> &val)
     : begin(ceph_le64(val.begin)),
       end(ceph_le64(val.end)),
       depth(init_depth_le(val.depth)) {}
 
-  operator lba_node_meta_t() const {
-    return lba_node_meta_t{ begin, end, depth };
+  operator fixed_kv_node_meta_t<laddr_t>() const {
+    return fixed_kv_node_meta_t<laddr_t>{ begin, end, depth };
   }
 };
 
@@ -92,13 +84,13 @@ struct lba_node_meta_le_t {
 struct LBANode : CachedExtent {
   using LBANodeRef = TCachedExtentRef<LBANode>;
 
-  btree_range_pin_t pin;
+  btree_range_pin_t<laddr_t> pin;
 
   LBANode(ceph::bufferptr &&ptr) : CachedExtent(std::move(ptr)), pin(this) {}
   LBANode(const LBANode &rhs)
     : CachedExtent(rhs), pin(rhs.pin, this) {}
 
-  virtual lba_node_meta_t get_node_meta() const = 0;
+  virtual fixed_kv_node_meta_t<laddr_t> get_node_meta() const = 0;
 
   virtual ~LBANode() = default;
 
@@ -145,7 +137,7 @@ struct LBAInternalNode
   : LBANode,
     common::FixedKVNodeLayout<
       INTERNAL_NODE_CAPACITY,
-      lba_node_meta_t, lba_node_meta_le_t,
+      fixed_kv_node_meta_t<laddr_t>, lba_node_meta_le_t,
       laddr_t, laddr_le_t,
       paddr_t, paddr_le_t> {
   using Ref = TCachedExtentRef<LBAInternalNode>;
@@ -157,7 +149,7 @@ struct LBAInternalNode
 
   static constexpr extent_types_t TYPE = extent_types_t::LADDR_INTERNAL;
 
-  lba_node_meta_t get_node_meta() const { return get_meta(); }
+  fixed_kv_node_meta_t<laddr_t> get_node_meta() const { return get_meta(); }
 
   CachedExtentRef duplicate_for_write() final {
     assert(delta_buffer.empty());
@@ -207,7 +199,7 @@ struct LBAInternalNode
   }
 
   std::tuple<Ref, Ref, laddr_t>
-  make_split_children(op_context_t c) {
+  make_split_children(op_context_t<laddr_t> c) {
     auto left = c.cache.alloc_new_extent<LBAInternalNode>(
       c.trans, LBA_BLOCK_SIZE);
     auto right = c.cache.alloc_new_extent<LBAInternalNode>(
@@ -222,7 +214,7 @@ struct LBAInternalNode
   }
 
   Ref make_full_merge(
-    op_context_t c,
+    op_context_t<laddr_t> c,
     Ref &right) {
     auto replacement = c.cache.alloc_new_extent<LBAInternalNode>(
       c.trans, LBA_BLOCK_SIZE);
@@ -233,7 +225,7 @@ struct LBAInternalNode
 
   std::tuple<Ref, Ref, laddr_t>
   make_balanced(
-    op_context_t c,
+    op_context_t<laddr_t> c,
     Ref &_right,
     bool prefer_left) {
     ceph_assert(_right->get_type() == get_type());
@@ -383,7 +375,7 @@ struct LBALeafNode
   : LBANode,
     common::FixedKVNodeLayout<
       LEAF_NODE_CAPACITY,
-      lba_node_meta_t, lba_node_meta_le_t,
+      fixed_kv_node_meta_t<laddr_t>, lba_node_meta_le_t,
       laddr_t, laddr_le_t,
       lba_map_val_t, lba_map_val_le_t> {
   using Ref = TCachedExtentRef<LBALeafNode>;
@@ -395,7 +387,7 @@ struct LBALeafNode
 
   static constexpr extent_types_t TYPE = extent_types_t::LADDR_LEAF;
 
-  lba_node_meta_t get_node_meta() const { return get_meta(); }
+  fixed_kv_node_meta_t<laddr_t> get_node_meta() const { return get_meta(); }
 
   CachedExtentRef duplicate_for_write() final {
     assert(delta_buffer.empty());
@@ -438,7 +430,7 @@ struct LBALeafNode
 
 
   std::tuple<Ref, Ref, laddr_t>
-  make_split_children(op_context_t c) {
+  make_split_children(op_context_t<laddr_t> c) {
     auto left = c.cache.alloc_new_extent<LBALeafNode>(
       c.trans, LBA_BLOCK_SIZE);
     auto right = c.cache.alloc_new_extent<LBALeafNode>(
@@ -453,7 +445,7 @@ struct LBALeafNode
   }
 
   Ref make_full_merge(
-    op_context_t c,
+    op_context_t<laddr_t> c,
     Ref &right) {
     auto replacement = c.cache.alloc_new_extent<LBALeafNode>(
       c.trans, LBA_BLOCK_SIZE);
@@ -464,7 +456,7 @@ struct LBALeafNode
 
   std::tuple<Ref, Ref, laddr_t>
   make_balanced(
-    op_context_t c,
+    op_context_t<laddr_t> c,
     Ref &_right,
     bool prefer_left) {
     ceph_assert(_right->get_type() == get_type());
index 624229979159a6bf28a83f7a091e016c9b2e5faa..e59ad3dee7e0e24a99f3f0f02cdf21f77c841ed5 100644 (file)
@@ -69,10 +69,10 @@ ObjectDataHandler::write_ret do_removals(
       LOG_PREFIX(object_data_handler.cc::do_removals);
       DEBUGT("decreasing ref: {}",
             ctx.t,
-            pin->get_laddr());
+            pin->get_key());
       return ctx.tm.dec_ref(
        ctx.t,
-       pin->get_laddr()
+       pin->get_key()
       ).si_then(
        [](auto){},
        ObjectDataHandler::write_iertr::pass_further{},
@@ -129,14 +129,14 @@ ObjectDataHandler::write_ret do_insertions(
          region.len
        ).si_then([FNAME, ctx, &region](auto pin) {
          ceph_assert(pin->get_length() == region.len);
-         if (pin->get_laddr() != region.addr) {
+         if (pin->get_key() != region.addr) {
            ERRORT(
              "inconsistent laddr: pin: {} region {}",
              ctx.t,
-             pin->get_laddr(),
+             pin->get_key(),
              region.addr);
          }
-         ceph_assert(pin->get_laddr() == region.addr);
+         ceph_assert(pin->get_key() == region.addr);
          return ObjectDataHandler::write_iertr::now();
        });
       }
@@ -156,7 +156,7 @@ using split_ret_bare = std::pair<
 using split_ret = get_iertr::future<split_ret_bare>;
 split_ret split_pin_left(context_t ctx, LBAPinRef &pin, laddr_t offset)
 {
-  const auto pin_offset = pin->get_laddr();
+  const auto pin_offset = pin->get_key();
   assert_aligned(pin_offset);
   ceph_assert(offset >= pin_offset);
   if (offset == pin_offset) {
@@ -181,7 +181,7 @@ split_ret split_pin_left(context_t ctx, LBAPinRef &pin, laddr_t offset)
     );
   } else {
     // Data, return up to offset to prepend
-    auto to_prepend = offset - pin->get_laddr();
+    auto to_prepend = offset - pin->get_key();
     return read_pin(ctx, pin->duplicate()
     ).si_then([to_prepend](auto extent) {
       return get_iertr::make_ready_future<split_ret_bare>(
@@ -194,8 +194,8 @@ split_ret split_pin_left(context_t ctx, LBAPinRef &pin, laddr_t offset)
 /// Reverse of split_pin_left
 split_ret split_pin_right(context_t ctx, LBAPinRef &pin, laddr_t end)
 {
-  const auto pin_begin = pin->get_laddr();
-  const auto pin_end = pin->get_laddr() + pin->get_length();
+  const auto pin_begin = pin->get_key();
+  const auto pin_end = pin->get_key() + pin->get_length();
   assert_aligned(pin_end);
   ceph_assert(pin_end >= end);
   if (end == pin_end) {
@@ -273,7 +273,7 @@ ObjectDataHandler::write_ret ObjectDataHandler::prepare_data_reservation(
     ).si_then([max_object_size=max_object_size, &object_data](auto pin) {
       ceph_assert(pin->get_length() == max_object_size);
       object_data.update_reserved(
-       pin->get_laddr(),
+       pin->get_key(),
        pin->get_length());
       return write_iertr::now();
     });
@@ -302,17 +302,17 @@ ObjectDataHandler::clear_ret ObjectDataHandler::trim_data_reservation(
        _pins.swap(pins);
        ceph_assert(pins.size());
        auto &pin = *pins.front();
-       ceph_assert(pin.get_laddr() >= object_data.get_reserved_data_base());
+       ceph_assert(pin.get_key() >= object_data.get_reserved_data_base());
        ceph_assert(
-         pin.get_laddr() <= object_data.get_reserved_data_base() + size);
-       auto pin_offset = pin.get_laddr() -
+         pin.get_key() <= object_data.get_reserved_data_base() + size);
+       auto pin_offset = pin.get_key() -
          object_data.get_reserved_data_base();
-       if ((pin.get_laddr() == (object_data.get_reserved_data_base() + size)) ||
+       if ((pin.get_key() == (object_data.get_reserved_data_base() + size)) ||
          (pin.get_paddr().is_zero())) {
          /* First pin is exactly at the boundary or is a zero pin.  Either way,
           * remove all pins and add a single zero pin to the end. */
          to_write.emplace_back(
-           pin.get_laddr(),
+           pin.get_key(),
            object_data.get_reserved_data_len() - pin_offset);
          return clear_iertr::now();
        } else {
@@ -332,7 +332,7 @@ ObjectDataHandler::clear_ret ObjectDataHandler::trim_data_reservation(
              ));
            bl.append_zero(p2roundup(size, ctx.tm.get_block_size()) - size);
            to_write.emplace_back(
-             pin.get_laddr(),
+             pin.get_key(),
              bl);
            to_write.emplace_back(
              object_data.get_reserved_data_base() +
@@ -387,9 +387,9 @@ ObjectDataHandler::write_ret ObjectDataHandler::overwrite(
             offset,
             bl.length());
       ceph_assert(pins.size() >= 1);
-      auto pin_begin = pins.front()->get_laddr();
+      auto pin_begin = pins.front()->get_key();
       ceph_assert(pin_begin <= offset);
-      auto pin_end = pins.back()->get_laddr() + pins.back()->get_length();
+      auto pin_end = pins.back()->get_key() + pins.back()->get_length();
       ceph_assert(pin_end >= (offset + bl.length()));
 
       return split_pin_left(
@@ -500,7 +500,7 @@ ObjectDataHandler::read_ret ObjectDataHandler::read(
          ).si_then([ctx, loffset, len, &ret](auto _pins) {
            // offset~len falls within reserved region and len > 0
            ceph_assert(_pins.size() >= 1);
-           ceph_assert((*_pins.begin())->get_laddr() <= loffset);
+           ceph_assert((*_pins.begin())->get_key() <= loffset);
            return seastar::do_with(
              std::move(_pins),
              loffset,
@@ -511,9 +511,9 @@ ObjectDataHandler::read_ret ObjectDataHandler::read(
                  -> read_iertr::future<> {
                    ceph_assert(current <= (loffset + len));
                    ceph_assert(
-                     (loffset + len) > pin->get_laddr());
+                     (loffset + len) > pin->get_key());
                    laddr_t end = std::min(
-                     pin->get_laddr() + pin->get_length(),
+                     pin->get_key() + pin->get_length(),
                      loffset + len);
                    if (pin->get_paddr().is_zero()) {
                      ceph_assert(end > current); // See LBAManager::get_mappings
@@ -583,12 +583,12 @@ ObjectDataHandler::fiemap_ret ObjectDataHandler::fiemap(
         len
       ).si_then([loffset, len, &object_data, &ret](auto &&pins) {
        ceph_assert(pins.size() >= 1);
-        ceph_assert((*pins.begin())->get_laddr() <= loffset);
+        ceph_assert((*pins.begin())->get_key() <= loffset);
        for (auto &&i: pins) {
          if (!(i->get_paddr().is_zero())) {
-           auto ret_left = std::max(i->get_laddr(), loffset);
+           auto ret_left = std::max(i->get_key(), loffset);
            auto ret_right = std::min(
-             i->get_laddr() + i->get_length(),
+             i->get_key() + i->get_length(),
              loffset + len);
            assert(ret_right > ret_left);
            ret.emplace(
index e6ddbb221507531da04fa196bb6944860e672c29..646f78b76af5050aa9f8be489c995d41d7d1decd 100644 (file)
@@ -1081,22 +1081,22 @@ public:
 };
 
 /**
- * lba_root_t 
+ * phy_tree_root_t
  */
-class __attribute__((packed)) lba_root_t {
+class __attribute__((packed)) phy_tree_root_t {
   paddr_le_t root_addr;
   depth_le_t depth = init_extent_len_le(0);
   
 public:
-  lba_root_t() = default;
+  phy_tree_root_t() = default;
   
-  lba_root_t(paddr_t addr, depth_t depth)
+  phy_tree_root_t(paddr_t addr, depth_t depth)
     : root_addr(addr), depth(init_depth_le(depth)) {}
 
-  lba_root_t(const lba_root_t &o) = default;
-  lba_root_t(lba_root_t &&o) = default;
-  lba_root_t &operator=(const lba_root_t &o) = default;
-  lba_root_t &operator=(lba_root_t &&o) = default;
+  phy_tree_root_t(const phy_tree_root_t &o) = default;
+  phy_tree_root_t(phy_tree_root_t &&o) = default;
+  phy_tree_root_t &operator=(const phy_tree_root_t &o) = default;
+  phy_tree_root_t &operator=(phy_tree_root_t &&o) = default;
   
   paddr_t get_location() const {
     return root_addr;
@@ -1188,6 +1188,7 @@ public:
   }
 };
 
+using lba_root_t = phy_tree_root_t;
 
 /**
  * root_t
index e38c1ee9e05746825567dc06e7e59ac27a582e13..61170ac8b629002e28a21638ecbfda6a76b8d2cc 100644 (file)
@@ -481,14 +481,14 @@ TransactionManager::get_extent_if_live_ret TransactionManager::get_extent_if_liv
       return lba_manager->get_mapping(
        t,
        laddr).si_then([=, &t] (LBAPinRef pin) -> inner_ret {
-         ceph_assert(pin->get_laddr() == laddr);
+         ceph_assert(pin->get_key() == laddr);
          if (pin->get_paddr() == addr) {
            if (pin->get_length() != (extent_len_t)len) {
              ERRORT(
                "Invalid pin {}~{} {} found for "
                "extent {} {}~{} {}",
                t,
-               pin->get_laddr(),
+               pin->get_key(),
                pin->get_length(),
                pin->get_paddr(),
                type,
index 20342c29c788c58e1fd4263f3d07898e88542a96..a8275715474b4c35a242780606383d8e9ef022d8 100644 (file)
@@ -141,7 +141,7 @@ struct lba_btree_test : btree_test_base {
   std::map<laddr_t, lba_map_val_t> check;
 
   auto get_op_context(Transaction &t) {
-    return op_context_t{*cache, t};
+    return op_context_t<laddr_t>{*cache, t};
   }
 
   LBAManager::mkfs_ret test_structure_setup(Transaction &t) final {
@@ -376,11 +376,11 @@ struct btree_lba_manager_test : btree_test_base {
       }).unsafe_get0();
     logger().debug("alloc'd: {}", *ret);
     EXPECT_EQ(len, ret->get_length());
-    auto [b, e] = get_overlap(t, ret->get_laddr(), len);
+    auto [b, e] = get_overlap(t, ret->get_key(), len);
     EXPECT_EQ(b, e);
     t.mappings.emplace(
       std::make_pair(
-       ret->get_laddr(),
+       ret->get_key(),
        test_extent_t{
          ret->get_paddr(),
          ret->get_length(),
@@ -474,7 +474,7 @@ struct btree_lba_manager_test : btree_test_base {
       EXPECT_EQ(ret_list.size(), 1);
       auto &ret = *ret_list.begin();
       EXPECT_EQ(i.second.addr, ret->get_paddr());
-      EXPECT_EQ(laddr, ret->get_laddr());
+      EXPECT_EQ(laddr, ret->get_key());
       EXPECT_EQ(len, ret->get_length());
 
       auto ret_pin = with_trans_intr(
@@ -484,7 +484,7 @@ struct btree_lba_manager_test : btree_test_base {
            t, laddr);
        }).unsafe_get0();
       EXPECT_EQ(i.second.addr, ret_pin->get_paddr());
-      EXPECT_EQ(laddr, ret_pin->get_laddr());
+      EXPECT_EQ(laddr, ret_pin->get_key());
       EXPECT_EQ(len, ret_pin->get_length());
     }
     with_trans_intr(
@@ -554,8 +554,8 @@ TEST_F(btree_lba_manager_test, force_split_merge)
          check_mappings(t);
          check_mappings();
        }
-       incref_mapping(t, ret->get_laddr());
-       decref_mapping(t, ret->get_laddr());
+       incref_mapping(t, ret->get_key());
+       decref_mapping(t, ret->get_key());
       }
       logger().debug("submitting transaction");
       submit_test_transaction(std::move(t));