This PR moves the current LBABtree and btree_range_pin out of the LBA
manager and renames LBABtree to FixedKVBtree, in preparation for
implementing backrefs.

Signed-off-by: Xuehan Xu <xxhdx1985126@gmail.com>
lba_manager.cc
segment_cleaner.cc
lba_manager/btree/btree_lba_manager.cc
- lba_manager/btree/btree_range_pin.cc
- lba_manager/btree/lba_btree.cc
lba_manager/btree/lba_btree_node.cc
omap_manager.cc
omap_manager/btree/btree_omap_manager.cc
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include <boost/intrusive/set.hpp>
+
+#include "crimson/common/log.h"
+
+#include "crimson/os/seastore/cached_extent.h"
+#include "crimson/os/seastore/seastore_types.h"
+
+namespace crimson::os::seastore {
+
+template <typename bound_t>
+struct fixed_kv_node_meta_t {
+ bound_t begin = 0;
+ bound_t end = 0;
+ depth_t depth = 0;
+
+ bool is_parent_of(const fixed_kv_node_meta_t &other) const {
+ return (depth == other.depth + 1) &&
+ (begin <= other.begin) &&
+ (end > other.begin);
+ }
+
+ std::pair<fixed_kv_node_meta_t, fixed_kv_node_meta_t> split_into(bound_t pivot) const {
+ return std::make_pair(
+ fixed_kv_node_meta_t{begin, pivot, depth},
+ fixed_kv_node_meta_t{pivot, end, depth});
+ }
+
+ static fixed_kv_node_meta_t merge_from(
+ const fixed_kv_node_meta_t &lhs, const fixed_kv_node_meta_t &rhs) {
+ ceph_assert(lhs.depth == rhs.depth);
+ return fixed_kv_node_meta_t{lhs.begin, rhs.end, lhs.depth};
+ }
+
+ static std::pair<fixed_kv_node_meta_t, fixed_kv_node_meta_t>
+ rebalance(const fixed_kv_node_meta_t &lhs, const fixed_kv_node_meta_t &rhs, bound_t pivot) {
+ ceph_assert(lhs.depth == rhs.depth);
+ return std::make_pair(
+ fixed_kv_node_meta_t{lhs.begin, pivot, lhs.depth},
+ fixed_kv_node_meta_t{pivot, rhs.end, lhs.depth});
+ }
+
+ bool is_root() const {
+ return begin == 0 && end == L_ADDR_MAX;
+ }
+};
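+
+// Illustrative sketch (assuming laddr_t as the bound type): split_into
+// and merge_from round-trip a depth-1 range [0, 100) at pivot 50:
+//   fixed_kv_node_meta_t<laddr_t> m{0, 100, 1};
+//   auto [l, r] = m.split_into(50);   // l = [0, 50), r = [50, 100)
+//   auto merged = fixed_kv_node_meta_t<laddr_t>::merge_from(l, r);
+//   assert(merged.begin == 0 && merged.end == 100 && merged.depth == 1);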
+
+template <typename bound_t>
+inline std::ostream &operator<<(
+ std::ostream &lhs,
+ const fixed_kv_node_meta_t<bound_t> &rhs)
+{
+ return lhs << "btree_node_meta_t("
+ << "begin=" << rhs.begin
+ << ", end=" << rhs.end
+ << ", depth=" << rhs.depth
+ << ")";
+}
+/**
+ * btree_range_pin_t
+ *
+ * Element tracked by btree_pin_set_t below. Encapsulates the intrusive_set
+ * hook, the fixed_kv_node_meta_t representing the key range covered by a node,
+ * and extent and ref members intended to hold a reference when the extent
+ * should be pinned.
+ */
+template <typename T>
+class btree_pin_set_t;
+
+template <typename node_bound_t>
+class btree_range_pin_t : public boost::intrusive::set_base_hook<> {
+ friend class btree_pin_set_t<node_bound_t>;
+ fixed_kv_node_meta_t<node_bound_t> range;
+
+ btree_pin_set_t<node_bound_t> *pins = nullptr;
+
+ // We need to be able to remember extent without holding a reference,
+ // but we can do it more compactly -- TODO
+ CachedExtent *extent = nullptr;
+ CachedExtentRef ref;
+
+ using index_t = boost::intrusive::set<btree_range_pin_t>;
+
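+ // Index ordering is (-depth, begin): pins at greater depth (closer to
+ // the root) sort before their children, letting maybe_get_parent() below
+ // locate a parent with a single upper_bound() at depth + 1.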
+ static auto get_tuple(const fixed_kv_node_meta_t<node_bound_t> &meta) {
+ return std::make_tuple(-meta.depth, meta.begin);
+ }
+
+ void acquire_ref() {
+ ref = CachedExtentRef(extent);
+ }
+
+ void drop_ref() {
+ ref.reset();
+ }
+
+public:
+ btree_range_pin_t() = default;
+ btree_range_pin_t(CachedExtent *extent)
+ : extent(extent) {}
+ btree_range_pin_t(const btree_range_pin_t &rhs, CachedExtent *extent)
+ : range(rhs.range), extent(extent) {}
+
+ bool has_ref() const {
+ return !!ref;
+ }
+
+ bool is_root() const {
+ return range.is_root();
+ }
+
+ void set_range(const fixed_kv_node_meta_t<node_bound_t> &nrange) {
+ range = nrange;
+ }
+ void set_extent(CachedExtent *nextent) {
+ ceph_assert(!extent);
+ extent = nextent;
+ }
+
+ CachedExtent &get_extent() {
+ assert(extent);
+ return *extent;
+ }
+
+ void take_pin(btree_range_pin_t &other)
+ {
+ ceph_assert(other.extent);
+ if (other.pins) {
+ other.pins->replace_pin(*this, other);
+ pins = other.pins;
+ other.pins = nullptr;
+
+ if (other.has_ref()) {
+ other.drop_ref();
+ acquire_ref();
+ }
+ }
+ }
+
+ friend bool operator<(
+ const btree_range_pin_t &lhs, const btree_range_pin_t &rhs) {
+ return get_tuple(lhs.range) < get_tuple(rhs.range);
+ }
+ friend bool operator>(
+ const btree_range_pin_t &lhs, const btree_range_pin_t &rhs) {
+ return get_tuple(lhs.range) > get_tuple(rhs.range);
+ }
+ friend bool operator==(
+ const btree_range_pin_t &lhs, const btree_range_pin_t &rhs) {
+ return get_tuple(lhs.range) == get_tuple(rhs.range);
+ }
+
+ struct meta_cmp_t {
+ bool operator()(
+ const btree_range_pin_t &lhs, const fixed_kv_node_meta_t<node_bound_t> &rhs) const {
+ return get_tuple(lhs.range) < get_tuple(rhs);
+ }
+ bool operator()(
+ const fixed_kv_node_meta_t<node_bound_t> &lhs, const btree_range_pin_t &rhs) const {
+ return get_tuple(lhs) < get_tuple(rhs.range);
+ }
+ };
+
+ friend std::ostream &operator<<(
+ std::ostream &lhs,
+ const btree_range_pin_t<node_bound_t> &rhs) {
+ return lhs << "btree_range_pin_t("
+ << "begin=" << rhs.range.begin
+ << ", end=" << rhs.range.end
+ << ", depth=" << rhs.range.depth
+ << ", extent=" << rhs.extent
+ << ")";
+ }
+
+ template <typename>
+ friend class BtreeNodePin;
+ ~btree_range_pin_t()
+ {
+ ceph_assert(!pins == !is_linked());
+ ceph_assert(!ref);
+ if (pins) {
+ crimson::get_logger(ceph_subsys_seastore_lba
+ ).debug("{}: removing {}", __func__, *this);
+ pins->remove_pin(*this, true);
+ }
+ extent = nullptr;
+ }
+
+};
+
+/**
+ * btree_pin_set_t
+ *
+ * Ensures that for every cached node, all parent btree nodes required
+ * to map it are present in cache. Relocating these nodes can
+ * therefore be done without further reads or cache space.
+ *
+ * Contains a btree_range_pin_t for every clean or dirty btree node
+ * or LogicalCachedExtent instance in cache at any point in time.
+ * For any btree node, the contained btree_range_pin_t will hold
+ * a reference to that node pinning it in cache as long as that
+ * node has children in the set. This invariant can be violated
+ * only by calling retire_extent and is repaired by calling
+ * check_parent synchronously after adding any new extents.
+ */
+template <typename node_bound_t>
+class btree_pin_set_t {
+ friend class btree_range_pin_t<node_bound_t>;
+ using pins_t = typename btree_range_pin_t<node_bound_t>::index_t;
+ pins_t pins;
+
+ /// Removes pin from set optionally checking whether parent has other children
+ void remove_pin(btree_range_pin_t<node_bound_t> &pin, bool do_check_parent)
+ {
+ crimson::get_logger(ceph_subsys_seastore_lba).debug("{}: {}", __func__, pin);
+ ceph_assert(pin.is_linked());
+ ceph_assert(pin.pins);
+ ceph_assert(!pin.ref);
+
+ pins.erase(pin);
+ pin.pins = nullptr;
+
+ if (do_check_parent) {
+ check_parent(pin);
+ }
+ }
+
+ void replace_pin(
+ btree_range_pin_t<node_bound_t> &to,
+ btree_range_pin_t<node_bound_t> &from)
+ {
+ pins.replace_node(pins.iterator_to(from), to);
+ }
+
+ /// Returns parent pin if exists
+ btree_range_pin_t<node_bound_t> *maybe_get_parent(
+ const fixed_kv_node_meta_t<node_bound_t> &meta)
+ {
+ auto cmeta = meta;
+ cmeta.depth++;
+ auto iter = pins.upper_bound(
+ cmeta,
+ typename btree_range_pin_t<node_bound_t>::meta_cmp_t());
+ if (iter == pins.begin()) {
+ return nullptr;
+ } else {
+ --iter;
+ if (iter->range.is_parent_of(meta)) {
+ return &*iter;
+ } else {
+ return nullptr;
+ }
+ }
+ }
+
+ /// Returns earliest child pin if exist
+ const btree_range_pin_t<node_bound_t>
+ *maybe_get_first_child(const fixed_kv_node_meta_t<node_bound_t> &meta) const
+ {
+ if (meta.depth == 0) {
+ return nullptr;
+ }
+
+ auto cmeta = meta;
+ cmeta.depth--;
+
+ auto iter = pins.lower_bound(
+ cmeta,
+ typename btree_range_pin_t<node_bound_t>::meta_cmp_t());
+ if (iter == pins.end()) {
+ return nullptr;
+ } else if (meta.is_parent_of(iter->range)) {
+ return &*iter;
+ } else {
+ return nullptr;
+ }
+ }
+
+ /// Releases pin if it has no children
+ void release_if_no_children(btree_range_pin_t<node_bound_t> &pin)
+ {
+ ceph_assert(pin.is_linked());
+ if (maybe_get_first_child(pin.range) == nullptr) {
+ pin.drop_ref();
+ }
+ }
+
+public:
+ /// Adds pin to set, assumes set is consistent
+ void add_pin(btree_range_pin_t<node_bound_t> &pin)
+ {
+ ceph_assert(!pin.is_linked());
+ ceph_assert(!pin.pins);
+ ceph_assert(!pin.ref);
+
+ auto [prev, inserted] = pins.insert(pin);
+ if (!inserted) {
+ crimson::get_logger(ceph_subsys_seastore_lba).error(
+ "{}: unable to add {} ({}), found {} ({})",
+ __func__,
+ pin,
+ *(pin.extent),
+ *prev,
+ *(prev->extent));
+ ceph_assert(0 == "impossible");
+ return;
+ }
+ pin.pins = this;
+ if (!pin.is_root()) {
+ auto *parent = maybe_get_parent(pin.range);
+ ceph_assert(parent);
+ if (!parent->has_ref()) {
+ crimson::get_logger(ceph_subsys_seastore_lba
+ ).debug("{}: acquiring parent {}", __func__,
+ static_cast<void*>(parent));
+ parent->acquire_ref();
+ } else {
+ crimson::get_logger(ceph_subsys_seastore_lba).debug(
+ "{}: parent has ref {}", __func__,
+ static_cast<void*>(parent));
+ }
+ }
+ if (maybe_get_first_child(pin.range) != nullptr) {
+ crimson::get_logger(ceph_subsys_seastore_lba).debug(
+ "{}: acquiring self {}", __func__, pin);
+ pin.acquire_ref();
+ }
+ }
+
+
+ /**
+ * retire/check_parent
+ *
+ * See BtreeLBAManager::complete_transaction.
+ * retire removes the specified pin from the set, but does not
+ * check parents. After any new extents are added to the set,
+ * the caller is required to call check_parent to restore the
+ * invariant.
+ */
+ void retire(btree_range_pin_t<node_bound_t> &pin)
+ {
+ pin.drop_ref();
+ remove_pin(pin, false);
+ }
+
+ void check_parent(btree_range_pin_t<node_bound_t> &pin)
+ {
+ auto parent = maybe_get_parent(pin.range);
+ if (parent) {
+ crimson::get_logger(ceph_subsys_seastore_lba
+ ).debug("{}: releasing parent {}", __func__, *parent);
+ release_if_no_children(*parent);
+ }
+ }
+
+ template <typename F>
+ void scan(F &&f) {
+ for (auto &i : pins) {
+ std::invoke(f, i);
+ }
+ }
+
+ ~btree_pin_set_t() {
+ ceph_assert(pins.empty());
+ }
+};
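+
+// Typical lifecycle, as a hedged sketch (extent and bounds below are
+// hypothetical; the in-tree caller is BtreeLBAManager):
+//   btree_pin_set_t<laddr_t> pins;
+//   btree_range_pin_t<laddr_t> pin(&extent);   // extent: a CachedExtent
+//   pin.set_range({begin, end, depth});
+//   pins.add_pin(pin);    // parent (if any) now holds a ref
+//   ...
+//   pins.retire(pin);     // removes without the parent check; call
+//                         // check_parent() after adding replacement
+//                         // extents to restore the invariant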
+
+template <typename key_t>
+class BtreeNodePin : public PhysicalNodePin<key_t> {
+
+ /**
+ * parent
+ *
+ * populated until link_extent is called to ensure cache residence
+ * until add_pin is called.
+ */
+ CachedExtentRef parent;
+
+ paddr_t paddr;
+ btree_range_pin_t<key_t> pin;
+
+public:
+ BtreeNodePin() = default;
+
+ BtreeNodePin(
+ CachedExtentRef parent,
+ paddr_t paddr,
+ fixed_kv_node_meta_t<key_t> &&meta)
+ : parent(parent), paddr(paddr) {
+ pin.set_range(std::move(meta));
+ }
+
+ btree_range_pin_t<key_t>& get_range_pin() {
+ return pin;
+ }
+
+ CachedExtentRef get_parent() {
+ return parent;
+ }
+
+ void set_parent(CachedExtentRef nparent) {
+ parent = nparent;
+ }
+
+ void link_extent(LogicalCachedExtent *ref) final {
+ pin.set_extent(ref);
+ }
+
+ extent_len_t get_length() const final {
+ ceph_assert(pin.range.end > pin.range.begin);
+ return pin.range.end - pin.range.begin;
+ }
+
+ paddr_t get_paddr() const final {
+ return paddr;
+ }
+
+ key_t get_key() const final {
+ return pin.range.begin;
+ }
+
+ PhysicalNodePinRef<key_t> duplicate() const final {
+ auto ret = std::unique_ptr<BtreeNodePin<key_t>>(
+ new BtreeNodePin<key_t>);
+ ret->pin.set_range(pin.range);
+ ret->paddr = paddr;
+ ret->parent = parent;
+ return ret;
+ }
+
+ void take_pin(PhysicalNodePin<key_t> &opin) final {
+ pin.take_pin(static_cast<BtreeNodePin<key_t>&>(opin).pin);
+ }
+
+ bool has_been_invalidated() const final {
+ return parent->has_been_invalidated();
+ }
+};
+
+}
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:nil -*-
+// vim: ts=8 sw=2 smarttab expandtab
+
+#pragma once
+
+#include <boost/container/static_vector.hpp>
+#include <sys/mman.h>
+#include <memory>
+#include <string.h>
+
+#include "crimson/os/seastore/logging.h"
+
+#include "crimson/os/seastore/lba_manager.h"
+#include "crimson/os/seastore/seastore_types.h"
+#include "crimson/os/seastore/btree/btree_range_pin.h"
+
+namespace crimson::os::seastore {
+
+template <typename node_key_t>
+struct op_context_t {
+ Cache &cache;
+ Transaction &trans;
+ btree_pin_set_t<node_key_t> *pins = nullptr;
+};
+
+template <typename T>
+Transaction::tree_stats_t& get_tree_stats(Transaction &t);
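+
+// Note: each FixedKVBtree instantiation is expected to supply a
+// specialization of get_tree_stats; this PR adds the LBABtree
+// specialization in btree_lba_manager.cc.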
+
+template <
+ typename node_key_t,
+ typename node_val_t,
+ typename internal_node_t,
+ typename leaf_node_t,
+ size_t node_size>
+class FixedKVBtree {
+ static constexpr size_t MAX_DEPTH = 16;
+ using self_type = FixedKVBtree<
+ node_key_t,
+ node_val_t,
+ internal_node_t,
+ leaf_node_t,
+ node_size>;
+public:
+ using InternalNodeRef = TCachedExtentRef<internal_node_t>;
+ using LeafNodeRef = TCachedExtentRef<leaf_node_t>;
+
+ using base_ertr = crimson::errorator<
+ crimson::ct_error::input_output_error>;
+ using base_iertr = trans_iertr<base_ertr>;
+
+ class iterator;
+ using iterator_fut = base_iertr::future<iterator>;
+
+ using mapped_space_visitor_t = std::function<
+ void(paddr_t, extent_len_t)>;
+
+ class iterator {
+ public:
+ iterator(const iterator &rhs) noexcept :
+ internal(rhs.internal), leaf(rhs.leaf) {}
+ iterator(iterator &&rhs) noexcept :
+ internal(std::move(rhs.internal)), leaf(std::move(rhs.leaf)) {}
+
+ iterator &operator=(const iterator &) = default;
+ iterator &operator=(iterator &&) = default;
+
+ iterator_fut next(
+ op_context_t<node_key_t> c,
+ mapped_space_visitor_t *visitor=nullptr) const
+ {
+ assert_valid();
+ assert(!is_end());
+
+ auto ret = *this;
+ ret.leaf.pos++;
+ if (ret.at_boundary()) {
+ return seastar::do_with(
+ ret,
+ [c, visitor](auto &ret) mutable {
+ return ret.handle_boundary(
+ c, visitor
+ ).si_then([&ret] {
+ return std::move(ret);
+ });
+ });
+ } else {
+ return iterator_fut(
+ interruptible::ready_future_marker{},
+ ret);
+ }
+
+ }
+
+ iterator_fut prev(op_context_t<node_key_t> c) const
+ {
+ assert_valid();
+ assert(!is_begin());
+
+ auto ret = *this;
+
+ if (ret.leaf.pos > 0) {
+ ret.leaf.pos--;
+ return iterator_fut(
+ interruptible::ready_future_marker{},
+ ret);
+ }
+
+ depth_t depth_with_space = 2;
+ for (; depth_with_space <= get_depth(); ++depth_with_space) {
+ if (ret.get_internal(depth_with_space).pos > 0) {
+ break;
+ }
+ }
+
+ assert(depth_with_space <= ret.get_depth()); // must not be begin()
+ return seastar::do_with(
+ std::move(ret),
+ [](const internal_node_t &internal) { return --internal.end(); },
+ [](const leaf_node_t &leaf) { return --leaf.end(); },
+ [c, depth_with_space](auto &ret, auto &li, auto &ll) {
+ for (depth_t depth = 2; depth < depth_with_space; ++depth) {
+ ret.get_internal(depth).reset();
+ }
+ ret.leaf.reset();
+ ret.get_internal(depth_with_space).pos--;
+ // note, cannot result in at_boundary() by construction
+ return lookup_depth_range(
+ c, ret, depth_with_space - 1, 0, li, ll, nullptr
+ ).si_then([&ret] {
+ assert(!ret.at_boundary());
+ return std::move(ret);
+ });
+ });
+ }
+
+ void assert_valid() const {
+ assert(leaf.node);
+ assert(leaf.pos <= leaf.node->get_size());
+
+ for (auto &i: internal) {
+ (void)i;
+ assert(i.node);
+ assert(i.pos < i.node->get_size());
+ }
+ }
+
+ depth_t get_depth() const {
+ return internal.size() + 1;
+ }
+
+ auto &get_internal(depth_t depth) {
+ assert(depth > 1);
+ assert((depth - 2) < internal.size());
+ return internal[depth - 2];
+ }
+
+ const auto &get_internal(depth_t depth) const {
+ assert(depth > 1);
+ assert((depth - 2) < internal.size());
+ return internal[depth - 2];
+ }
+
+ node_key_t get_key() const {
+ assert(!is_end());
+ return leaf.node->iter_idx(leaf.pos).get_key();
+ }
+ node_val_t get_val() const {
+ assert(!is_end());
+ auto ret = leaf.node->iter_idx(leaf.pos).get_val();
+ ret.paddr = ret.paddr.maybe_relative_to(leaf.node->get_paddr());
+ return ret;
+ }
+
+ bool is_end() const {
+ // external methods may only resolve at a boundary if at end
+ return at_boundary();
+ }
+
+ bool is_begin() const {
+ for (auto &i: internal) {
+ if (i.pos != 0)
+ return false;
+ }
+ return leaf.pos == 0;
+ }
+
+ PhysicalNodePinRef<node_key_t> get_pin() const {
+ assert(!is_end());
+ auto val = get_val();
+ auto key = get_key();
+ return std::make_unique<BtreeNodePin<node_key_t>>(
+ leaf.node,
+ val.paddr,
+ fixed_kv_node_meta_t<node_key_t>{ key, key + val.len, 0 });
+ }
+
+ typename leaf_node_t::Ref get_leaf_node() {
+ return leaf.node;
+ }
+
+ private:
+ iterator() noexcept {}
+ iterator(depth_t depth) noexcept : internal(depth - 1) {}
+
+ friend class FixedKVBtree;
+ static constexpr uint16_t INVALID = std::numeric_limits<uint16_t>::max();
+ template <typename NodeType>
+ struct node_position_t {
+ typename NodeType::Ref node;
+ uint16_t pos = INVALID;
+
+ void reset() {
+ *this = node_position_t{};
+ }
+
+ auto get_iter() {
+ assert(pos != INVALID);
+ assert(pos < node->get_size());
+ return node->iter_idx(pos);
+ }
+ };
+ boost::container::static_vector<
+ node_position_t<internal_node_t>, MAX_DEPTH> internal;
+ node_position_t<leaf_node_t> leaf;
+
+ bool at_boundary() const {
+ assert(leaf.pos <= leaf.node->get_size());
+ return leaf.pos == leaf.node->get_size();
+ }
+
+ using handle_boundary_ertr = base_iertr;
+ using handle_boundary_ret = handle_boundary_ertr::future<>;
+ handle_boundary_ret handle_boundary(
+ op_context_t<node_key_t> c,
+ mapped_space_visitor_t *visitor)
+ {
+ assert(at_boundary());
+ depth_t depth_with_space = 2;
+ for (; depth_with_space <= get_depth(); ++depth_with_space) {
+ if ((get_internal(depth_with_space).pos + 1) <
+ get_internal(depth_with_space).node->get_size()) {
+ break;
+ }
+ }
+
+ if (depth_with_space <= get_depth()) {
+ return seastar::do_with(
+ [](const internal_node_t &internal) { return internal.begin(); },
+ [](const leaf_node_t &leaf) { return leaf.begin(); },
+ [this, c, depth_with_space, visitor](auto &li, auto &ll) {
+ for (depth_t depth = 2; depth < depth_with_space; ++depth) {
+ get_internal(depth).reset();
+ }
+ leaf.reset();
+ get_internal(depth_with_space).pos++;
+ // note, cannot result in at_boundary() by construction
+ return lookup_depth_range(
+ c, *this, depth_with_space - 1, 0, li, ll, visitor
+ );
+ });
+ } else {
+ // end
+ return seastar::now();
+ }
+ }
+
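+ // Returns 0 if the leaf has room; otherwise the depth from which
+ // handle_split() must start splitting (every node at or below it is
+ // full). A result of get_depth() means the root itself must split.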
+ depth_t check_split() const {
+ if (!leaf.node->at_max_capacity()) {
+ return 0;
+ }
+ for (depth_t split_from = 1; split_from < get_depth(); ++split_from) {
+ if (!get_internal(split_from + 1).node->at_max_capacity())
+ return split_from;
+ }
+ return get_depth();
+ }
+
+ depth_t check_merge() const {
+ if (!leaf.node->below_min_capacity()) {
+ return 0;
+ }
+ for (depth_t merge_from = 1; merge_from < get_depth(); ++merge_from) {
+ if (!get_internal(merge_from + 1).node->below_min_capacity())
+ return merge_from;
+ }
+ return get_depth();
+ }
+ };
+
+ FixedKVBtree(phy_tree_root_t root) : root(root) {}
+
+ bool is_root_dirty() const {
+ return root_dirty;
+ }
+ phy_tree_root_t get_root_undirty() {
+ ceph_assert(root_dirty);
+ root_dirty = false;
+ return root;
+ }
+
+ /// mkfs
+ using mkfs_ret = phy_tree_root_t;
+ static mkfs_ret mkfs(op_context_t<node_key_t> c) {
+ auto root_leaf = c.cache.template alloc_new_extent<leaf_node_t>(
+ c.trans,
+ node_size);
+ root_leaf->set_size(0);
+ fixed_kv_node_meta_t<node_key_t> meta{0, L_ADDR_MAX, 1};
+ root_leaf->set_meta(meta);
+ root_leaf->pin.set_range(meta);
+ get_tree_stats<self_type>(c.trans).depth = 1u;
+ return phy_tree_root_t{root_leaf->get_paddr(), 1u};
+ }
+
+ /**
+ * lower_bound
+ *
+ * @param c [in] context
+ * @param addr [in] target key
+ * @return least iterator with key >= addr
+ */
+ iterator_fut lower_bound(
+ op_context_t<node_key_t> c,
+ node_key_t addr,
+ mapped_space_visitor_t *visitor=nullptr) const
+ {
+ LOG_PREFIX(FixedKVBtree::lower_bound);
+ return lookup(
+ c,
+ [addr](const internal_node_t &internal) {
+ assert(internal.get_size() > 0);
+ auto iter = internal.upper_bound(addr);
+ assert(iter != internal.begin());
+ --iter;
+ return iter;
+ },
+ [FNAME, c, addr](const leaf_node_t &leaf) {
+ auto ret = leaf.lower_bound(addr);
+ SUBDEBUGT(
+ seastore_lba_details,
+ "leaf addr {}, got ret offset {}, size {}, end {}",
+ c.trans,
+ addr,
+ ret.get_offset(),
+ leaf.get_size(),
+ ret == leaf.end());
+ return ret;
+ },
+ visitor
+ ).si_then([FNAME, c](auto &&ret) {
+ SUBDEBUGT(
+ seastore_lba_details,
+ "ret.leaf.pos {}",
+ c.trans,
+ ret.leaf.pos);
+ ret.assert_valid();
+ return std::move(ret);
+ });
+ }
+
+
+ /**
+ * upper_bound
+ *
+ * @param c [in] context
+ * @param addr [in] target key
+ * @return least iterator with key > addr
+ */
+ iterator_fut upper_bound(
+ op_context_t<node_key_t> c,
+ node_key_t addr
+ ) const {
+ return lower_bound(
+ c, addr
+ ).si_then([c, addr](auto iter) {
+ if (!iter.is_end() && iter.get_key() == addr) {
+ return iter.next(c);
+ } else {
+ return iterator_fut(
+ interruptible::ready_future_marker{},
+ iter);
+ }
+ });
+ }
+
+ /**
+ * upper_bound_right
+ *
+ * @param c [in] context
+ * @param addr [in] target key
+ * @return least iterator i s.t. i.get_key() + i.get_val().len > addr
+ */
+ iterator_fut upper_bound_right(
+ op_context_t<node_key_t> c,
+ node_key_t addr) const
+ {
+ return lower_bound(
+ c, addr
+ ).si_then([c, addr](auto iter) {
+ if (iter.is_begin()) {
+ return iterator_fut(
+ interruptible::ready_future_marker{},
+ iter);
+ } else {
+ return iter.prev(
+ c
+ ).si_then([iter, addr](auto prev) {
+ if ((prev.get_key() + prev.get_val().len) > addr) {
+ return iterator_fut(
+ interruptible::ready_future_marker{},
+ prev);
+ } else {
+ return iterator_fut(
+ interruptible::ready_future_marker{},
+ iter);
+ }
+ });
+ }
+ });
+ }
+
+ iterator_fut begin(op_context_t<node_key_t> c) const {
+ return lower_bound(c, 0);
+ }
+ iterator_fut end(op_context_t<node_key_t> c) const {
+ return upper_bound(c, L_ADDR_MAX);
+ }
+
+ using iterate_repeat_ret_inner = base_iertr::future<
+ seastar::stop_iteration>;
+ template <typename F>
+ static base_iertr::future<> iterate_repeat(
+ op_context_t<node_key_t> c,
+ iterator_fut &&iter_fut,
+ F &&f,
+ mapped_space_visitor_t *visitor=nullptr) {
+ return std::move(
+ iter_fut
+ ).si_then([c, visitor, f=std::forward<F>(f)](auto iter) {
+ return seastar::do_with(
+ iter,
+ std::move(f),
+ [c, visitor](auto &pos, auto &f) {
+ return trans_intr::repeat(
+ [c, visitor, &f, &pos] {
+ return f(
+ pos
+ ).si_then([c, visitor, &pos](auto done) {
+ if (done == seastar::stop_iteration::yes) {
+ return iterate_repeat_ret_inner(
+ interruptible::ready_future_marker{},
+ seastar::stop_iteration::yes);
+ } else {
+ ceph_assert(!pos.is_end());
+ return pos.next(
+ c, visitor
+ ).si_then([&pos](auto next) {
+ pos = next;
+ return iterate_repeat_ret_inner(
+ interruptible::ready_future_marker{},
+ seastar::stop_iteration::no);
+ });
+ }
+ });
+ });
+ });
+ });
+ }
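+
+ // Usage sketch (hypothetical caller): visit every mapping with key in
+ // [begin, end):
+ //   iterate_repeat(
+ //     c,
+ //     btree.upper_bound_right(c, begin),
+ //     [end](auto &pos) {
+ //       if (pos.is_end() || pos.get_key() >= end) {
+ //         return iterate_repeat_ret_inner(
+ //           interruptible::ready_future_marker{},
+ //           seastar::stop_iteration::yes);
+ //       }
+ //       // ... use pos.get_key() / pos.get_val() ...
+ //       return iterate_repeat_ret_inner(
+ //         interruptible::ready_future_marker{},
+ //         seastar::stop_iteration::no);
+ //     });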
+
+ /**
+ * insert
+ *
+ * Inserts val at laddr with iter as a hint. If an element already
+ * exists at laddr, returns an iterator to the existing element and false.
+ *
+ * Invalidates all outstanding iterators for this tree on this transaction.
+ *
+ * @param c [in] op context
+ * @param iter [in] hint; insertion is constant-time if laddr is immediately prior to iter
+ * @param laddr [in] addr at which to insert
+ * @param val [in] val to insert
+ * @return pair<iter, bool> where iter points to element at addr, bool true
+ * iff element at laddr did not exist.
+ */
+ using insert_iertr = base_iertr;
+ using insert_ret = insert_iertr::future<std::pair<iterator, bool>>;
+ insert_ret insert(
+ op_context_t<node_key_t> c,
+ iterator iter,
+ node_key_t laddr,
+ node_val_t val
+ ) {
+ LOG_PREFIX(FixedKVBtree::insert);
+ SUBDEBUGT(
+ seastore_lba_details,
+ "inserting laddr {} at iter {}",
+ c.trans,
+ laddr,
+ iter.is_end() ? L_ADDR_MAX : iter.get_key());
+ return seastar::do_with(
+ iter,
+ [this, c, laddr, val](auto &ret) {
+ return find_insertion(
+ c, laddr, ret
+ ).si_then([this, c, laddr, val, &ret] {
+ if (!ret.at_boundary() && ret.get_key() == laddr) {
+ return insert_ret(
+ interruptible::ready_future_marker{},
+ std::make_pair(ret, false));
+ } else {
+ ++(get_tree_stats<self_type>(c.trans).num_inserts);
+ return handle_split(
+ c, ret
+ ).si_then([c, laddr, val, &ret] {
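+ // Copy-on-write: mutations must land on a transaction-local
+ // mutable copy, so duplicate the leaf if it isn't already pending.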
+ if (!ret.leaf.node->is_pending()) {
+ CachedExtentRef mut = c.cache.duplicate_for_write(
+ c.trans, ret.leaf.node
+ );
+ ret.leaf.node = mut->cast<leaf_node_t>();
+ }
+ auto iter = typename leaf_node_t::const_iterator(
+ ret.leaf.node.get(), ret.leaf.pos);
+ assert(iter == ret.leaf.node->lower_bound(laddr));
+ assert(iter == ret.leaf.node->end() || iter->get_key() > laddr);
+ assert(laddr >= ret.leaf.node->get_meta().begin &&
+ laddr < ret.leaf.node->get_meta().end);
+ ret.leaf.node->insert(iter, laddr, val);
+ return insert_ret(
+ interruptible::ready_future_marker{},
+ std::make_pair(ret, true));
+ });
+ }
+ });
+ });
+ }
+
+ insert_ret insert(
+ op_context_t<node_key_t> c,
+ node_key_t laddr,
+ node_val_t val) {
+ return lower_bound(
+ c, laddr
+ ).si_then([this, c, laddr, val](auto iter) {
+ return this->insert(c, iter, laddr, val);
+ });
+ }
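+
+ // Usage sketch (hypothetical caller; assumes lba_map_val_t carries
+ // {len, paddr, refcount, checksum}): map a 4K extent at laddr:
+ //   btree.insert(c, laddr, lba_map_val_t{4096, paddr, 1, 0}
+ //   ).si_then([](auto p) {
+ //     auto [iter, inserted] = p;
+ //     ceph_assert(inserted);   // laddr was previously unmapped
+ //   });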
+
+ /**
+ * update
+ *
+ * Invalidates all outstanding iterators for this tree on this transaction.
+ *
+ * @param c [in] op context
+ * @param iter [in] iterator to element to update, must not be end
+ * @param val [in] val with which to update
+ * @return iterator to newly updated element
+ */
+ using update_iertr = base_iertr;
+ using update_ret = update_iertr::future<iterator>;
+ update_ret update(
+ op_context_t<node_key_t> c,
+ iterator iter,
+ node_val_t val)
+ {
+ LOG_PREFIX(FixedKVBtree::update);
+ SUBDEBUGT(
+ seastore_lba_details,
+ "update element at {}",
+ c.trans,
+ iter.is_end() ? L_ADDR_MAX : iter.get_key());
+ if (!iter.leaf.node->is_pending()) {
+ CachedExtentRef mut = c.cache.duplicate_for_write(
+ c.trans, iter.leaf.node
+ );
+ iter.leaf.node = mut->cast<leaf_node_t>();
+ }
+ iter.leaf.node->update(
+ iter.leaf.node->iter_idx(iter.leaf.pos),
+ val);
+ return update_ret(
+ interruptible::ready_future_marker{},
+ iter);
+ }
+
+
+ /**
+ * remove
+ *
+ * Invalidates all outstanding iterators for this tree on this transaction.
+ *
+ * @param c [in] op context
+ * @param iter [in] iterator to element to remove, must not be end
+ */
+ using remove_iertr = base_iertr;
+ using remove_ret = remove_iertr::future<>;
+ remove_ret remove(
+ op_context_t<node_key_t> c,
+ iterator iter)
+ {
+ LOG_PREFIX(FixedKVBtree::remove);
+ SUBDEBUGT(
+ seastore_lba_details,
+ "remove element at {}",
+ c.trans,
+ iter.is_end() ? L_ADDR_MAX : iter.get_key());
+ assert(!iter.is_end());
+ ++(get_tree_stats<self_type>(c.trans).num_erases);
+ return seastar::do_with(
+ iter,
+ [this, c](auto &ret) {
+ if (!ret.leaf.node->is_pending()) {
+ CachedExtentRef mut = c.cache.duplicate_for_write(
+ c.trans, ret.leaf.node
+ );
+ ret.leaf.node = mut->cast<leaf_node_t>();
+ }
+ ret.leaf.node->remove(
+ ret.leaf.node->iter_idx(ret.leaf.pos));
+
+ return handle_merge(
+ c, ret
+ );
+ });
+ }
+
+ /**
+ * init_cached_extent
+ *
+ * Checks whether e is live (reachable from fixed kv tree) and drops or initializes
+ * accordingly.
+ *
+ * Returns whether e is live.
+ */
+ using init_cached_extent_iertr = base_iertr;
+ using init_cached_extent_ret = init_cached_extent_iertr::future<bool>;
+ init_cached_extent_ret init_cached_extent(
+ op_context_t<node_key_t> c,
+ CachedExtentRef e)
+ {
+ assert(!e->is_logical());
+ LOG_PREFIX(FixedKVBtree::init_cached_extent);
+ SUBDEBUGT(seastore_lba_details, "extent {}", c.trans, *e);
+ if (e->get_type() == internal_node_t::TYPE) {
+ auto eint = e->cast<internal_node_t>();
+ return lower_bound(
+ c, eint->get_node_meta().begin
+ ).si_then([e, c, eint](auto iter) {
+ // Note, this check is valid even if iter.is_end()
+ LOG_PREFIX(FixedKVBtree::init_cached_extent);
+ depth_t cand_depth = eint->get_node_meta().depth;
+ if (cand_depth <= iter.get_depth() &&
+ &*iter.get_internal(cand_depth).node == &*eint) {
+ SUBDEBUGT(
+ seastore_lba_details,
+ "extent {} is live",
+ c.trans,
+ *eint);
+ return true;
+ } else {
+ SUBDEBUGT(
+ seastore_lba_details,
+ "extent {} is not live",
+ c.trans,
+ *eint);
+ return false;
+ }
+ });
+ } else if (e->get_type() == leaf_node_t::TYPE) {
+ auto eleaf = e->cast<leaf_node_t>();
+ return lower_bound(
+ c, eleaf->get_node_meta().begin
+ ).si_then([c, e, eleaf](auto iter) {
+ // Note, this check is valid even if iter.is_end()
+ LOG_PREFIX(FixedKVBtree::init_cached_extent);
+ if (iter.leaf.node == &*eleaf) {
+ SUBDEBUGT(
+ seastore_lba_details,
+ "extent {} is live",
+ c.trans,
+ *eleaf);
+ return true;
+ } else {
+ SUBDEBUGT(
+ seastore_lba_details,
+ "extent {} is not live",
+ c.trans,
+ *eleaf);
+ return false;
+ }
+ });
+ } else {
+ SUBDEBUGT(
+ seastore_lba_details,
+ "found other extent {} type {}",
+ c.trans,
+ *e,
+ e->get_type());
+ return init_cached_extent_ret(
+ interruptible::ready_future_marker{},
+ true);
+ }
+ }
+
+ /// get_leaf_if_live: get leaf node at laddr/addr if still live
+ using get_leaf_if_live_iertr = base_iertr;
+ using get_leaf_if_live_ret = get_leaf_if_live_iertr::future<CachedExtentRef>;
+ get_leaf_if_live_ret get_leaf_if_live(
+ op_context_t<node_key_t> c,
+ paddr_t addr,
+ node_key_t laddr,
+ seastore_off_t len)
+ {
+ LOG_PREFIX(FixedKVBtree::get_leaf_if_live);
+ return lower_bound(
+ c, laddr
+ ).si_then([FNAME, c, addr, laddr, len](auto iter) {
+ if (iter.leaf.node->get_paddr() == addr) {
+ SUBDEBUGT(
+ seastore_lba_details,
+ "extent laddr {} addr {}~{} found: {}",
+ c.trans,
+ laddr,
+ addr,
+ len,
+ *iter.leaf.node);
+ return CachedExtentRef(iter.leaf.node);
+ } else {
+ SUBDEBUGT(
+ seastore_lba_details,
+ "extent laddr {} addr {}~{} is not live, does not match node {}",
+ c.trans,
+ laddr,
+ addr,
+ len,
+ *iter.leaf.node);
+ return CachedExtentRef();
+ }
+ });
+ }
+
+
+ /// get_internal_if_live: get internal node at laddr/addr if still live
+ using get_internal_if_live_iertr = base_iertr;
+ using get_internal_if_live_ret = get_internal_if_live_iertr::future<CachedExtentRef>;
+ get_internal_if_live_ret get_internal_if_live(
+ op_context_t<node_key_t> c,
+ paddr_t addr,
+ node_key_t laddr,
+ seastore_off_t len)
+ {
+ LOG_PREFIX(FixedKVBtree::get_internal_if_live);
+ return lower_bound(
+ c, laddr
+ ).si_then([FNAME, c, addr, laddr, len](auto iter) {
+ for (depth_t d = 2; d <= iter.get_depth(); ++d) {
+ CachedExtent &node = *iter.get_internal(d).node;
+ auto internal_node = node.cast<internal_node_t>();
+ if (internal_node->get_paddr() == addr) {
+ SUBDEBUGT(
+ seastore_lba_details,
+ "extent laddr {} addr {}~{} found: {}",
+ c.trans,
+ laddr,
+ addr,
+ len,
+ *internal_node);
+ assert(internal_node->get_node_meta().begin == laddr);
+ return CachedExtentRef(internal_node);
+ }
+ }
+ SUBDEBUGT(
+ seastore_lba_details,
+ "extent laddr {} addr {}~{} is not live, no matching internal node",
+ c.trans,
+ laddr,
+ addr,
+ len);
+ return CachedExtentRef();
+ });
+ }
+
+
+ /**
+ * rewrite_extent
+ *
+ * Rewrites a fresh copy of extent into transaction and updates internal
+ * references.
+ */
+ using rewrite_extent_iertr = base_iertr;
+ using rewrite_extent_ret = rewrite_extent_iertr::future<>;
+ rewrite_extent_ret rewrite_extent(
+ op_context_t<node_key_t> c,
+ CachedExtentRef e) {
+ LOG_PREFIX(FixedKVBtree::rewrite_extent);
+ assert(e->get_type() == internal_node_t::TYPE ||
+ e->get_type() == leaf_node_t::TYPE);
+
+ auto do_rewrite = [&](auto &fixed_kv_extent) {
+ auto n_fixed_kv_extent = c.cache.template alloc_new_extent<
+ std::remove_reference_t<decltype(fixed_kv_extent)>
+ >(
+ c.trans,
+ fixed_kv_extent.get_length());
+ fixed_kv_extent.get_bptr().copy_out(
+ 0,
+ fixed_kv_extent.get_length(),
+ n_fixed_kv_extent->get_bptr().c_str());
+ n_fixed_kv_extent->pin.set_range(n_fixed_kv_extent->get_node_meta());
+
+ /* This is a bit underhanded. Any relative addrs here must necessarily
+ * be record relative as we are rewriting a dirty extent. Thus, we
+ * are using resolve_relative_addrs with a (likely negative) block
+ * relative offset to correct them to block-relative offsets adjusted
+ * for our new transaction location.
+ *
+ * Upon commit, these now block-relative addresses will be interpreted
+ * against the real final address.
+ */
+ n_fixed_kv_extent->resolve_relative_addrs(
+ make_record_relative_paddr(0) - n_fixed_kv_extent->get_paddr());
+
+ SUBDEBUGT(
+ seastore_lba_details,
+ "rewriting {} into {}",
+ c.trans,
+ fixed_kv_extent,
+ *n_fixed_kv_extent);
+
+ return update_internal_mapping(
+ c,
+ n_fixed_kv_extent->get_node_meta().depth,
+ n_fixed_kv_extent->get_node_meta().begin,
+ e->get_paddr(),
+ n_fixed_kv_extent->get_paddr()
+ ).si_then([c, e] {
+ c.cache.retire_extent(c.trans, e);
+ });
+ };
+
+ if (e->get_type() == internal_node_t::TYPE) {
+ auto lint = e->cast<internal_node_t>();
+ return do_rewrite(*lint);
+ } else {
+ assert(e->get_type() == leaf_node_t::TYPE);
+ auto lleaf = e->cast<leaf_node_t>();
+ return do_rewrite(*lleaf);
+ }
+ }
+
+ using update_internal_mapping_iertr = base_iertr;
+ using update_internal_mapping_ret = update_internal_mapping_iertr::future<>;
+ update_internal_mapping_ret update_internal_mapping(
+ op_context_t<node_key_t> c,
+ depth_t depth,
+ node_key_t laddr,
+ paddr_t old_addr,
+ paddr_t new_addr)
+ {
+ LOG_PREFIX(FixedKVBtree::update_internal_mapping);
+ SUBDEBUGT(
+ seastore_lba_details,
+ "updating laddr {} at depth {} from {} to {}",
+ c.trans,
+ laddr,
+ depth,
+ old_addr,
+ new_addr);
+
+ return lower_bound(
+ c, laddr
+ ).si_then([=](auto iter) {
+ assert(iter.get_depth() >= depth);
+ if (depth == iter.get_depth()) {
+ SUBDEBUGT(seastore_lba_details, "update at root", c.trans);
+
+ if (laddr != 0) {
+ SUBERRORT(
+ seastore_lba_details,
+ "updating root laddr {} at depth {} from {} to {},"
+ "laddr is not 0",
+ c.trans,
+ laddr,
+ depth,
+ old_addr,
+ new_addr,
+ root.get_location());
+ ceph_assert(0 == "impossible");
+ }
+
+ if (root.get_location() != old_addr) {
+ SUBERRORT(
+ seastore_lba_details,
+ "updating root laddr {} at depth {} from {} to {},"
+ "root addr {} does not match",
+ c.trans,
+ laddr,
+ depth,
+ old_addr,
+ new_addr,
+ root.get_location());
+ ceph_assert(0 == "impossible");
+ }
+
+ root.set_location(new_addr);
+ root_dirty = true;
+ } else {
+ auto &parent = iter.get_internal(depth + 1);
+ assert(parent.node);
+ assert(parent.pos < parent.node->get_size());
+ auto piter = parent.node->iter_idx(parent.pos);
+
+ if (piter->get_key() != laddr) {
+ SUBERRORT(
+ seastore_lba_details,
+ "updating laddr {} at depth {} from {} to {},"
+ "node {} pos {} val pivot addr {} does not match",
+ c.trans,
+ laddr,
+ depth,
+ old_addr,
+ new_addr,
+ *(parent.node),
+ parent.pos,
+ piter->get_key());
+ ceph_assert(0 == "impossible");
+ }
+
+
+ if (piter->get_val() != old_addr) {
+ SUBERRORT(
+ seastore_lba_details,
+ "updating laddr {} at depth {} from {} to {},"
+ "node {} pos {} val addr {} does not match",
+ c.trans,
+ laddr,
+ depth,
+ old_addr,
+ new_addr,
+ *(parent.node),
+ parent.pos,
+ piter->get_val());
+ ceph_assert(0 == "impossible");
+ }
+
+ CachedExtentRef mut = c.cache.duplicate_for_write(
+ c.trans,
+ parent.node
+ );
+ typename internal_node_t::Ref mparent = mut->cast<internal_node_t>();
+ mparent->update(piter, new_addr);
+
+ /* Note, iter is now invalid as we didn't update either the parent
+ * node reference to the new mutable instance nor did we update the
+ * child pointer to the new node. Not a problem as we'll now just
+ * destruct it.
+ */
+ }
+ return seastar::now();
+ });
+ }
+
+
+private:
+ phy_tree_root_t root;
+ bool root_dirty = false;
+
+ using get_internal_node_iertr = base_iertr;
+ using get_internal_node_ret = get_internal_node_iertr::future<InternalNodeRef>;
+ static get_internal_node_ret get_internal_node(
+ op_context_t<node_key_t> c,
+ depth_t depth,
+ paddr_t offset,
+ node_key_t begin,
+ node_key_t end)
+ {
+ LOG_PREFIX(FixedKVBtree::get_internal_node);
+ SUBDEBUGT(
+ seastore_lba_details,
+ "reading internal at offset {}, depth {}, begin {}, end {}",
+ c.trans,
+ offset,
+ depth,
+ begin,
+ end);
+ assert(depth > 1);
+ auto init_internal = [c, depth, begin, end](internal_node_t &node) {
+ assert(!node.is_pending());
+ assert(!node.pin.is_linked());
+ node.pin.set_range(fixed_kv_node_meta_t<node_key_t>{begin, end, depth});
+ if (c.pins) {
+ c.pins->add_pin(node.pin);
+ }
+ };
+ return c.cache.template get_extent<internal_node_t>(
+ c.trans,
+ offset,
+ node_size,
+ init_internal
+ ).si_then([FNAME, c, offset, init_internal, depth, begin, end](
+ typename internal_node_t::Ref ret) {
+ SUBDEBUGT(
+ seastore_lba_details,
+ "read internal at offset {} {}",
+ c.trans,
+ offset,
+ *ret);
+ // This can only happen during init_cached_extent
+ if (c.pins && !ret->is_pending() && !ret->pin.is_linked()) {
+ assert(ret->is_dirty());
+ init_internal(*ret);
+ }
+ auto meta = ret->get_meta();
+ if (ret->get_size()) {
+ ceph_assert(meta.begin <= ret->begin()->get_key());
+ ceph_assert(meta.end > (ret->end() - 1)->get_key());
+ }
+ ceph_assert(depth == meta.depth);
+ ceph_assert(begin == meta.begin);
+ ceph_assert(end == meta.end);
+ return get_internal_node_ret(
+ interruptible::ready_future_marker{},
+ ret);
+ });
+ }
+
+
+ using get_leaf_node_iertr = base_iertr;
+ using get_leaf_node_ret = get_leaf_node_iertr::future<LeafNodeRef>;
+ static get_leaf_node_ret get_leaf_node(
+ op_context_t<node_key_t> c,
+ paddr_t offset,
+ node_key_t begin,
+ node_key_t end)
+ {
+ LOG_PREFIX(FixedKVBtree::get_leaf_node);
+ SUBDEBUGT(
+ seastore_lba_details,
+ "reading leaf at offset {}, begin {}, end {}",
+ c.trans,
+ offset,
+ begin,
+ end);
+ auto init_leaf = [c, begin, end](leaf_node_t &node) {
+ assert(!node.is_pending());
+ assert(!node.pin.is_linked());
+ node.pin.set_range(fixed_kv_node_meta_t<node_key_t>{begin, end, 1});
+ if (c.pins) {
+ c.pins->add_pin(node.pin);
+ }
+ };
+ return c.cache.template get_extent<leaf_node_t>(
+ c.trans,
+ offset,
+ node_size,
+ init_leaf
+ ).si_then([FNAME, c, offset, init_leaf, begin, end]
+ (typename leaf_node_t::Ref ret) {
+ SUBDEBUGT(
+ seastore_lba_details,
+ "read leaf at offset {} {}",
+ c.trans,
+ offset,
+ *ret);
+ // This can only happen during init_cached_extent
+ if (c.pins && !ret->is_pending() && !ret->pin.is_linked()) {
+ assert(ret->is_dirty());
+ init_leaf(*ret);
+ }
+ auto meta = ret->get_meta();
+ if (ret->get_size()) {
+ ceph_assert(meta.begin <= ret->begin()->get_key());
+ ceph_assert(meta.end > (ret->end() - 1)->get_key());
+ }
+ ceph_assert(1 == meta.depth);
+ ceph_assert(begin == meta.begin);
+ ceph_assert(end == meta.end);
+ return get_leaf_node_ret(
+ interruptible::ready_future_marker{},
+ ret);
+ });
+ }
+
+ using lookup_root_iertr = base_iertr;
+ using lookup_root_ret = lookup_root_iertr::future<>;
+ lookup_root_ret lookup_root(
+ op_context_t<node_key_t> c,
+ iterator &iter,
+ mapped_space_visitor_t *visitor) const {
+ if (root.get_depth() > 1) {
+ return get_internal_node(
+ c,
+ root.get_depth(),
+ root.get_location(),
+ 0,
+ L_ADDR_MAX
+ ).si_then([this, visitor, &iter](InternalNodeRef root_node) {
+ iter.get_internal(root.get_depth()).node = root_node;
+ if (visitor) (*visitor)(root_node->get_paddr(), root_node->get_length());
+ return lookup_root_iertr::now();
+ });
+ } else {
+ return get_leaf_node(
+ c,
+ root.get_location(),
+ 0,
+ L_ADDR_MAX
+ ).si_then([visitor, &iter](LeafNodeRef root_node) {
+ iter.leaf.node = root_node;
+ if (visitor) (*visitor)(root_node->get_paddr(), root_node->get_length());
+ return lookup_root_iertr::now();
+ });
+ }
+ }
+
+ using lookup_internal_level_iertr = base_iertr;
+ using lookup_internal_level_ret = lookup_internal_level_iertr::future<>;
+ template <typename F>
+ static lookup_internal_level_ret lookup_internal_level(
+ op_context_t<node_key_t> c,
+ depth_t depth,
+ iterator &iter,
+ F &f,
+ mapped_space_visitor_t *visitor
+ ) {
+ assert(depth > 1);
+ auto &parent_entry = iter.get_internal(depth + 1);
+ auto parent = parent_entry.node;
+ auto node_iter = parent->iter_idx(parent_entry.pos);
+ auto next_iter = node_iter + 1;
+ auto begin = node_iter->get_key();
+ auto end = next_iter == parent->end()
+ ? parent->get_node_meta().end
+ : next_iter->get_key();
+ return get_internal_node(
+ c,
+ depth,
+ node_iter->get_val().maybe_relative_to(parent->get_paddr()),
+ begin,
+ end
+ ).si_then([depth, visitor, &iter, &f](InternalNodeRef node) {
+ auto &entry = iter.get_internal(depth);
+ entry.node = node;
+ auto node_iter = f(*node);
+ assert(node_iter != node->end());
+ entry.pos = node_iter->get_offset();
+ if (visitor) (*visitor)(node->get_paddr(), node->get_length());
+ return seastar::now();
+ });
+ }
+
+ using lookup_leaf_iertr = base_iertr;
+ using lookup_leaf_ret = lookup_leaf_iertr::future<>;
+ template <typename F>
+ static lookup_leaf_ret lookup_leaf(
+ op_context_t<node_key_t> c,
+ iterator &iter,
+ F &f,
+ mapped_space_visitor_t *visitor
+ ) {
+ auto &parent_entry = iter.get_internal(2);
+ auto parent = parent_entry.node;
+ assert(parent);
+ auto node_iter = parent->iter_idx(parent_entry.pos);
+ auto next_iter = node_iter + 1;
+ auto begin = node_iter->get_key();
+ auto end = next_iter == parent->end()
+ ? parent->get_node_meta().end
+ : next_iter->get_key();
+
+ return get_leaf_node(
+ c,
+ node_iter->get_val().maybe_relative_to(parent->get_paddr()),
+ begin,
+ end
+ ).si_then([visitor, &iter, &f](LeafNodeRef node) {
+ iter.leaf.node = node;
+ auto node_iter = f(*node);
+ iter.leaf.pos = node_iter->get_offset();
+ if (visitor) (*visitor)(node->get_paddr(), node->get_length());
+ return seastar::now();
+ });
+ }
+
+ /**
+ * lookup_depth_range
+ *
+ * Performs node lookups on depths [from, to) using li and ll to
+ * select the target at each level. Note, this may leave the iterator
+ * at_boundary(); call handle_boundary() prior to returning out
+ * of FixedKVBtree.
+ */
+ using lookup_depth_range_iertr = base_iertr;
+ using lookup_depth_range_ret = lookup_depth_range_iertr::future<>;
+ template <typename LI, typename LL>
+ static lookup_depth_range_ret lookup_depth_range(
+ op_context_t<node_key_t> c, ///< [in] context
+ iterator &iter, ///< [in,out] iterator to populate
+ depth_t from, ///< [in] from inclusive
+ depth_t to, ///< [in] to exclusive, (to <= from, to == from is a noop)
+ LI &li, ///< [in] internal->iterator
+ LL &ll, ///< [in] leaf->iterator
+ mapped_space_visitor_t *visitor ///< [in] mapped space visitor
+ ) {
+ LOG_PREFIX(FixedKVBtree::lookup_depth_range);
+ SUBDEBUGT(seastore_lba_details, "{} -> {}", c.trans, from, to);
+ return seastar::do_with(
+ from,
+ [c, to, visitor, &iter, &li, &ll](auto &d) {
+ return trans_intr::repeat(
+ [c, to, visitor, &iter, &li, &ll, &d] {
+ if (d > to) {
+ return [&] {
+ if (d > 1) {
+ return lookup_internal_level(
+ c,
+ d,
+ iter,
+ li,
+ visitor);
+ } else {
+ assert(d == 1);
+ return lookup_leaf(
+ c,
+ iter,
+ ll,
+ visitor);
+ }
+ }().si_then([&d] {
+ --d;
+ return lookup_depth_range_iertr::make_ready_future<
+ seastar::stop_iteration
+ >(seastar::stop_iteration::no);
+ });
+ } else {
+ return lookup_depth_range_iertr::make_ready_future<
+ seastar::stop_iteration
+ >(seastar::stop_iteration::yes);
+ }
+ });
+ });
+ }
+
+ using lookup_iertr = base_iertr;
+ using lookup_ret = lookup_iertr::future<iterator>;
+ template <typename LI, typename LL>
+ lookup_ret lookup(
+ op_context_t<node_key_t> c,
+ LI &&lookup_internal,
+ LL &&lookup_leaf,
+ mapped_space_visitor_t *visitor
+ ) const {
+ LOG_PREFIX(FixedKVBtree::lookup);
+ return seastar::do_with(
+ iterator{root.get_depth()},
+ std::forward<LI>(lookup_internal),
+ std::forward<LL>(lookup_leaf),
+ [FNAME, this, visitor, c](auto &iter, auto &li, auto &ll) {
+ return lookup_root(
+ c, iter, visitor
+ ).si_then([FNAME, this, visitor, c, &iter, &li, &ll] {
+ if (iter.get_depth() > 1) {
+ auto &root_entry = *(iter.internal.rbegin());
+ root_entry.pos = li(*(root_entry.node)).get_offset();
+ } else {
+ auto &root_entry = iter.leaf;
+ auto riter = ll(*(root_entry.node));
+ root_entry.pos = riter->get_offset();
+ }
+ SUBDEBUGT(seastore_lba_details, "got root, depth {}", c.trans, root.get_depth());
+ return lookup_depth_range(
+ c,
+ iter,
+ root.get_depth() - 1,
+ 0,
+ li,
+ ll,
+ visitor
+ ).si_then([c, visitor, &iter] {
+ if (iter.at_boundary()) {
+ return iter.handle_boundary(c, visitor);
+ } else {
+ return lookup_iertr::now();
+ }
+ });
+ }).si_then([&iter] {
+ return std::move(iter);
+ });
+ });
+ }
+
+ /**
+ * find_insertion
+ *
+ * Prepare iter for insertion. iter should begin pointing at
+ * the valid insertion point (lower_bound(laddr)).
+ *
+ * Upon completion, iter will point at the
+ * position at which laddr should be inserted. iter may, upon completion,
+ * point at the end of a leaf other than the end leaf if that's the correct
+ * insertion point.
+ */
+ using find_insertion_iertr = base_iertr;
+ using find_insertion_ret = find_insertion_iertr::future<>;
+ static find_insertion_ret find_insertion(
+ op_context_t<node_key_t> c,
+ node_key_t laddr,
+ iterator &iter)
+ {
+ assert(iter.is_end() || iter.get_key() >= laddr);
+ if (!iter.is_end() && iter.get_key() == laddr) {
+ return seastar::now();
+ } else if (iter.leaf.node->get_node_meta().begin <= laddr) {
+#ifndef NDEBUG
+ auto p = iter;
+ if (p.leaf.pos > 0) {
+ --p.leaf.pos;
+ assert(p.get_key() < laddr);
+ }
+#endif
+ return seastar::now();
+ } else {
+ assert(iter.leaf.pos == 0);
+ return iter.prev(
+ c
+ ).si_then([laddr, &iter](auto p) {
+ boost::ignore_unused(laddr); // avoid clang warning
+ assert(p.leaf.node->get_node_meta().begin <= laddr);
+ assert(p.get_key() < laddr);
+ // Note, this is specifically allowed to violate the iterator
+ // invariant that pos is a valid index for the node in the event
+ // that the insertion point is at the end of a node.
+ p.leaf.pos++;
+ assert(p.at_boundary());
+ iter = p;
+ return seastar::now();
+ });
+ }
+ }
+
+ /**
+ * handle_split
+ *
+ * Split nodes in iter as needed for insertion. First, scan iter from leaf
+ * to find first non-full level. Then, split from there towards leaf.
+ *
+ * Upon completion, iter will point at the newly split insertion point. As
+ * with find_insertion, iter's leaf pointer may be end without iter being
+ * end.
+ */
+ using handle_split_iertr = base_iertr;
+ using handle_split_ret = handle_split_iertr::future<>;
+ handle_split_ret handle_split(
+ op_context_t<node_key_t> c,
+ iterator &iter)
+ {
+ LOG_PREFIX(FixedKVBtree::handle_split);
+
+ depth_t split_from = iter.check_split();
+
+ SUBDEBUGT(seastore_lba_details, "split_from {}, depth {}", c.trans, split_from, iter.get_depth());
+
+ if (split_from == iter.get_depth()) {
+ auto nroot = c.cache.template alloc_new_extent<internal_node_t>(
+ c.trans, node_size);
+ fixed_kv_node_meta_t<node_key_t> meta{0, L_ADDR_MAX, iter.get_depth() + 1};
+ nroot->set_meta(meta);
+ nroot->pin.set_range(meta);
+ nroot->journal_insert(
+ nroot->begin(),
+ L_ADDR_MIN,
+ root.get_location(),
+ nullptr);
+ iter.internal.push_back({nroot, 0});
+
+ root.set_location(nroot->get_paddr());
+ root.set_depth(iter.get_depth());
+ get_tree_stats<self_type>(c.trans).depth = iter.get_depth();
+ root_dirty = true;
+ }
+
+ /* pos may be either node_position_t<leaf_node_t> or
+ * node_position_t<internal_node_t> */
+ auto split_level = [&](auto &parent_pos, auto &pos) {
+ LOG_PREFIX(FixedKVBtree::handle_split);
+ auto [left, right, pivot] = pos.node->make_split_children(c);
+
+ auto parent_node = parent_pos.node;
+ auto parent_iter = parent_pos.get_iter();
+
+ parent_node->update(
+ parent_iter,
+ left->get_paddr());
+ parent_node->insert(
+ parent_iter + 1,
+ pivot,
+ right->get_paddr());
+
+ SUBDEBUGT(
+ seastore_lba_details,
+ "splitted {} into left: {}, right: {}",
+ c.trans,
+ *pos.node,
+ *left,
+ *right);
+ c.cache.retire_extent(c.trans, pos.node);
+
+ return std::make_pair(left, right);
+ };
+
+ for (; split_from > 0; --split_from) {
+ auto &parent_pos = iter.get_internal(split_from + 1);
+ if (!parent_pos.node->is_pending()) {
+ parent_pos.node = c.cache.duplicate_for_write(
+ c.trans, parent_pos.node
+ )->template cast<internal_node_t>();
+ }
+
+ if (split_from > 1) {
+ auto &pos = iter.get_internal(split_from);
+ SUBDEBUGT(
+ seastore_lba_details,
+ "splitting internal {} at depth {}, parent: {} at pos: {}",
+ c.trans,
+ *pos.node,
+ split_from,
+ *parent_pos.node,
+ parent_pos.pos);
+ auto [left, right] = split_level(parent_pos, pos);
+
+ if (pos.pos < left->get_size()) {
+ pos.node = left;
+ } else {
+ pos.node = right;
+ pos.pos -= left->get_size();
+
+ parent_pos.pos += 1;
+ }
+ } else {
+ auto &pos = iter.leaf;
+ SUBDEBUGT(
+ seastore_lba_details,
+ "splitting leaf {}, parent: {} at pos: {}",
+ c.trans,
+ *pos.node,
+ *parent_pos.node,
+ parent_pos.pos);
+ auto [left, right] = split_level(parent_pos, pos);
+
+ /* right->get_node_meta().begin == pivot == right->begin()->get_key()
+ * Thus, if pos.pos == left->get_size(), we want iter to point to
+ * left with pos.pos at the end rather than right with pos.pos = 0
+ * since the insertion would be to the left of the first element
+ * of right and thus necessarily less than right->get_node_meta().begin.
+ */
+ if (pos.pos <= left->get_size()) {
+ pos.node = left;
+ } else {
+ pos.node = right;
+ pos.pos -= left->get_size();
+
+ parent_pos.pos += 1;
+ }
+ }
+ }
+
+ return seastar::now();
+ }
+
+
+ using handle_merge_iertr = base_iertr;
+ using handle_merge_ret = handle_merge_iertr::future<>;
+ handle_merge_ret handle_merge(
+ op_context_t<node_key_t> c,
+ iterator &iter)
+ {
+ LOG_PREFIX(FixedKVBtree::handle_merge);
+ if (iter.get_depth() == 1 ||
+ !iter.leaf.node->below_min_capacity()) {
+ SUBDEBUGT(
+ seastore_lba_details,
+ "no need to merge leaf, leaf size {}, depth {}",
+ c.trans,
+ iter.leaf.node->get_size(),
+ iter.get_depth());
+ return seastar::now();
+ }
+
+ return seastar::do_with(
+ depth_t{1},
+ [FNAME, this, c, &iter](auto &to_merge) {
+ return trans_intr::repeat(
+ [FNAME, this, c, &iter, &to_merge] {
+ SUBDEBUGT(
+ seastore_lba_details,
+ "merging depth {}",
+ c.trans,
+ to_merge);
+ auto &parent_pos = iter.get_internal(to_merge + 1);
+ auto merge_fut = handle_merge_iertr::now();
+ if (to_merge > 1) {
+ auto &pos = iter.get_internal(to_merge);
+ merge_fut = merge_level(c, to_merge, parent_pos, pos);
+ } else {
+ auto &pos = iter.leaf;
+ merge_fut = merge_level(c, to_merge, parent_pos, pos);
+ }
+
+ return merge_fut.si_then([FNAME, this, c, &iter, &to_merge] {
+ ++to_merge;
+ auto &pos = iter.get_internal(to_merge);
+ if (to_merge == iter.get_depth()) {
+ if (pos.node->get_size() == 1) {
+ SUBDEBUGT(seastore_lba_details, "collapsing root", c.trans);
+ c.cache.retire_extent(c.trans, pos.node);
+ assert(pos.pos == 0);
+ auto node_iter = pos.get_iter();
+ root.set_location(
+ node_iter->get_val().maybe_relative_to(pos.node->get_paddr()));
+ iter.internal.pop_back();
+ root.set_depth(iter.get_depth());
+ get_tree_stats<self_type>(c.trans).depth = iter.get_depth();
+ root_dirty = true;
+ } else {
+ SUBDEBUGT(seastore_lba_details, "no need to collapse root", c.trans);
+ }
+ return seastar::stop_iteration::yes;
+ } else if (pos.node->below_min_capacity()) {
+ SUBDEBUGT(
+ seastore_lba_details,
+ "continuing, next node {} depth {} at min",
+ c.trans,
+ *pos.node,
+ to_merge);
+ return seastar::stop_iteration::no;
+ } else {
+ SUBDEBUGT(
+ seastore_lba_details,
+ "complete, next node {} depth {} not min",
+ c.trans,
+ *pos.node,
+ to_merge);
+ return seastar::stop_iteration::yes;
+ }
+ });
+ });
+ });
+ }
+
+ template <typename T>
+ using node_position_t = typename iterator::template node_position_t<T>;
+
+ template <typename NodeType,
+ std::enable_if_t<std::is_same_v<NodeType, leaf_node_t>, int> = 0>
+ base_iertr::future<typename NodeType::Ref> get_node(
+ op_context_t<node_key_t> c,
+ depth_t depth,
+ paddr_t addr,
+ node_key_t begin,
+ node_key_t end) {
+ assert(depth == 1);
+ return get_leaf_node(c, addr, begin, end);
+ }
+
+ template <typename NodeType,
+ std::enable_if_t<std::is_same_v<NodeType, internal_node_t>, int> = 0>
+ base_iertr::future<typename NodeType::Ref> get_node(
+ op_context_t<node_key_t> c,
+ depth_t depth,
+ paddr_t addr,
+ node_key_t begin,
+ node_key_t end) {
+ return get_internal_node(c, depth, addr, begin, end);
+ }
+
+ template <typename NodeType>
+ handle_merge_ret merge_level(
+ op_context_t<node_key_t> c,
+ depth_t depth,
+ node_position_t<internal_node_t> &parent_pos,
+ node_position_t<NodeType> &pos)
+ {
+ LOG_PREFIX(FixedKVBtree::merge_level);
+ if (!parent_pos.node->is_pending()) {
+ parent_pos.node = c.cache.duplicate_for_write(
+ c.trans, parent_pos.node
+ )->template cast<internal_node_t>();
+ }
+
+ auto iter = parent_pos.get_iter();
+ assert(iter.get_offset() < parent_pos.node->get_size());
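+ // Prefer the right sibling as donor; fall back to the left sibling
+ // only when pos is the last child of its parent.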
+ bool donor_is_left = ((iter.get_offset() + 1) == parent_pos.node->get_size());
+ auto donor_iter = donor_is_left ? (iter - 1) : (iter + 1);
+ auto next_iter = donor_iter + 1;
+ auto begin = donor_iter->get_key();
+ auto end = next_iter == parent_pos.node->end()
+ ? parent_pos.node->get_node_meta().end
+ : next_iter->get_key();
+
+ SUBDEBUGT(seastore_lba_details, "parent: {}, node: {}", c.trans, *parent_pos.node, *pos.node);
+ return get_node<NodeType>(
+ c,
+ depth,
+ donor_iter.get_val().maybe_relative_to(parent_pos.node->get_paddr()),
+ begin,
+ end
+ ).si_then([c, iter, donor_iter, donor_is_left, &parent_pos, &pos](
+ typename NodeType::Ref donor) {
+ LOG_PREFIX(FixedKVBtree::merge_level);
+ auto [l, r] = donor_is_left ?
+ std::make_pair(donor, pos.node) : std::make_pair(pos.node, donor);
+
+ auto [liter, riter] = donor_is_left ?
+ std::make_pair(donor_iter, iter) : std::make_pair(iter, donor_iter);
+
+ if (donor->at_min_capacity()) {
+ auto replacement = l->make_full_merge(c, r);
+
+ parent_pos.node->update(
+ liter,
+ replacement->get_paddr());
+ parent_pos.node->remove(riter);
+
+ pos.node = replacement;
+ if (donor_is_left) {
+ pos.pos += r->get_size();
+ parent_pos.pos--;
+ }
+
+ SUBDEBUGT(seastore_lba_details, "l: {}, r: {}, replacement: {}", c.trans, *l, *r, *replacement);
+ c.cache.retire_extent(c.trans, l);
+ c.cache.retire_extent(c.trans, r);
+ } else {
+ LOG_PREFIX(FixedKVBtree::merge_level);
+ auto [replacement_l, replacement_r, pivot] =
+ l->make_balanced(
+ c,
+ r,
+ !donor_is_left);
+
+ parent_pos.node->update(
+ liter,
+ replacement_l->get_paddr());
+ parent_pos.node->replace(
+ riter,
+ pivot,
+ replacement_r->get_paddr());
+
+ if (donor_is_left) {
+ assert(parent_pos.pos > 0);
+ parent_pos.pos--;
+ }
+
+ auto orig_position = donor_is_left ?
+ l->get_size() + pos.pos :
+ pos.pos;
+ if (orig_position < replacement_l->get_size()) {
+ pos.node = replacement_l;
+ pos.pos = orig_position;
+ } else {
+ parent_pos.pos++;
+ pos.node = replacement_r;
+ pos.pos = orig_position - replacement_l->get_size();
+ }
+
+ SUBDEBUGT(
+ seastore_lba_details,
+ "l: {}, r: {}, replacement_l: {}, replacement_r: {}",
+ c.trans, *l, *r, *replacement_l, *replacement_r);
+ c.cache.retire_extent(c.trans, l);
+ c.cache.retire_extent(c.trans, r);
+ }
+
+ return seastar::now();
+ });
+ }
+};
+
+}
+
std::ostream &operator<<(std::ostream &out, const LBAPin &rhs)
{
- return out << "LBAPin(" << rhs.get_laddr() << "~" << rhs.get_length()
+ return out << "LBAPin(" << rhs.get_key() << "~" << rhs.get_length()
<< "->" << rhs.get_paddr();
}
};
class LogicalCachedExtent;
-class LBAPin;
-using LBAPinRef = std::unique_ptr<LBAPin>;
-class LBAPin {
+
+template <typename key_t>
+class PhysicalNodePin;
+
+template <typename key_t>
+using PhysicalNodePinRef = std::unique_ptr<PhysicalNodePin<key_t>>;
+
+template <typename key_t>
+class PhysicalNodePin {
public:
virtual void link_extent(LogicalCachedExtent *ref) = 0;
- virtual void take_pin(LBAPin &pin) = 0;
+ virtual void take_pin(PhysicalNodePin<key_t> &pin) = 0;
virtual extent_len_t get_length() const = 0;
virtual paddr_t get_paddr() const = 0;
- virtual laddr_t get_laddr() const = 0;
- virtual LBAPinRef duplicate() const = 0;
+ virtual key_t get_key() const = 0;
+ virtual PhysicalNodePinRef<key_t> duplicate() const = 0;
virtual bool has_been_invalidated() const = 0;
- virtual ~LBAPin() {}
+ virtual ~PhysicalNodePin() {}
};
+
+using LBAPin = PhysicalNodePin<laddr_t>;
+using LBAPinRef = PhysicalNodePinRef<laddr_t>;
+
std::ostream &operator<<(std::ostream &out, const LBAPin &rhs);
using lba_pin_list_t = std::list<LBAPinRef>;
void set_pin(LBAPinRef &&npin) {
assert(!pin);
pin = std::move(npin);
- laddr = pin->get_laddr();
+ laddr = pin->get_key();
pin->link_extent(this);
}
#include "include/buffer.h"
#include "crimson/os/seastore/lba_manager/btree/btree_lba_manager.h"
#include "crimson/os/seastore/lba_manager/btree/lba_btree_node.h"
-#include "crimson/os/seastore/lba_manager/btree/lba_btree.h"
#include "crimson/os/seastore/logging.h"
SET_SUBSYS(seastore_lba);
* - TRACE: read operations, DEBUG details
*/
+namespace crimson::os::seastore {
+
+template<>
+Transaction::tree_stats_t& get_tree_stats<
+ crimson::os::seastore::lba_manager::btree::LBABtree>(Transaction &t) {
+ return t.get_lba_tree_stats();
+}
+}
+
namespace crimson::os::seastore::lba_manager::btree {
BtreeLBAManager::mkfs_ret BtreeLBAManager::mkfs(
return is_lba_node(e.get_type());
}
-btree_range_pin_t &BtreeLBAManager::get_pin(CachedExtent &e)
+btree_range_pin_t<laddr_t> &BtreeLBAManager::get_pin(CachedExtent &e)
{
if (is_lba_node(e)) {
return e.cast<LBANode>()->pin;
} else if (e.is_logical()) {
return static_cast<BtreeLBAPin &>(
- e.cast<LogicalCachedExtent>()->get_pin()).pin;
+ e.cast<LogicalCachedExtent>()->get_pin()).get_range_pin();
} else {
ceph_abort_msg("impossible");
}
}
}
+BtreeLBAManager::base_iertr::future<> _init_cached_extent(
+ op_context_t<laddr_t> c,
+ const CachedExtentRef &e,
+ LBABtree &btree,
+ bool &ret)
+{
+ if (e->is_logical()) {
+ auto logn = e->cast<LogicalCachedExtent>();
+ return btree.lower_bound(
+ c,
+ logn->get_laddr()
+ ).si_then([e, c, logn, &ret](auto iter) {
+ LOG_PREFIX(BtreeLBAManager::init_cached_extent);
+ if (!iter.is_end() &&
+ iter.get_key() == logn->get_laddr() &&
+ iter.get_val().paddr == logn->get_paddr()) {
+ logn->set_pin(iter.get_pin());
+ ceph_assert(iter.get_val().len == e->get_length());
+ if (c.pins) {
+ c.pins->add_pin(
+ static_cast<BtreeLBAPin&>(logn->get_pin()).get_range_pin());
+ }
+ DEBUGT("logical extent {} live", c.trans, *logn);
+ ret = true;
+ } else {
+ DEBUGT("logical extent {} not live", c.trans, *logn);
+ ret = false;
+ }
+ });
+ } else {
+ return btree.init_cached_extent(c, e
+ ).si_then([&ret](bool is_alive) {
+ ret = is_alive;
+ });
+ }
+}
+
BtreeLBAManager::init_cached_extent_ret BtreeLBAManager::init_cached_extent(
Transaction &t,
CachedExtentRef e)
{
LOG_PREFIX(BtreeLBAManager::init_cached_extent);
TRACET("{}", t, *e);
- return seastar::do_with(bool(), [this, e, FNAME, &t](bool& ret) {
+ return seastar::do_with(bool(), [this, e, &t](bool &ret) {
auto c = get_context(t);
- return with_btree(c, [c, e, &ret](auto &btree) {
- return btree.init_cached_extent(c, e
- ).si_then([&ret](bool is_alive) {
- ret = is_alive;
- });
- }).si_then([&ret, e, FNAME, c] {
- DEBUGT("is_alive={} -- {}", c.trans, ret, *e);
- return ret;
- });
+ return with_btree(c, [c, e, &ret](auto &btree)
+ -> base_iertr::future<> {
+ LOG_PREFIX(BtreeLBAManager::init_cached_extent);
+ DEBUGT("extent {}", c.trans, *e);
+ return _init_cached_extent(c, e, btree, ret);
+ }).si_then([&ret] { return ret; });
});
}
return with_btree(
c,
[c, extent](auto &btree) mutable {
- return btree.rewrite_lba_extent(c, extent);
+ return btree.rewrite_extent(c, extent);
});
} else {
DEBUGT("skip non lba extent -- {}", t, *extent);
#include "common/interval_map.h"
#include "crimson/osd/exceptions.h"
+#include "crimson/os/seastore/btree/fixed_kv_btree.h"
#include "crimson/os/seastore/seastore_types.h"
#include "crimson/os/seastore/lba_manager.h"
#include "crimson/os/seastore/cache.h"
#include "crimson/os/seastore/segment_manager.h"
#include "crimson/os/seastore/lba_manager/btree/lba_btree_node.h"
-#include "crimson/os/seastore/lba_manager/btree/lba_btree.h"
+#include "crimson/os/seastore/btree/btree_range_pin.h"
namespace crimson::os::seastore::lba_manager::btree {
+using LBABtree = FixedKVBtree<laddr_t, lba_map_val_t, LBAInternalNode, LBALeafNode, LBA_BLOCK_SIZE>;
+
+using BtreeLBAPin = BtreeNodePin<laddr_t>;
+
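The LBABtree alias makes the FixedKVBtree parameter list concrete: key type, mapped value type, internal node type, leaf node type, and node block size. A future backref tree would be expected to instantiate the same template; a sketch with hypothetical names (none of these types exist in this PR):

// using BackrefBtree = FixedKVBtree<
//   paddr_t,             // key: physical address
//   backref_map_val_t,   // mapped value (hypothetical)
//   BackrefInternalNode,
//   BackrefLeafNode,
//   BACKREF_BLOCK_SIZE>;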
/**
* BtreeLBAManager
*
void complete_transaction(
Transaction &t) final;
+ /**
+ * init_cached_extent
+ *
+ * Checks whether e is live (reachable from lba tree) and drops or initializes
+ * accordingly.
+ *
+ * Returns whether e is live.
+ */
init_cached_extent_ret init_cached_extent(
Transaction &t,
CachedExtentRef e) final;
void add_pin(LBAPin &pin) final {
auto *bpin = reinterpret_cast<BtreeLBAPin*>(&pin);
- pin_set.add_pin(bpin->pin);
- bpin->parent = nullptr;
+ pin_set.add_pin(bpin->get_range_pin());
+ bpin->set_parent(nullptr);
}
~BtreeLBAManager();
SegmentManager &segment_manager;
Cache &cache;
- btree_pin_set_t pin_set;
+ btree_pin_set_t<laddr_t> pin_set;
struct {
uint64_t num_alloc_extents = 0;
uint64_t num_alloc_extents_iter_nexts = 0;
} stats;
- op_context_t get_context(Transaction &t) {
- return op_context_t{cache, t, &pin_set};
+ op_context_t<laddr_t> get_context(Transaction &t) {
+ return op_context_t<laddr_t>{cache, t, &pin_set};
}
- static btree_range_pin_t &get_pin(CachedExtent &e);
+ static btree_range_pin_t<laddr_t> &get_pin(CachedExtent &e);
seastar::metrics::metric_group metrics;
void register_metrics();
template <typename F, typename... Args>
auto with_btree(
- op_context_t c,
+ op_context_t<laddr_t> c,
F &&f) {
return cache.get_root(
c.trans
template <typename State, typename F>
auto with_btree_state(
- op_context_t c,
+ op_context_t<laddr_t> c,
State &&init,
F &&f) {
return seastar::do_with(
template <typename State, typename F>
auto with_btree_state(
- op_context_t c,
+ op_context_t<laddr_t> c,
F &&f) {
return with_btree_state<State, F>(c, State{}, std::forward<F>(f));
}
template <typename Ret, typename F>
auto with_btree_ret(
- op_context_t c,
+ op_context_t<laddr_t> c,
F &&f) {
return with_btree_state<Ret>(
c,
+++ /dev/null
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab
-
-#include "crimson/os/seastore/lba_manager/btree/btree_range_pin.h"
-#include "crimson/os/seastore/logging.h"
-
-SET_SUBSYS(seastore_lba);
-
-namespace crimson::os::seastore::lba_manager::btree {
-
-void btree_range_pin_t::take_pin(btree_range_pin_t &other)
-{
- ceph_assert(other.extent);
- if (other.pins) {
- other.pins->replace_pin(*this, other);
- pins = other.pins;
- other.pins = nullptr;
-
- if (other.has_ref()) {
- other.drop_ref();
- acquire_ref();
- }
- }
-}
-
-btree_range_pin_t::~btree_range_pin_t()
-{
- LOG_PREFIX(btree_range_pin_t::~btree_range_pin_t);
- ceph_assert(!pins == !is_linked());
- ceph_assert(!ref);
- if (pins) {
- TRACE("removing {}", *this);
- pins->remove_pin(*this, true);
- }
- extent = nullptr;
-}
-
-void btree_pin_set_t::replace_pin(btree_range_pin_t &to, btree_range_pin_t &from)
-{
- pins.replace_node(pins.iterator_to(from), to);
-}
-
-void btree_pin_set_t::remove_pin(btree_range_pin_t &pin, bool do_check_parent)
-{
- LOG_PREFIX(btree_pin_set_t::remove_pin);
- TRACE("{}", pin);
- ceph_assert(pin.is_linked());
- ceph_assert(pin.pins);
- ceph_assert(!pin.ref);
-
- pins.erase(pin);
- pin.pins = nullptr;
-
- if (do_check_parent) {
- check_parent(pin);
- }
-}
-
-btree_range_pin_t *btree_pin_set_t::maybe_get_parent(
- const lba_node_meta_t &meta)
-{
- auto cmeta = meta;
- cmeta.depth++;
- auto iter = pins.upper_bound(cmeta, btree_range_pin_t::meta_cmp_t());
- if (iter == pins.begin()) {
- return nullptr;
- } else {
- --iter;
- if (iter->range.is_parent_of(meta)) {
- return &*iter;
- } else {
- return nullptr;
- }
- }
-}
-
-const btree_range_pin_t *btree_pin_set_t::maybe_get_first_child(
- const lba_node_meta_t &meta) const
-{
- if (meta.depth == 0) {
- return nullptr;
- }
-
- auto cmeta = meta;
- cmeta.depth--;
-
- auto iter = pins.lower_bound(cmeta, btree_range_pin_t::meta_cmp_t());
- if (iter == pins.end()) {
- return nullptr;
- } else if (meta.is_parent_of(iter->range)) {
- return &*iter;
- } else {
- return nullptr;
- }
-}
-
-void btree_pin_set_t::release_if_no_children(btree_range_pin_t &pin)
-{
- ceph_assert(pin.is_linked());
- if (maybe_get_first_child(pin.range) == nullptr) {
- pin.drop_ref();
- }
-}
-
-void btree_pin_set_t::add_pin(btree_range_pin_t &pin)
-{
- LOG_PREFIX(btree_pin_set_t::add_pin);
- ceph_assert(!pin.is_linked());
- ceph_assert(!pin.pins);
- ceph_assert(!pin.ref);
-
- auto [prev, inserted] = pins.insert(pin);
- if (!inserted) {
- ERROR("unable to add {} ({}), found {} ({})",
- pin,
- *(pin.extent),
- *prev,
- *(prev->extent));
- ceph_assert(0 == "impossible");
- return;
- }
- pin.pins = this;
- if (!pin.is_root()) {
- auto *parent = maybe_get_parent(pin.range);
- ceph_assert(parent);
- if (!parent->has_ref()) {
- TRACE("acquiring parent {}", static_cast<void*>(parent));
- parent->acquire_ref();
- } else {
- TRACE("parent has ref {}", static_cast<void*>(parent));
- }
- }
- if (maybe_get_first_child(pin.range) != nullptr) {
- TRACE("acquiring self {}", pin);
- pin.acquire_ref();
- }
-}
-
-void btree_pin_set_t::retire(btree_range_pin_t &pin)
-{
- pin.drop_ref();
- remove_pin(pin, false);
-}
-
-void btree_pin_set_t::check_parent(btree_range_pin_t &pin)
-{
- LOG_PREFIX(btree_pin_set_t::check_parent);
- auto parent = maybe_get_parent(pin.range);
- if (parent) {
- TRACE("releasing parent {}", *parent);
- release_if_no_children(*parent);
- }
-}
-
-}
+++ /dev/null
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab
-
-#pragma once
-
-#include <boost/intrusive/set.hpp>
-
-#include "crimson/os/seastore/cached_extent.h"
-#include "crimson/os/seastore/seastore_types.h"
-
-namespace crimson::os::seastore::lba_manager::btree {
-
-class LBANode;
-using LBANodeRef = TCachedExtentRef<LBANode>;
-
-struct lba_node_meta_t {
- laddr_t begin = 0;
- laddr_t end = 0;
- depth_t depth = 0;
-
- bool is_parent_of(const lba_node_meta_t &other) const {
- return (depth == other.depth + 1) &&
- (begin <= other.begin) &&
- (end > other.begin);
- }
-
- std::pair<lba_node_meta_t, lba_node_meta_t> split_into(laddr_t pivot) const {
- return std::make_pair(
- lba_node_meta_t{begin, pivot, depth},
- lba_node_meta_t{pivot, end, depth});
- }
-
- static lba_node_meta_t merge_from(
- const lba_node_meta_t &lhs, const lba_node_meta_t &rhs) {
- ceph_assert(lhs.depth == rhs.depth);
- return lba_node_meta_t{lhs.begin, rhs.end, lhs.depth};
- }
-
- static std::pair<lba_node_meta_t, lba_node_meta_t>
- rebalance(const lba_node_meta_t &lhs, const lba_node_meta_t &rhs, laddr_t pivot) {
- ceph_assert(lhs.depth == rhs.depth);
- return std::make_pair(
- lba_node_meta_t{lhs.begin, pivot, lhs.depth},
- lba_node_meta_t{pivot, rhs.end, lhs.depth});
- }
-
- bool is_root() const {
- return begin == 0 && end == L_ADDR_MAX;
- }
-};
-
-inline std::ostream &operator<<(
- std::ostream &lhs,
- const lba_node_meta_t &rhs)
-{
- return lhs << "btree_node_meta_t("
- << "begin=" << rhs.begin
- << ", end=" << rhs.end
- << ", depth=" << rhs.depth
- << ")";
-}
-
-/**
- * btree_range_pin_t
- *
- * Element tracked by btree_pin_set_t below. Encapsulates the intrusive_set
- * hook, the lba_node_meta_t representing the lba range covered by a node,
- * and extent and ref members intended to hold a reference when the extent
- * should be pinned.
- */
-class btree_pin_set_t;
-class btree_range_pin_t : public boost::intrusive::set_base_hook<> {
- friend class btree_pin_set_t;
- lba_node_meta_t range;
-
- btree_pin_set_t *pins = nullptr;
-
- // We need to be able to remember extent without holding a reference,
- // but we can do it more compactly -- TODO
- CachedExtent *extent = nullptr;
- CachedExtentRef ref;
-
- using index_t = boost::intrusive::set<btree_range_pin_t>;
-
- static auto get_tuple(const lba_node_meta_t &meta) {
- return std::make_tuple(-meta.depth, meta.begin);
- }
-
- void acquire_ref() {
- ref = CachedExtentRef(extent);
- }
-
- void drop_ref() {
- ref.reset();
- }
-
-public:
- btree_range_pin_t() = default;
- btree_range_pin_t(CachedExtent *extent)
- : extent(extent) {}
- btree_range_pin_t(const btree_range_pin_t &rhs, CachedExtent *extent)
- : range(rhs.range), extent(extent) {}
-
- bool has_ref() const {
- return !!ref;
- }
-
- bool is_root() const {
- return range.is_root();
- }
-
- void set_range(const lba_node_meta_t &nrange) {
- range = nrange;
- }
- void set_extent(CachedExtent *nextent) {
- ceph_assert(!extent);
- extent = nextent;
- }
-
- CachedExtent &get_extent() {
- assert(extent);
- return *extent;
- }
-
- void take_pin(btree_range_pin_t &other);
-
- friend bool operator<(
- const btree_range_pin_t &lhs, const btree_range_pin_t &rhs) {
- return get_tuple(lhs.range) < get_tuple(rhs.range);
- }
- friend bool operator>(
- const btree_range_pin_t &lhs, const btree_range_pin_t &rhs) {
- return get_tuple(lhs.range) > get_tuple(rhs.range);
- }
- friend bool operator==(
- const btree_range_pin_t &lhs, const btree_range_pin_t &rhs) {
- return get_tuple(lhs.range) == get_tuple(rhs.range);
- }
-
- struct meta_cmp_t {
- bool operator()(
- const btree_range_pin_t &lhs, const lba_node_meta_t &rhs) const {
- return get_tuple(lhs.range) < get_tuple(rhs);
- }
- bool operator()(
- const lba_node_meta_t &lhs, const btree_range_pin_t &rhs) const {
- return get_tuple(lhs) < get_tuple(rhs.range);
- }
- };
-
- friend std::ostream &operator<<(
- std::ostream &lhs,
- const btree_range_pin_t &rhs) {
- return lhs << "btree_range_pin_t("
- << "begin=" << rhs.range.begin
- << ", end=" << rhs.range.end
- << ", depth=" << rhs.range.depth
- << ", extent=" << rhs.extent
- << ")";
- }
-
- friend class BtreeLBAPin;
- ~btree_range_pin_t();
-};
-
-/**
- * btree_pin_set_t
- *
- * Ensures that for every cached node, all parent LBANodes required
- * to map it are present in cache. Relocating these nodes can
- * therefore be done without further reads or cache space.
- *
- * Contains a btree_range_pin_t for every clean or dirty LBANode
- * or LogicalCachedExtent instance in cache at any point in time.
- * For any LBANode, the contained btree_range_pin_t will hold
- * a reference to that node pinning it in cache as long as that
- * node has children in the set. This invariant can be violated
- * only by calling retire_extent and is repaired by calling
- * check_parent synchronously after adding any new extents.
- */
-class btree_pin_set_t {
- friend class btree_range_pin_t;
- using pins_t = btree_range_pin_t::index_t;
- pins_t pins;
-
- /// Removes pin from set optionally checking whether parent has other children
- void remove_pin(btree_range_pin_t &pin, bool check_parent);
-
- void replace_pin(btree_range_pin_t &to, btree_range_pin_t &from);
-
- /// Returns parent pin if exists
- btree_range_pin_t *maybe_get_parent(const lba_node_meta_t &pin);
-
- /// Returns earliest child pin if one exists
- const btree_range_pin_t *maybe_get_first_child(const lba_node_meta_t &pin) const;
-
- /// Releases pin if it has no children
- void release_if_no_children(btree_range_pin_t &pin);
-
-public:
- /// Adds pin to set, assumes set is consistent
- void add_pin(btree_range_pin_t &pin);
-
- /**
- * retire/check_parent
- *
- * See BtreeLBAManager::complete_transaction.
- * retire removes the specified pin from the set, but does not
- * check parents. After any new extents are added to the set,
- * the caller is required to call check_parent to restore the
- * invariant.
- */
- void retire(btree_range_pin_t &pin);
- void check_parent(btree_range_pin_t &pin);
-
- template <typename F>
- void scan(F &&f) {
- for (auto &i : pins) {
- std::invoke(f, i);
- }
- }
-
- ~btree_pin_set_t() {
- ceph_assert(pins.empty());
- }
-};
-
-class BtreeLBAPin : public LBAPin {
- friend class BtreeLBAManager;
- friend class LBABtree;
-
- /**
- * parent
- *
- * populated until link_extent is called to ensure cache residence
- * until add_pin is called.
- */
- CachedExtentRef parent;
-
- paddr_t paddr;
- btree_range_pin_t pin;
-
-public:
- BtreeLBAPin() = default;
-
- BtreeLBAPin(
- CachedExtentRef parent,
- paddr_t paddr,
- lba_node_meta_t &&meta)
- : parent(parent), paddr(paddr) {
- pin.set_range(std::move(meta));
- }
-
- void link_extent(LogicalCachedExtent *ref) final {
- pin.set_extent(ref);
- }
-
- extent_len_t get_length() const final {
- ceph_assert(pin.range.end > pin.range.begin);
- return pin.range.end - pin.range.begin;
- }
-
- paddr_t get_paddr() const final {
- return paddr;
- }
-
- laddr_t get_laddr() const final {
- return pin.range.begin;
- }
-
- LBAPinRef duplicate() const final {
- auto ret = std::unique_ptr<BtreeLBAPin>(new BtreeLBAPin);
- ret->pin.set_range(pin.range);
- ret->paddr = paddr;
- ret->parent = parent;
- return ret;
- }
-
- void take_pin(LBAPin &opin) final {
- pin.take_pin(static_cast<BtreeLBAPin&>(opin).pin);
- }
-
- bool has_been_invalidated() const final {
- return parent->has_been_invalidated();
- }
-};
-
-}
+++ /dev/null
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab
-
-#include "crimson/os/seastore/lba_manager/btree/lba_btree.h"
-
-SET_SUBSYS(seastore_lba_details);
-
-namespace crimson::os::seastore::lba_manager::btree {
-
-LBABtree::mkfs_ret LBABtree::mkfs(op_context_t c)
-{
- auto root_leaf = c.cache.alloc_new_extent<LBALeafNode>(
- c.trans,
- LBA_BLOCK_SIZE);
- root_leaf->set_size(0);
- lba_node_meta_t meta{0, L_ADDR_MAX, 1};
- root_leaf->set_meta(meta);
- root_leaf->pin.set_range(meta);
- c.trans.get_lba_tree_stats().depth = 1u;
- return lba_root_t{root_leaf->get_paddr(), 1u};
-}
-
-LBABtree::iterator::handle_boundary_ret LBABtree::iterator::handle_boundary(
- op_context_t c,
- mapped_space_visitor_t *visitor)
-{
- assert(at_boundary());
- depth_t depth_with_space = 2;
- for (; depth_with_space <= get_depth(); ++depth_with_space) {
- if ((get_internal(depth_with_space).pos + 1) <
- get_internal(depth_with_space).node->get_size()) {
- break;
- }
- }
-
- if (depth_with_space <= get_depth()) {
- return seastar::do_with(
- [](const LBAInternalNode &internal) { return internal.begin(); },
- [](const LBALeafNode &leaf) { return leaf.begin(); },
- [this, c, depth_with_space, visitor](auto &li, auto &ll) {
- for (depth_t depth = 2; depth < depth_with_space; ++depth) {
- get_internal(depth).reset();
- }
- leaf.reset();
- get_internal(depth_with_space).pos++;
- // note, cannot result in at_boundary() by construction
- return lookup_depth_range(
- c, *this, depth_with_space - 1, 0, li, ll, visitor
- );
- });
- } else {
- // end
- return seastar::now();
- }
-}
-
-LBABtree::iterator_fut LBABtree::iterator::next(
- op_context_t c,
- mapped_space_visitor_t *visitor) const
-{
- assert_valid();
- assert(!is_end());
-
- auto ret = *this;
- ret.leaf.pos++;
- if (ret.at_boundary()) {
- return seastar::do_with(
- ret,
- [c, visitor](auto &ret) mutable {
- return ret.handle_boundary(
- c, visitor
- ).si_then([&ret] {
- return std::move(ret);
- });
- });
- } else {
- return iterator_fut(
- interruptible::ready_future_marker{},
- ret);
- }
-
-}
-
-LBABtree::iterator_fut LBABtree::iterator::prev(op_context_t c) const
-{
- assert_valid();
- assert(!is_begin());
-
- auto ret = *this;
-
- if (ret.leaf.pos > 0) {
- ret.leaf.pos--;
- return iterator_fut(
- interruptible::ready_future_marker{},
- ret);
- }
-
- depth_t depth_with_space = 2;
- for (; depth_with_space <= get_depth(); ++depth_with_space) {
- if (ret.get_internal(depth_with_space).pos > 0) {
- break;
- }
- }
-
- assert(depth_with_space <= ret.get_depth()); // must not be begin()
- return seastar::do_with(
- std::move(ret),
- [](const LBAInternalNode &internal) { return --internal.end(); },
- [](const LBALeafNode &leaf) { return --leaf.end(); },
- [c, depth_with_space](auto &ret, auto &li, auto &ll) {
- for (depth_t depth = 2; depth < depth_with_space; ++depth) {
- ret.get_internal(depth).reset();
- }
- ret.leaf.reset();
- ret.get_internal(depth_with_space).pos--;
- // note, cannot result in at_boundary() by construction
- return lookup_depth_range(
- c, ret, depth_with_space - 1, 0, li, ll, nullptr
- ).si_then([&ret] {
- assert(!ret.at_boundary());
- return std::move(ret);
- });
- });
-}
-
-LBABtree::iterator_fut LBABtree::lower_bound(
- op_context_t c,
- laddr_t addr,
- mapped_space_visitor_t *visitor) const
-{
- LOG_PREFIX(LBATree::lower_bound);
- return lookup(
- c,
- [addr](const LBAInternalNode &internal) {
- assert(internal.get_size() > 0);
- auto iter = internal.upper_bound(addr);
- assert(iter != internal.begin());
- --iter;
- return iter;
- },
- [FNAME, c, addr](const LBALeafNode &leaf) {
- auto ret = leaf.lower_bound(addr);
- DEBUGT(
- "leaf addr {}, got ret offset {}, size {}, end {}",
- c.trans,
- addr,
- ret.get_offset(),
- leaf.get_size(),
- ret == leaf.end());
- return ret;
- },
- visitor
- ).si_then([FNAME, c](auto &&ret) {
- DEBUGT(
- "ret.leaf.pos {}",
- c.trans,
- ret.leaf.pos);
- ret.assert_valid();
- return std::move(ret);
- });
-}
-
-LBABtree::insert_ret LBABtree::insert(
- op_context_t c,
- iterator iter,
- laddr_t laddr,
- lba_map_val_t val)
-{
- LOG_PREFIX(LBATree::insert);
- DEBUGT(
- "inserting laddr {} at iter {}",
- c.trans,
- laddr,
- iter.is_end() ? L_ADDR_MAX : iter.get_key());
- return seastar::do_with(
- iter,
- [this, c, laddr, val](auto &ret) {
- return find_insertion(
- c, laddr, ret
- ).si_then([this, c, laddr, val, &ret] {
- if (!ret.at_boundary() && ret.get_key() == laddr) {
- return insert_ret(
- interruptible::ready_future_marker{},
- std::make_pair(ret, false));
- } else {
- ++(c.trans.get_lba_tree_stats().num_inserts);
- return handle_split(
- c, ret
- ).si_then([c, laddr, val, &ret] {
- if (!ret.leaf.node->is_pending()) {
- CachedExtentRef mut = c.cache.duplicate_for_write(
- c.trans, ret.leaf.node
- );
- ret.leaf.node = mut->cast<LBALeafNode>();
- }
- auto iter = LBALeafNode::const_iterator(
- ret.leaf.node.get(), ret.leaf.pos);
- assert(iter == ret.leaf.node->lower_bound(laddr));
- assert(iter == ret.leaf.node->end() || iter->get_key() > laddr);
- assert(laddr >= ret.leaf.node->get_meta().begin &&
- laddr < ret.leaf.node->get_meta().end);
- ret.leaf.node->insert(iter, laddr, val);
- return insert_ret(
- interruptible::ready_future_marker{},
- std::make_pair(ret, true));
- });
- }
- });
- });
-}
-
-LBABtree::update_ret LBABtree::update(
- op_context_t c,
- iterator iter,
- lba_map_val_t val)
-{
- LOG_PREFIX(LBATree::update);
- DEBUGT(
- "update element at {}",
- c.trans,
- iter.is_end() ? L_ADDR_MAX : iter.get_key());
- if (!iter.leaf.node->is_pending()) {
- CachedExtentRef mut = c.cache.duplicate_for_write(
- c.trans, iter.leaf.node
- );
- iter.leaf.node = mut->cast<LBALeafNode>();
- }
- iter.leaf.node->update(
- iter.leaf.node->iter_idx(iter.leaf.pos),
- val);
- return update_ret(
- interruptible::ready_future_marker{},
- iter);
-}
-
-LBABtree::remove_ret LBABtree::remove(
- op_context_t c,
- iterator iter)
-{
- LOG_PREFIX(LBATree::remove);
- DEBUGT(
- "remove element at {}",
- c.trans,
- iter.is_end() ? L_ADDR_MAX : iter.get_key());
- assert(!iter.is_end());
- ++(c.trans.get_lba_tree_stats().num_erases);
- return seastar::do_with(
- iter,
- [this, c](auto &ret) {
- if (!ret.leaf.node->is_pending()) {
- CachedExtentRef mut = c.cache.duplicate_for_write(
- c.trans, ret.leaf.node
- );
- ret.leaf.node = mut->cast<LBALeafNode>();
- }
- ret.leaf.node->remove(
- ret.leaf.node->iter_idx(ret.leaf.pos));
-
- return handle_merge(
- c, ret
- );
- });
-}
-
-LBABtree::init_cached_extent_ret LBABtree::init_cached_extent(
- op_context_t c,
- CachedExtentRef e)
-{
- LOG_PREFIX(LBATree::init_cached_extent);
- DEBUGT("extent {}", c.trans, *e);
- if (e->is_logical()) {
- auto logn = e->cast<LogicalCachedExtent>();
- return lower_bound(
- c,
- logn->get_laddr()
- ).si_then([FNAME, e, c, logn](auto iter) {
- if (!iter.is_end() &&
- iter.get_key() == logn->get_laddr() &&
- iter.get_val().paddr == logn->get_paddr()) {
- logn->set_pin(iter.get_pin());
- ceph_assert(iter.get_val().len == e->get_length());
- if (c.pins) {
- c.pins->add_pin(
- static_cast<BtreeLBAPin&>(logn->get_pin()).pin);
- }
- DEBUGT("logical extent {} live", c.trans, *logn);
- return true;
- } else {
- DEBUGT("logical extent {} not live", c.trans, *logn);
- return false;
- }
- });
- } else if (e->get_type() == extent_types_t::LADDR_INTERNAL) {
- auto eint = e->cast<LBAInternalNode>();
- return lower_bound(
- c, eint->get_node_meta().begin
- ).si_then([FNAME, e, c, eint](auto iter) {
- // Note, this check is valid even if iter.is_end()
- depth_t cand_depth = eint->get_node_meta().depth;
- if (cand_depth <= iter.get_depth() &&
- &*iter.get_internal(cand_depth).node == &*eint) {
- DEBUGT("extent {} is live", c.trans, *eint);
- return true;
- } else {
- DEBUGT("extent {} is not live", c.trans, *eint);
- return false;
- }
- });
- } else if (e->get_type() == extent_types_t::LADDR_LEAF) {
- auto eleaf = e->cast<LBALeafNode>();
- return lower_bound(
- c, eleaf->get_node_meta().begin
- ).si_then([FNAME, c, e, eleaf](auto iter) {
- // Note, this check is valid even if iter.is_end()
- if (iter.leaf.node == &*eleaf) {
- DEBUGT("extent {} is live", c.trans, *eleaf);
- return true;
- } else {
- DEBUGT("extent {} is not live", c.trans, *eleaf);
- return false;
- }
- });
- } else {
- DEBUGT(
- "found other extent {} type {}",
- c.trans,
- *e,
- e->get_type());
- return init_cached_extent_ret(
- interruptible::ready_future_marker{},
- true);
- }
-}
-
-LBABtree::get_internal_if_live_ret
-LBABtree::get_internal_if_live(
- op_context_t c,
- paddr_t addr,
- laddr_t laddr,
- seastore_off_t len)
-{
- LOG_PREFIX(LBABtree::get_internal_if_live);
- return lower_bound(
- c, laddr
- ).si_then([FNAME, c, addr, laddr, len](auto iter) {
- for (depth_t d = 2; d <= iter.get_depth(); ++d) {
- CachedExtent &node = *iter.get_internal(d).node;
- auto internal_node = node.cast<LBAInternalNode>();
- if (internal_node->get_paddr() == addr) {
- DEBUGT(
- "extent laddr {} addr {}~{} found: {}",
- c.trans,
- laddr,
- addr,
- len,
- *internal_node);
- assert(internal_node->get_node_meta().begin == laddr);
- return CachedExtentRef(internal_node);
- }
- }
- DEBUGT(
- "extent laddr {} addr {}~{} is not live, no matching internal node",
- c.trans,
- laddr,
- addr,
- len);
- return CachedExtentRef();
- });
-}
-
-LBABtree::get_leaf_if_live_ret
-LBABtree::get_leaf_if_live(
- op_context_t c,
- paddr_t addr,
- laddr_t laddr,
- seastore_off_t len)
-{
- LOG_PREFIX(LBABtree::get_leaf_if_live);
- return lower_bound(
- c, laddr
- ).si_then([FNAME, c, addr, laddr, len](auto iter) {
- if (iter.leaf.node->get_paddr() == addr) {
- DEBUGT(
- "extent laddr {} addr {}~{} found: {}",
- c.trans,
- laddr,
- addr,
- len,
- *iter.leaf.node);
- return CachedExtentRef(iter.leaf.node);
- } else {
- DEBUGT(
- "extent laddr {} addr {}~{} is not live, does not match node {}",
- c.trans,
- laddr,
- addr,
- len,
- *iter.leaf.node);
- return CachedExtentRef();
- }
- });
-}
-
-
-LBABtree::rewrite_lba_extent_ret LBABtree::rewrite_lba_extent(
- op_context_t c,
- CachedExtentRef e)
-{
- LOG_PREFIX(LBABtree::rewrite_lba_extent);
- assert(e->get_type() == extent_types_t::LADDR_INTERNAL ||
- e->get_type() == extent_types_t::LADDR_LEAF);
-
- auto do_rewrite = [&](auto &lba_extent) {
- auto nlba_extent = c.cache.alloc_new_extent<
- std::remove_reference_t<decltype(lba_extent)>
- >(
- c.trans,
- lba_extent.get_length());
- lba_extent.get_bptr().copy_out(
- 0,
- lba_extent.get_length(),
- nlba_extent->get_bptr().c_str());
- nlba_extent->pin.set_range(nlba_extent->get_node_meta());
- nlba_extent->set_last_modified(lba_extent.get_last_modified());
-
- /* This is a bit underhanded. Any relative addrs here must necessarily
- * be record relative as we are rewriting a dirty extent. Thus, we
- * are using resolve_relative_addrs with a (likely negative) block
- * relative offset to correct them to block-relative offsets adjusted
- * for our new transaction location.
- *
- * Upon commit, these now block relative addresses will be interpreted
- * against the real final address.
- */
- nlba_extent->resolve_relative_addrs(
- make_record_relative_paddr(0) - nlba_extent->get_paddr());
-
- DEBUGT(
- "rewriting {} into {}",
- c.trans,
- lba_extent,
- *nlba_extent);
-
- return update_internal_mapping(
- c,
- nlba_extent->get_node_meta().depth,
- nlba_extent->get_node_meta().begin,
- e->get_paddr(),
- nlba_extent->get_paddr()
- ).si_then([c, e] {
- c.cache.retire_extent(c.trans, e);
- });
- };
-
- CachedExtentRef nlba_extent;
- if (e->get_type() == extent_types_t::LADDR_INTERNAL) {
- auto lint = e->cast<LBAInternalNode>();
- return do_rewrite(*lint);
- } else {
- assert(e->get_type() == extent_types_t::LADDR_LEAF);
- auto lleaf = e->cast<LBALeafNode>();
- return do_rewrite(*lleaf);
- }
-}
-
-LBABtree::get_internal_node_ret LBABtree::get_internal_node(
- op_context_t c,
- depth_t depth,
- paddr_t offset,
- laddr_t begin,
- laddr_t end)
-{
- LOG_PREFIX(LBATree::get_internal_node);
- DEBUGT(
- "reading internal at offset {}, depth {}, begin {}, end {}",
- c.trans,
- offset,
- depth,
- begin,
- end);
- assert(depth > 1);
- auto init_internal = [c, depth, begin, end](LBAInternalNode &node) {
- assert(!node.is_pending());
- assert(!node.pin.is_linked());
- node.pin.set_range(lba_node_meta_t{begin, end, depth});
- if (c.pins) {
- c.pins->add_pin(node.pin);
- }
- };
- return c.cache.get_extent<LBAInternalNode>(
- c.trans,
- offset,
- LBA_BLOCK_SIZE,
- init_internal
- ).si_then([FNAME, c, offset, init_internal, depth, begin, end](
- LBAInternalNodeRef ret) {
- DEBUGT(
- "read internal at offset {} {}",
- c.trans,
- offset,
- *ret);
- // This can only happen during init_cached_extent
- if (c.pins && !ret->is_pending() && !ret->pin.is_linked()) {
- assert(ret->is_dirty());
- init_internal(*ret);
- }
- auto meta = ret->get_meta();
- if (ret->get_size()) {
- ceph_assert(meta.begin <= ret->begin()->get_key());
- ceph_assert(meta.end > (ret->end() - 1)->get_key());
- }
- ceph_assert(depth == meta.depth);
- ceph_assert(begin == meta.begin);
- ceph_assert(end == meta.end);
- return get_internal_node_ret(
- interruptible::ready_future_marker{},
- ret);
- });
-}
-
-LBABtree::get_leaf_node_ret LBABtree::get_leaf_node(
- op_context_t c,
- paddr_t offset,
- laddr_t begin,
- laddr_t end)
-{
- LOG_PREFIX(LBATree::get_leaf_node);
- DEBUGT(
- "reading leaf at offset {}, begin {}, end {}",
- c.trans,
- offset,
- begin,
- end);
- auto init_leaf = [c, begin, end](LBALeafNode &node) {
- assert(!node.is_pending());
- assert(!node.pin.is_linked());
- node.pin.set_range(lba_node_meta_t{begin, end, 1});
- if (c.pins) {
- c.pins->add_pin(node.pin);
- }
- };
- return c.cache.get_extent<LBALeafNode>(
- c.trans,
- offset,
- LBA_BLOCK_SIZE,
- init_leaf
- ).si_then([FNAME, c, offset, init_leaf, begin, end](LBALeafNodeRef ret) {
- DEBUGT(
- "read leaf at offset {} {}",
- c.trans,
- offset,
- *ret);
- // This can only happen during init_cached_extent
- if (c.pins && !ret->is_pending() && !ret->pin.is_linked()) {
- assert(ret->is_dirty());
- init_leaf(*ret);
- }
- auto meta = ret->get_meta();
- if (ret->get_size()) {
- ceph_assert(meta.begin <= ret->begin()->get_key());
- ceph_assert(meta.end > (ret->end() - 1)->get_key());
- }
- ceph_assert(1 == meta.depth);
- ceph_assert(begin == meta.begin);
- ceph_assert(end == meta.end);
- return get_leaf_node_ret(
- interruptible::ready_future_marker{},
- ret);
- });
-}
-
-LBABtree::find_insertion_ret LBABtree::find_insertion(
- op_context_t c,
- laddr_t laddr,
- iterator &iter)
-{
- assert(iter.is_end() || iter.get_key() >= laddr);
- if (!iter.is_end() && iter.get_key() == laddr) {
- return seastar::now();
- } else if (iter.leaf.node->get_node_meta().begin <= laddr) {
-#ifndef NDEBUG
- auto p = iter;
- if (p.leaf.pos > 0) {
- --p.leaf.pos;
- assert(p.get_key() < laddr);
- }
-#endif
- return seastar::now();
- } else {
- assert(iter.leaf.pos == 0);
- return iter.prev(
- c
- ).si_then([laddr, &iter](auto p) {
- boost::ignore_unused(laddr); // avoid clang warning;
- assert(p.leaf.node->get_node_meta().begin <= laddr);
- assert(p.get_key() < laddr);
- // Note, this is specifically allowed to violate the iterator
- // invariant that pos is a valid index for the node in the event
- // that the insertion point is at the end of a node.
- p.leaf.pos++;
- assert(p.at_boundary());
- iter = p;
- return seastar::now();
- });
- }
-}
-
-LBABtree::handle_split_ret LBABtree::handle_split(
- op_context_t c,
- iterator &iter)
-{
- LOG_PREFIX(LBATree::handle_split);
-
- depth_t split_from = iter.check_split();
-
- DEBUGT("split_from {}, depth {}", c.trans, split_from, iter.get_depth());
-
- if (split_from == iter.get_depth()) {
- auto nroot = c.cache.alloc_new_extent<LBAInternalNode>(
- c.trans, LBA_BLOCK_SIZE);
- lba_node_meta_t meta{0, L_ADDR_MAX, iter.get_depth() + 1};
- nroot->set_meta(meta);
- nroot->pin.set_range(meta);
- nroot->journal_insert(
- std::cbegin(*nroot),
- L_ADDR_MIN,
- root.get_location(),
- nullptr);
- iter.internal.push_back({nroot, 0});
-
- root.set_location(nroot->get_paddr());
- root.set_depth(iter.get_depth());
- c.trans.get_lba_tree_stats().depth = iter.get_depth();
- root_dirty = true;
- }
-
- /* pos may be either node_position_t<LBALeafNode> or
- * node_position_t<LBAInternalNode> */
- auto split_level = [&, FNAME](auto &parent_pos, auto &pos) {
- auto [left, right, pivot] = pos.node->make_split_children(c);
-
- auto parent_node = parent_pos.node;
- auto parent_iter = parent_pos.get_iter();
-
- parent_node->update(
- parent_iter,
- left->get_paddr());
- parent_node->insert(
- parent_iter + 1,
- pivot,
- right->get_paddr());
-
- DEBUGT("splitted {} into left: {}, right: {}",
- c.trans,
- *pos.node,
- *left,
- *right);
- c.cache.retire_extent(c.trans, pos.node);
-
- return std::make_pair(left, right);
- };
-
- for (; split_from > 0; --split_from) {
- auto &parent_pos = iter.get_internal(split_from + 1);
- if (!parent_pos.node->is_pending()) {
- parent_pos.node = c.cache.duplicate_for_write(
- c.trans, parent_pos.node
- )->cast<LBAInternalNode>();
- }
-
- if (split_from > 1) {
- auto &pos = iter.get_internal(split_from);
- DEBUGT("splitting internal {} at depth {}, parent: {} at pos: {}",
- c.trans,
- *pos.node,
- split_from,
- *parent_pos.node,
- parent_pos.pos);
- auto [left, right] = split_level(parent_pos, pos);
-
- if (pos.pos < left->get_size()) {
- pos.node = left;
- } else {
- pos.node = right;
- pos.pos -= left->get_size();
-
- parent_pos.pos += 1;
- }
- } else {
- auto &pos = iter.leaf;
- DEBUGT("splitting leaf {}, parent: {} at pos: {}",
- c.trans,
- *pos.node,
- *parent_pos.node,
- parent_pos.pos);
- auto [left, right] = split_level(parent_pos, pos);
-
- /* right->get_node_meta().begin == pivot == right->begin()->get_key()
- * Thus, if pos.pos == left->get_size(), we want iter to point to
- * left with pos.pos at the end rather than right with pos.pos = 0
- * since the insertion would be to the left of the first element
- * of right and thus necessarily less than right->get_node_meta().begin.
- */
- if (pos.pos <= left->get_size()) {
- pos.node = left;
- } else {
- pos.node = right;
- pos.pos -= left->get_size();
-
- parent_pos.pos += 1;
- }
- }
- }
-
- return seastar::now();
-}
-
-template <typename NodeType>
-LBABtree::base_iertr::future<typename NodeType::Ref> get_node(
- op_context_t c,
- depth_t depth,
- paddr_t addr,
- laddr_t begin,
- laddr_t end);
-
-template <>
-LBABtree::base_iertr::future<LBALeafNodeRef> get_node<LBALeafNode>(
- op_context_t c,
- depth_t depth,
- paddr_t addr,
- laddr_t begin,
- laddr_t end) {
- assert(depth == 1);
- return LBABtree::get_leaf_node(c, addr, begin, end);
-}
-
-template <>
-LBABtree::base_iertr::future<LBAInternalNodeRef> get_node<LBAInternalNode>(
- op_context_t c,
- depth_t depth,
- paddr_t addr,
- laddr_t begin,
- laddr_t end) {
- return LBABtree::get_internal_node(c, depth, addr, begin, end);
-}
-
-template <typename NodeType>
-LBABtree::handle_merge_ret merge_level(
- op_context_t c,
- depth_t depth,
- LBABtree::node_position_t<LBAInternalNode> &parent_pos,
- LBABtree::node_position_t<NodeType> &pos)
-{
- LOG_PREFIX(LBABtree::merge_level);
- if (!parent_pos.node->is_pending()) {
- parent_pos.node = c.cache.duplicate_for_write(
- c.trans, parent_pos.node
- )->cast<LBAInternalNode>();
- }
-
- auto iter = parent_pos.get_iter();
- assert(iter.get_offset() < parent_pos.node->get_size());
- bool donor_is_left = ((iter.get_offset() + 1) == parent_pos.node->get_size());
- auto donor_iter = donor_is_left ? (iter - 1) : (iter + 1);
- auto next_iter = donor_iter + 1;
- auto begin = donor_iter->get_key();
- auto end = next_iter == parent_pos.node->end()
- ? parent_pos.node->get_node_meta().end
- : next_iter->get_key();
-
- DEBUGT("parent: {}, node: {}", c.trans, *parent_pos.node, *pos.node);
- return get_node<NodeType>(
- c,
- depth,
- donor_iter.get_val().maybe_relative_to(parent_pos.node->get_paddr()),
- begin,
- end
- ).si_then([FNAME, c, iter, donor_iter, donor_is_left, &parent_pos, &pos](
- typename NodeType::Ref donor) {
- auto [l, r] = donor_is_left ?
- std::make_pair(donor, pos.node) : std::make_pair(pos.node, donor);
-
- auto [liter, riter] = donor_is_left ?
- std::make_pair(donor_iter, iter) : std::make_pair(iter, donor_iter);
-
- if (donor->at_min_capacity()) {
- auto replacement = l->make_full_merge(c, r);
-
- parent_pos.node->update(
- liter,
- replacement->get_paddr());
- parent_pos.node->remove(riter);
-
- pos.node = replacement;
- if (donor_is_left) {
- pos.pos += l->get_size();
- parent_pos.pos--;
- }
-
- DEBUGT("l: {}, r: {}, replacement: {}", c.trans, *l, *r, *replacement);
- c.cache.retire_extent(c.trans, l);
- c.cache.retire_extent(c.trans, r);
- } else {
- auto [replacement_l, replacement_r, pivot] =
- l->make_balanced(
- c,
- r,
- !donor_is_left);
-
- parent_pos.node->update(
- liter,
- replacement_l->get_paddr());
- parent_pos.node->replace(
- riter,
- pivot,
- replacement_r->get_paddr());
-
- if (donor_is_left) {
- assert(parent_pos.pos > 0);
- parent_pos.pos--;
- }
-
- auto orig_position = donor_is_left ?
- l->get_size() + pos.pos :
- pos.pos;
- if (orig_position < replacement_l->get_size()) {
- pos.node = replacement_l;
- pos.pos = orig_position;
- } else {
- parent_pos.pos++;
- pos.node = replacement_r;
- pos.pos = orig_position - replacement_l->get_size();
- }
-
- DEBUGT("l: {}, r: {}, replacement_l: {}, replacement_r: {}",
- c.trans, *l, *r, *replacement_l, *replacement_r);
- c.cache.retire_extent(c.trans, l);
- c.cache.retire_extent(c.trans, r);
- }
-
- return seastar::now();
- });
-}
-
-LBABtree::handle_merge_ret LBABtree::handle_merge(
- op_context_t c,
- iterator &iter)
-{
- LOG_PREFIX(LBATree::handle_merge);
- if (iter.get_depth() == 1 ||
- !iter.leaf.node->below_min_capacity()) {
- DEBUGT(
- "no need to merge leaf, leaf size {}, depth {}",
- c.trans,
- iter.leaf.node->get_size(),
- iter.get_depth());
- return seastar::now();
- }
-
- return seastar::do_with(
- depth_t{1},
- [FNAME, this, c, &iter](auto &to_merge) {
- return trans_intr::repeat(
- [FNAME, this, c, &iter, &to_merge] {
- DEBUGT(
- "merging depth {}",
- c.trans,
- to_merge);
- auto &parent_pos = iter.get_internal(to_merge + 1);
- auto merge_fut = handle_merge_iertr::now();
- if (to_merge > 1) {
- auto &pos = iter.get_internal(to_merge);
- merge_fut = merge_level(c, to_merge, parent_pos, pos);
- } else {
- auto &pos = iter.leaf;
- merge_fut = merge_level(c, to_merge, parent_pos, pos);
- }
-
- return merge_fut.si_then([FNAME, this, c, &iter, &to_merge] {
- ++to_merge;
- auto &pos = iter.get_internal(to_merge);
- if (to_merge == iter.get_depth()) {
- if (pos.node->get_size() == 1) {
- DEBUGT("collapsing root", c.trans);
- c.cache.retire_extent(c.trans, pos.node);
- assert(pos.pos == 0);
- auto node_iter = pos.get_iter();
- root.set_location(
- node_iter->get_val().maybe_relative_to(pos.node->get_paddr()));
- iter.internal.pop_back();
- root.set_depth(iter.get_depth());
- c.trans.get_lba_tree_stats().depth = iter.get_depth();
- root_dirty = true;
- } else {
- DEBUGT("no need to collapse root", c.trans);
- }
- return seastar::stop_iteration::yes;
- } else if (pos.node->below_min_capacity()) {
- DEBUGT(
- "continuing, next node {} depth {} at min",
- c.trans,
- *pos.node,
- to_merge);
- return seastar::stop_iteration::no;
- } else {
- DEBUGT(
- "complete, next node {} depth {} not min",
- c.trans,
- *pos.node,
- to_merge);
- return seastar::stop_iteration::yes;
- }
- });
- });
- });
-}
-
-LBABtree::update_internal_mapping_ret LBABtree::update_internal_mapping(
- op_context_t c,
- depth_t depth,
- laddr_t laddr,
- paddr_t old_addr,
- paddr_t new_addr)
-{
- LOG_PREFIX(LBATree::update_internal_mapping);
- DEBUGT(
- "updating laddr {} at depth {} from {} to {}",
- c.trans,
- laddr,
- depth,
- old_addr,
- new_addr);
-
- return lower_bound(
- c, laddr
- ).si_then([=](auto iter) {
- assert(iter.get_depth() >= depth);
- if (depth == iter.get_depth()) {
- DEBUGT("update at root", c.trans);
-
- if (laddr != 0) {
- ERRORT(
- "updating root laddr {} at depth {} from {} to {},"
- "laddr is not 0",
- c.trans,
- laddr,
- depth,
- old_addr,
- new_addr,
- root.get_location());
- ceph_assert(0 == "impossible");
- }
-
- if (root.get_location() != old_addr) {
- ERRORT(
- "updating root laddr {} at depth {} from {} to {},"
- "root addr {} does not match",
- c.trans,
- laddr,
- depth,
- old_addr,
- new_addr,
- root.get_location());
- ceph_assert(0 == "impossible");
- }
-
- root.set_location(new_addr);
- root_dirty = true;
- } else {
- auto &parent = iter.get_internal(depth + 1);
- assert(parent.node);
- assert(parent.pos < parent.node->get_size());
- auto piter = parent.node->iter_idx(parent.pos);
-
- if (piter->get_key() != laddr) {
- ERRORT(
- "updating laddr {} at depth {} from {} to {},"
- "node {} pos {} val pivot addr {} does not match",
- c.trans,
- laddr,
- depth,
- old_addr,
- new_addr,
- *(parent.node),
- parent.pos,
- piter->get_key());
- ceph_assert(0 == "impossible");
- }
-
-
- if (piter->get_val() != old_addr) {
- ERRORT(
- "updating laddr {} at depth {} from {} to {},"
- "node {} pos {} val addr {} does not match",
- c.trans,
- laddr,
- depth,
- old_addr,
- new_addr,
- *(parent.node),
- parent.pos,
- piter->get_val());
- ceph_assert(0 == "impossible");
- }
-
- CachedExtentRef mut = c.cache.duplicate_for_write(
- c.trans,
- parent.node
- );
- LBAInternalNodeRef mparent = mut->cast<LBAInternalNode>();
- mparent->update(piter, new_addr);
-
- /* Note, iter is now invalid as we didn't update either the parent
- * node reference to the new mutable instance nor did we update the
- * child pointer to the new node. Not a problem as we'll now just
- * destruct it.
- */
- }
- return seastar::now();
- });
-}
-}
+++ /dev/null
-// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
-// vim: ts=8 sw=2 smarttab
-
-#pragma once
-
-#include <boost/container/static_vector.hpp>
-#include <sys/mman.h>
-#include <memory>
-#include <string.h>
-
-#include "crimson/os/seastore/lba_manager.h"
-#include "crimson/os/seastore/logging.h"
-#include "crimson/os/seastore/seastore_types.h"
-#include "crimson/os/seastore/lba_manager/btree/lba_btree_node.h"
-
-namespace crimson::os::seastore::lba_manager::btree {
-
-
-class LBABtree {
- static constexpr size_t MAX_DEPTH = 16;
-public:
- using base_iertr = LBAManager::base_iertr;
-
- class iterator;
- using iterator_fut = base_iertr::future<iterator>;
-
- using mapped_space_visitor_t = LBAManager::scan_mapped_space_func_t;
-
- class iterator {
- public:
- iterator(const iterator &rhs) noexcept :
- internal(rhs.internal), leaf(rhs.leaf) {}
- iterator(iterator &&rhs) noexcept :
- internal(std::move(rhs.internal)), leaf(std::move(rhs.leaf)) {}
-
- iterator &operator=(const iterator &) = default;
- iterator &operator=(iterator &&) = default;
-
- iterator_fut next(
- op_context_t c,
- mapped_space_visitor_t *visit=nullptr) const;
-
- iterator_fut prev(op_context_t c) const;
-
- void assert_valid() const {
- assert(leaf.node);
- assert(leaf.pos <= leaf.node->get_size());
-
- for (auto &i: internal) {
- (void)i;
- assert(i.node);
- assert(i.pos < i.node->get_size());
- }
- }
-
- depth_t get_depth() const {
- return internal.size() + 1;
- }
-
- auto &get_internal(depth_t depth) {
- assert(depth > 1);
- assert((depth - 2) < internal.size());
- return internal[depth - 2];
- }
-
- const auto &get_internal(depth_t depth) const {
- assert(depth > 1);
- assert((depth - 2) < internal.size());
- return internal[depth - 2];
- }
-
- laddr_t get_key() const {
- assert(!is_end());
- return leaf.node->iter_idx(leaf.pos).get_key();
- }
- lba_map_val_t get_val() const {
- assert(!is_end());
- auto ret = leaf.node->iter_idx(leaf.pos).get_val();
- ret.paddr = ret.paddr.maybe_relative_to(leaf.node->get_paddr());
- return ret;
- }
-
- bool is_end() const {
- // external methods may only resolve at a boundary if at end
- return at_boundary();
- }
-
- bool is_begin() const {
- for (auto &i: internal) {
- if (i.pos != 0)
- return false;
- }
- return leaf.pos == 0;
- }
-
- LBAPinRef get_pin() const {
- assert(!is_end());
- auto val = get_val();
- auto key = get_key();
- return std::make_unique<BtreeLBAPin>(
- leaf.node,
- val.paddr,
- lba_node_meta_t{ key, key + val.len, 0 });
- }
-
- private:
- iterator() noexcept {}
- iterator(depth_t depth) noexcept : internal(depth - 1) {}
-
- friend class LBABtree;
- static constexpr uint16_t INVALID = std::numeric_limits<uint16_t>::max();
- template <typename NodeType>
- struct node_position_t {
- typename NodeType::Ref node;
- uint16_t pos = INVALID;
-
- void reset() {
- *this = node_position_t{};
- }
-
- auto get_iter() {
- assert(pos != INVALID);
- assert(pos < node->get_size());
- return node->iter_idx(pos);
- }
- };
- boost::container::static_vector<
- node_position_t<LBAInternalNode>, MAX_DEPTH> internal;
- node_position_t<LBALeafNode> leaf;
-
- bool at_boundary() const {
- assert(leaf.pos <= leaf.node->get_size());
- return leaf.pos == leaf.node->get_size();
- }
-
- using handle_boundary_ertr = base_iertr;
- using handle_boundary_ret = handle_boundary_ertr::future<>;
- handle_boundary_ret handle_boundary(
- op_context_t c,
- mapped_space_visitor_t *visitor);
-
- depth_t check_split() const {
- if (!leaf.node->at_max_capacity()) {
- return 0;
- }
- for (depth_t split_from = 1; split_from < get_depth(); ++split_from) {
- if (!get_internal(split_from + 1).node->at_max_capacity())
- return split_from;
- }
- return get_depth();
- }
-
- depth_t check_merge() const {
- if (!leaf.node->below_min_capacity()) {
- return 0;
- }
- for (depth_t merge_from = 1; merge_from < get_depth(); ++merge_from) {
- if (!get_internal(merge_from + 1).node->below_min_capacity())
- return merge_from;
- }
- return get_depth();
- }
- };
-
- LBABtree(lba_root_t root) : root(root) {}
-
- bool is_root_dirty() const {
- return root_dirty;
- }
- lba_root_t get_root_undirty() {
- ceph_assert(root_dirty);
- root_dirty = false;
- return root;
- }
-
- /// mkfs
- using mkfs_ret = lba_root_t;
- static mkfs_ret mkfs(op_context_t c);
-
- /**
- * lower_bound
- *
- * @param c [in] context
- * @param addr [in] addr
- * @return least iterator >= key
- */
- iterator_fut lower_bound(
- op_context_t c,
- laddr_t addr,
- mapped_space_visitor_t *visit=nullptr) const;
-
- /**
- * upper_bound
- *
- * @param c [in] context
- * @param addr [in] addr
- * @return least iterator > key
- */
- iterator_fut upper_bound(
- op_context_t c,
- laddr_t addr
- ) const {
- return lower_bound(
- c, addr
- ).si_then([c, addr](auto iter) {
- if (!iter.is_end() && iter.get_key() == addr) {
- return iter.next(c);
- } else {
- return iterator_fut(
- interruptible::ready_future_marker{},
- iter);
- }
- });
- }
-
- /**
- * upper_bound_right
- *
- * @param c [in] context
- * @param addr [in] addr
- * @return least iterator i s.t. i.get_key() + i.get_val().len > key
- */
- iterator_fut upper_bound_right(
- op_context_t c,
- laddr_t addr) const
- {
- return lower_bound(
- c, addr
- ).si_then([c, addr](auto iter) {
- if (iter.is_begin()) {
- return iterator_fut(
- interruptible::ready_future_marker{},
- iter);
- } else {
- return iter.prev(
- c
- ).si_then([iter, addr](auto prev) {
- if ((prev.get_key() + prev.get_val().len) > addr) {
- return iterator_fut(
- interruptible::ready_future_marker{},
- prev);
- } else {
- return iterator_fut(
- interruptible::ready_future_marker{},
- iter);
- }
- });
- }
- });
- }
-
- iterator_fut begin(op_context_t c) const {
- return lower_bound(c, 0);
- }
- iterator_fut end(op_context_t c) const {
- return upper_bound(c, L_ADDR_MAX);
- }
-
- using iterate_repeat_ret_inner = base_iertr::future<
- seastar::stop_iteration>;
- template <typename F>
- static base_iertr::future<> iterate_repeat(
- op_context_t c,
- iterator_fut &&iter_fut,
- F &&f,
- mapped_space_visitor_t *visitor=nullptr) {
- return std::move(
- iter_fut
- ).si_then([c, visitor, f=std::forward<F>(f)](auto iter) {
- return seastar::do_with(
- iter,
- std::move(f),
- [c, visitor](auto &pos, auto &f) {
- return trans_intr::repeat(
- [c, visitor, &f, &pos] {
- return f(
- pos
- ).si_then([c, visitor, &pos](auto done) {
- if (done == seastar::stop_iteration::yes) {
- return iterate_repeat_ret_inner(
- interruptible::ready_future_marker{},
- seastar::stop_iteration::yes);
- } else {
- ceph_assert(!pos.is_end());
- return pos.next(
- c, visitor
- ).si_then([&pos](auto next) {
- pos = next;
- return iterate_repeat_ret_inner(
- interruptible::ready_future_marker{},
- seastar::stop_iteration::no);
- });
- }
- });
- });
- });
- });
- }
-
- /**
- * insert
- *
- * Inserts val at laddr with iter as a hint. If an element at laddr already
- * exists, returns an iterator to that element unchanged along with false.
- *
- * Invalidates all outstanding iterators for this tree on this transaction.
- *
- * @param c [in] op context
- * @param iter [in] hint; insertion is constant time if the insertion point is immediately prior to iter
- * @param laddr [in] addr at which to insert
- * @param val [in] val to insert
- * @return pair<iter, bool> where iter points to element at addr, bool true
- * iff element at laddr did not exist.
- */
- using insert_iertr = base_iertr;
- using insert_ret = insert_iertr::future<std::pair<iterator, bool>>;
- insert_ret insert(
- op_context_t c,
- iterator iter,
- laddr_t laddr,
- lba_map_val_t val
- );
- insert_ret insert(
- op_context_t c,
- laddr_t laddr,
- lba_map_val_t val) {
- return lower_bound(
- c, laddr
- ).si_then([this, c, laddr, val](auto iter) {
- return insert(c, iter, laddr, val);
- });
- }
-
- /**
- * update
- *
- * Invalidates all outstanding iterators for this tree on this transaction.
- *
- * @param c [in] op context
- * @param iter [in] iterator to element to update, must not be end
- * @param val [in] val with which to update
- * @return iterator to newly updated element
- */
- using update_iertr = base_iertr;
- using update_ret = update_iertr::future<iterator>;
- update_ret update(
- op_context_t c,
- iterator iter,
- lba_map_val_t val);
-
- /**
- * remove
- *
- * Invalidates all outstanding iterators for this tree on this transaction.
- *
- * @param c [in] op context
- * @param iter [in] iterator to element to remove, must not be end
- */
- using remove_iertr = base_iertr;
- using remove_ret = remove_iertr::future<>;
- remove_ret remove(
- op_context_t c,
- iterator iter);
-
- /**
- * init_cached_extent
- *
- * Checks whether e is live (reachable from lba tree) and drops or initializes
- * accordingly.
- *
- * Returns whether e is live.
- */
- using init_cached_extent_iertr = base_iertr;
- using init_cached_extent_ret = init_cached_extent_iertr::future<bool>;
- init_cached_extent_ret init_cached_extent(op_context_t c, CachedExtentRef e);
-
- /// get_leaf_if_live: get leaf node at laddr/addr if still live
- using get_leaf_if_live_iertr = base_iertr;
- using get_leaf_if_live_ret = get_leaf_if_live_iertr::future<CachedExtentRef>;
- get_leaf_if_live_ret get_leaf_if_live(
- op_context_t c,
- paddr_t addr,
- laddr_t laddr,
- seastore_off_t len);
-
- /// get_internal_if_live: get internal node at laddr/addr if still live
- using get_internal_if_live_iertr = base_iertr;
- using get_internal_if_live_ret = get_internal_if_live_iertr::future<CachedExtentRef>;
- get_internal_if_live_ret get_internal_if_live(
- op_context_t c,
- paddr_t addr,
- laddr_t laddr,
- seastore_off_t len);
-
- /**
- * rewrite_lba_extent
- *
- * Rewrites a fresh copy of extent into transaction and updates internal
- * references.
- */
- using rewrite_lba_extent_iertr = base_iertr;
- using rewrite_lba_extent_ret = rewrite_lba_extent_iertr::future<>;
- rewrite_lba_extent_ret rewrite_lba_extent(op_context_t c, CachedExtentRef e);
-
-private:
- lba_root_t root;
- bool root_dirty = false;
-
- using get_internal_node_iertr = base_iertr;
- using get_internal_node_ret = get_internal_node_iertr::future<LBAInternalNodeRef>;
- static get_internal_node_ret get_internal_node(
- op_context_t c,
- depth_t depth,
- paddr_t offset,
- laddr_t begin,
- laddr_t end);
-
- using get_leaf_node_iertr = base_iertr;
- using get_leaf_node_ret = get_leaf_node_iertr::future<LBALeafNodeRef>;
- static get_leaf_node_ret get_leaf_node(
- op_context_t c,
- paddr_t offset,
- laddr_t begin,
- laddr_t end);
-
- using lookup_root_iertr = base_iertr;
- using lookup_root_ret = lookup_root_iertr::future<>;
- lookup_root_ret lookup_root(
- op_context_t c,
- iterator &iter,
- mapped_space_visitor_t *visitor) const {
- if (root.get_depth() > 1) {
- return get_internal_node(
- c,
- root.get_depth(),
- root.get_location(),
- 0,
- L_ADDR_MAX
- ).si_then([this, visitor, &iter](LBAInternalNodeRef root_node) {
- iter.get_internal(root.get_depth()).node = root_node;
- if (visitor) (*visitor)(root_node->get_paddr(), root_node->get_length());
- return lookup_root_iertr::now();
- });
- } else {
- return get_leaf_node(
- c,
- root.get_location(),
- 0,
- L_ADDR_MAX
- ).si_then([visitor, &iter](LBALeafNodeRef root_node) {
- iter.leaf.node = root_node;
- if (visitor) (*visitor)(root_node->get_paddr(), root_node->get_length());
- return lookup_root_iertr::now();
- });
- }
- }
-
- using lookup_internal_level_iertr = base_iertr;
- using lookup_internal_level_ret = lookup_internal_level_iertr::future<>;
- template <typename F>
- static lookup_internal_level_ret lookup_internal_level(
- op_context_t c,
- depth_t depth,
- iterator &iter,
- F &f,
- mapped_space_visitor_t *visitor
- ) {
- assert(depth > 1);
- auto &parent_entry = iter.get_internal(depth + 1);
- auto parent = parent_entry.node;
- auto node_iter = parent->iter_idx(parent_entry.pos);
- auto next_iter = node_iter + 1;
- auto begin = node_iter->get_key();
- auto end = next_iter == parent->end()
- ? parent->get_node_meta().end
- : next_iter->get_key();
- return get_internal_node(
- c,
- depth,
- node_iter->get_val().maybe_relative_to(parent->get_paddr()),
- begin,
- end
- ).si_then([depth, visitor, &iter, &f](LBAInternalNodeRef node) {
- auto &entry = iter.get_internal(depth);
- entry.node = node;
- auto node_iter = f(*node);
- assert(node_iter != node->end());
- entry.pos = node_iter->get_offset();
- if (visitor) (*visitor)(node->get_paddr(), node->get_length());
- return seastar::now();
- });
- }
-
- using lookup_leaf_iertr = base_iertr;
- using lookup_leaf_ret = lookup_leaf_iertr::future<>;
- template <typename F>
- static lookup_internal_level_ret lookup_leaf(
- op_context_t c,
- iterator &iter,
- F &f,
- mapped_space_visitor_t *visitor
- ) {
- auto &parent_entry = iter.get_internal(2);
- auto parent = parent_entry.node;
- assert(parent);
- auto node_iter = parent->iter_idx(parent_entry.pos);
- auto next_iter = node_iter + 1;
- auto begin = node_iter->get_key();
- auto end = next_iter == parent->end()
- ? parent->get_node_meta().end
- : next_iter->get_key();
-
- return get_leaf_node(
- c,
- node_iter->get_val().maybe_relative_to(parent->get_paddr()),
- begin,
- end
- ).si_then([visitor, &iter, &f](LBALeafNodeRef node) {
- iter.leaf.node = node;
- auto node_iter = f(*node);
- iter.leaf.pos = node_iter->get_offset();
- if (visitor) (*visitor)(node->get_paddr(), node->get_length());
- return seastar::now();
- });
- }
-
- /**
- * lookup_depth_range
- *
- * Performs node lookups on depths [from, to) using li and ll to
- * specific target at each level. Note, may leave the iterator
- * at_boundary(), call handle_boundary() prior to returning out
- * lf LBABtree.
- */
- using lookup_depth_range_iertr = base_iertr;
- using lookup_depth_range_ret = lookup_depth_range_iertr::future<>;
- template <typename LI, typename LL>
- static lookup_depth_range_ret lookup_depth_range(
- op_context_t c, ///< [in] context
- iterator &iter, ///< [in,out] iterator to populate
- depth_t from, ///< [in] from inclusive
- depth_t to, ///< [in] to exclusive, (to <= from, to == from is a noop)
- LI &li, ///< [in] internal->iterator
- LL &ll, ///< [in] leaf->iterator
- mapped_space_visitor_t *visitor ///< [in] mapped space visitor
- ) {
- LOG_PREFIX(LBATree::lookup_depth_range);
- SUBDEBUGT(seastore_lba_details, "{} -> {}", c.trans, from, to);
- return seastar::do_with(
- from,
- [c, to, visitor, &iter, &li, &ll](auto &d) {
- return trans_intr::repeat(
- [c, to, visitor, &iter, &li, &ll, &d] {
- if (d > to) {
- return [&] {
- if (d > 1) {
- return lookup_internal_level(
- c,
- d,
- iter,
- li,
- visitor);
- } else {
- assert(d == 1);
- return lookup_leaf(
- c,
- iter,
- ll,
- visitor);
- }
- }().si_then([&d] {
- --d;
- return lookup_depth_range_iertr::make_ready_future<
- seastar::stop_iteration
- >(seastar::stop_iteration::no);
- });
- } else {
- return lookup_depth_range_iertr::make_ready_future<
- seastar::stop_iteration
- >(seastar::stop_iteration::yes);
- }
- });
- });
- }
-
- using lookup_iertr = base_iertr;
- using lookup_ret = lookup_iertr::future<iterator>;
- template <typename LI, typename LL>
- lookup_ret lookup(
- op_context_t c,
- LI &&lookup_internal,
- LL &&lookup_leaf,
- mapped_space_visitor_t *visitor
- ) const {
- LOG_PREFIX(LBATree::lookup);
- return seastar::do_with(
- iterator{root.get_depth()},
- std::forward<LI>(lookup_internal),
- std::forward<LL>(lookup_leaf),
- [FNAME, this, visitor, c](auto &iter, auto &li, auto &ll) {
- return lookup_root(
- c, iter, visitor
- ).si_then([FNAME, this, visitor, c, &iter, &li, &ll] {
- if (iter.get_depth() > 1) {
- auto &root_entry = *(iter.internal.rbegin());
- root_entry.pos = li(*(root_entry.node)).get_offset();
- } else {
- auto &root_entry = iter.leaf;
- auto riter = ll(*(root_entry.node));
- root_entry.pos = riter->get_offset();
- }
- SUBDEBUGT(seastore_lba_details, "got root, depth {}", c.trans, root.get_depth());
- return lookup_depth_range(
- c,
- iter,
- root.get_depth() - 1,
- 0,
- li,
- ll,
- visitor
- ).si_then([c, visitor, &iter] {
- if (iter.at_boundary()) {
- return iter.handle_boundary(c, visitor);
- } else {
- return lookup_iertr::now();
- }
- });
- }).si_then([&iter] {
- return std::move(iter);
- });
- });
- }
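lookup is parameterized by two functors: li picks, within an internal node, the child entry to descend into, and ll picks the target entry within the leaf. A hedged sketch of how a lower_bound-style query could be phrased in those terms (Node, child_containing, and leaf_lower_bound are hypothetical stand-ins; the real functors return node iterators rather than indices):

#include <cstddef>
#include <iostream>
#include <vector>

struct Node { std::vector<unsigned> keys; };

// internal level: index of the child whose range contains addr
// (the last key <= addr; keys[0] is the node's own lower bound)
std::size_t child_containing(const Node &n, unsigned addr) {
  std::size_t i = 0;
  while (i + 1 < n.keys.size() && n.keys[i + 1] <= addr) ++i;
  return i;
}

// leaf level: index of the first entry with key >= addr (lower_bound)
std::size_t leaf_lower_bound(const Node &n, unsigned addr) {
  std::size_t i = 0;
  while (i < n.keys.size() && n.keys[i] < addr) ++i;
  return i;
}

int main() {
  unsigned addr = 42;
  auto li = [addr](const Node &n) { return child_containing(n, addr); };
  auto ll = [addr](const Node &n) { return leaf_lower_bound(n, addr); };
  Node internal{{0, 16, 64}}, leaf{{40, 42, 48}};
  std::cout << li(internal) << " " << ll(leaf) << "\n";  // 1 1
}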
-
- /**
-   * find_insertion
- *
- * Prepare iter for insertion. iter should begin pointing at
- * the valid insertion point (lower_bound(laddr)).
- *
- * Upon completion, iter will point at the
- * position at which laddr should be inserted. iter may, upon completion,
- * point at the end of a leaf other than the end leaf if that's the correct
- * insertion point.
- */
- using find_insertion_iertr = base_iertr;
- using find_insertion_ret = find_insertion_iertr::future<>;
- static find_insertion_ret find_insertion(
- op_context_t c,
- laddr_t laddr,
- iterator &iter);
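The "end of a leaf other than the end leaf" case is easiest to see with a worked example. A small illustration, assuming two hypothetical leaves whose key ranges leave a gap at the tail of the first:

#include <algorithm>
#include <iostream>
#include <vector>

int main() {
  // two leaves: the first covers laddrs [0,100), the second [100,200)
  std::vector<unsigned> leaf0{10, 20}, leaf1{150};
  (void)leaf1;  // the tree continues to the right, so leaf0 is not the end leaf
  unsigned laddr = 90;
  // lower_bound within the covering leaf lands at leaf0.end() ...
  auto it = std::lower_bound(leaf0.begin(), leaf0.end(), laddr);
  std::cout << (it == leaf0.end()) << "\n";  // prints 1
  // ... and that end-of-leaf position is the correct insertion point for 90,
  // even though the tree itself has further entries to the right.
}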
-
- /**
- * handle_split
- *
- * Split nodes in iter as needed for insertion. First, scan iter from leaf
- * to find first non-full level. Then, split from there towards leaf.
- *
- * Upon completion, iter will point at the newly split insertion point. As
- * with find_insertion, iter's leaf pointer may be end without iter being
- * end.
- */
- using handle_split_iertr = base_iertr;
- using handle_split_ret = handle_split_iertr::future<>;
- handle_split_ret handle_split(
- op_context_t c,
- iterator &iter);
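A minimal sketch of the leaf-to-root scan the comment describes, using a hypothetical Level type rather than the real node classes: the first ancestor with spare capacity bounds how many levels below it must split.

#include <cstddef>
#include <iostream>
#include <vector>

struct Level { std::size_t size, capacity; };

// walk from the leaf toward the root until a node with spare capacity is
// found; every full level below it must be split on the way back down
std::size_t levels_to_split(const std::vector<Level> &path /* leaf first */) {
  std::size_t n = 0;
  for (const auto &l : path) {
    if (l.size < l.capacity) break;  // first non-full level: splitting stops
    ++n;
  }
  return n;
}

int main() {
  // full leaf, full parent, grandparent with room: two levels to split
  std::cout << levels_to_split({{4, 4}, {4, 4}, {2, 4}}) << "\n";  // 2
}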
-
- using handle_merge_iertr = base_iertr;
- using handle_merge_ret = handle_merge_iertr::future<>;
- handle_merge_ret handle_merge(
- op_context_t c,
- iterator &iter);
-
- using update_internal_mapping_iertr = base_iertr;
- using update_internal_mapping_ret = update_internal_mapping_iertr::future<>;
- update_internal_mapping_ret update_internal_mapping(
- op_context_t c,
- depth_t depth,
- laddr_t laddr,
- paddr_t old_addr,
- paddr_t new_addr);
-
- template <typename T>
- using node_position_t = iterator::node_position_t<T>;
-
- template <typename NodeType>
- friend base_iertr::future<typename NodeType::Ref> get_node(
- op_context_t c,
- depth_t depth,
- paddr_t addr,
- laddr_t begin,
- laddr_t end);
-
- template <typename NodeType>
- friend handle_merge_ret merge_level(
- op_context_t c,
- depth_t depth,
- node_position_t<LBAInternalNode> &parent_pos,
- node_position_t<NodeType> &pos);
-};
-
-}
#include "crimson/os/seastore/seastore_types.h"
#include "crimson/os/seastore/cache.h"
#include "crimson/os/seastore/cached_extent.h"
-#include "crimson/os/seastore/lba_manager/btree/lba_btree_node.h"
-#include "crimson/os/seastore/lba_manager/btree/btree_range_pin.h"
+
+#include "crimson/os/seastore/btree/btree_range_pin.h"
+#include "crimson/os/seastore/btree/fixed_kv_btree.h"
namespace crimson::os::seastore::lba_manager::btree {
using base_iertr = LBAManager::base_iertr;
-struct op_context_t {
- Cache &cache;
- Transaction &trans;
- btree_pin_set_t *pins = nullptr;
-};
-
/**
* lba_map_val_t
*
std::ostream& operator<<(std::ostream& out, const lba_map_val_t&);
-class BtreeLBAPin;
-using BtreeLBAPinRef = std::unique_ptr<BtreeLBAPin>;
-
constexpr size_t LBA_BLOCK_SIZE = 4096;
/**
* lba_node_meta_le_t
*
- * On disk layout for lba_node_meta_t
+ * On disk layout for fixed_kv_node_meta_t
*/
struct lba_node_meta_le_t {
laddr_le_t begin = laddr_le_t(0);
lba_node_meta_le_t() = default;
lba_node_meta_le_t(const lba_node_meta_le_t &) = default;
- explicit lba_node_meta_le_t(const lba_node_meta_t &val)
+ explicit lba_node_meta_le_t(const fixed_kv_node_meta_t<laddr_t> &val)
: begin(ceph_le64(val.begin)),
end(ceph_le64(val.end)),
depth(init_depth_le(val.depth)) {}
- operator lba_node_meta_t() const {
- return lba_node_meta_t{ begin, end, depth };
+ operator fixed_kv_node_meta_t<laddr_t>() const {
+ return fixed_kv_node_meta_t<laddr_t>{ begin, end, depth };
}
};
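The pattern above (explicit constructor to encode, conversion operator to decode) is the usual round trip between the in-memory meta and its on-disk little-endian form. A simplified, self-contained analogue with plain integers standing in for laddr_le_t/depth_le_t (which additionally byte-swap on big-endian hosts):

#include <cstdint>
#include <iostream>

struct meta_t { uint64_t begin = 0, end = 0; uint32_t depth = 0; };

struct meta_le_t {
  uint64_t begin_le = 0, end_le = 0;  // real code stores LE-typed fields here
  uint32_t depth_le = 0;
  meta_le_t() = default;
  explicit meta_le_t(const meta_t &v)
    : begin_le(v.begin), end_le(v.end), depth_le(v.depth) {}
  operator meta_t() const { return meta_t{begin_le, end_le, depth_le}; }
};

int main() {
  meta_t m{0, 4096, 1};
  meta_le_t on_disk(m);   // encode, as the explicit constructor in the diff
  meta_t back = on_disk;  // decode via the conversion operator
  std::cout << back.end << " " << back.depth << "\n";  // 4096 1
}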
struct LBANode : CachedExtent {
using LBANodeRef = TCachedExtentRef<LBANode>;
- btree_range_pin_t pin;
+ btree_range_pin_t<laddr_t> pin;
LBANode(ceph::bufferptr &&ptr) : CachedExtent(std::move(ptr)), pin(this) {}
LBANode(const LBANode &rhs)
: CachedExtent(rhs), pin(rhs.pin, this) {}
- virtual lba_node_meta_t get_node_meta() const = 0;
+ virtual fixed_kv_node_meta_t<laddr_t> get_node_meta() const = 0;
virtual ~LBANode() = default;
: LBANode,
common::FixedKVNodeLayout<
INTERNAL_NODE_CAPACITY,
- lba_node_meta_t, lba_node_meta_le_t,
+ fixed_kv_node_meta_t<laddr_t>, lba_node_meta_le_t,
laddr_t, laddr_le_t,
paddr_t, paddr_le_t> {
using Ref = TCachedExtentRef<LBAInternalNode>;
static constexpr extent_types_t TYPE = extent_types_t::LADDR_INTERNAL;
- lba_node_meta_t get_node_meta() const { return get_meta(); }
+ fixed_kv_node_meta_t<laddr_t> get_node_meta() const { return get_meta(); }
CachedExtentRef duplicate_for_write() final {
assert(delta_buffer.empty());
}
std::tuple<Ref, Ref, laddr_t>
- make_split_children(op_context_t c) {
+ make_split_children(op_context_t<laddr_t> c) {
auto left = c.cache.alloc_new_extent<LBAInternalNode>(
c.trans, LBA_BLOCK_SIZE);
auto right = c.cache.alloc_new_extent<LBAInternalNode>(
}
Ref make_full_merge(
- op_context_t c,
+ op_context_t<laddr_t> c,
Ref &right) {
auto replacement = c.cache.alloc_new_extent<LBAInternalNode>(
c.trans, LBA_BLOCK_SIZE);
std::tuple<Ref, Ref, laddr_t>
make_balanced(
- op_context_t c,
+ op_context_t<laddr_t> c,
Ref &_right,
bool prefer_left) {
ceph_assert(_right->get_type() == get_type());
: LBANode,
common::FixedKVNodeLayout<
LEAF_NODE_CAPACITY,
- lba_node_meta_t, lba_node_meta_le_t,
+ fixed_kv_node_meta_t<laddr_t>, lba_node_meta_le_t,
laddr_t, laddr_le_t,
lba_map_val_t, lba_map_val_le_t> {
using Ref = TCachedExtentRef<LBALeafNode>;
static constexpr extent_types_t TYPE = extent_types_t::LADDR_LEAF;
- lba_node_meta_t get_node_meta() const { return get_meta(); }
+ fixed_kv_node_meta_t<laddr_t> get_node_meta() const { return get_meta(); }
CachedExtentRef duplicate_for_write() final {
assert(delta_buffer.empty());
std::tuple<Ref, Ref, laddr_t>
- make_split_children(op_context_t c) {
+ make_split_children(op_context_t<laddr_t> c) {
auto left = c.cache.alloc_new_extent<LBALeafNode>(
c.trans, LBA_BLOCK_SIZE);
auto right = c.cache.alloc_new_extent<LBALeafNode>(
}
Ref make_full_merge(
- op_context_t c,
+ op_context_t<laddr_t> c,
Ref &right) {
auto replacement = c.cache.alloc_new_extent<LBALeafNode>(
c.trans, LBA_BLOCK_SIZE);
std::tuple<Ref, Ref, laddr_t>
make_balanced(
- op_context_t c,
+ op_context_t<laddr_t> c,
Ref &_right,
bool prefer_left) {
ceph_assert(_right->get_type() == get_type());
LOG_PREFIX(object_data_handler.cc::do_removals);
DEBUGT("decreasing ref: {}",
ctx.t,
- pin->get_laddr());
+ pin->get_key());
return ctx.tm.dec_ref(
ctx.t,
- pin->get_laddr()
+ pin->get_key()
).si_then(
[](auto){},
ObjectDataHandler::write_iertr::pass_further{},
region.len
).si_then([FNAME, ctx, ®ion](auto pin) {
ceph_assert(pin->get_length() == region.len);
- if (pin->get_laddr() != region.addr) {
+ if (pin->get_key() != region.addr) {
ERRORT(
"inconsistent laddr: pin: {} region {}",
ctx.t,
- pin->get_laddr(),
+ pin->get_key(),
region.addr);
}
- ceph_assert(pin->get_laddr() == region.addr);
+ ceph_assert(pin->get_key() == region.addr);
return ObjectDataHandler::write_iertr::now();
});
}
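The get_laddr() -> get_key() rename reflects that pins are no longer tied to logical addresses: the same shape can be keyed by laddr_t for the LBA tree or by a physical address for the planned backref tree. A hypothetical sketch of the idea (range_pin_t here is illustrative, not the real btree_range_pin_t):

#include <cstdint>
#include <iostream>

template <typename key_t>
struct range_pin_t {
  key_t key{};
  uint32_t len = 0;
  key_t get_key() const { return key; }       // was get_laddr() when LBA-only
  uint32_t get_length() const { return len; }
};

int main() {
  range_pin_t<uint64_t> lba_pin{4096, 8192};  // laddr_t-style key
  std::cout << lba_pin.get_key() + lba_pin.get_length() << "\n";  // 12288
}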
using split_ret = get_iertr::future<split_ret_bare>;
split_ret split_pin_left(context_t ctx, LBAPinRef &pin, laddr_t offset)
{
- const auto pin_offset = pin->get_laddr();
+ const auto pin_offset = pin->get_key();
assert_aligned(pin_offset);
ceph_assert(offset >= pin_offset);
if (offset == pin_offset) {
);
} else {
// Data, return up to offset to prepend
- auto to_prepend = offset - pin->get_laddr();
+ auto to_prepend = offset - pin->get_key();
return read_pin(ctx, pin->duplicate()
).si_then([to_prepend](auto extent) {
return get_iertr::make_ready_future<split_ret_bare>(
/// Reverse of split_pin_left
split_ret split_pin_right(context_t ctx, LBAPinRef &pin, laddr_t end)
{
- const auto pin_begin = pin->get_laddr();
- const auto pin_end = pin->get_laddr() + pin->get_length();
+ const auto pin_begin = pin->get_key();
+ const auto pin_end = pin->get_key() + pin->get_length();
assert_aligned(pin_end);
ceph_assert(pin_end >= end);
if (end == pin_end) {
).si_then([max_object_size=max_object_size, &object_data](auto pin) {
ceph_assert(pin->get_length() == max_object_size);
object_data.update_reserved(
- pin->get_laddr(),
+ pin->get_key(),
pin->get_length());
return write_iertr::now();
});
_pins.swap(pins);
ceph_assert(pins.size());
auto &pin = *pins.front();
- ceph_assert(pin.get_laddr() >= object_data.get_reserved_data_base());
+ ceph_assert(pin.get_key() >= object_data.get_reserved_data_base());
ceph_assert(
- pin.get_laddr() <= object_data.get_reserved_data_base() + size);
- auto pin_offset = pin.get_laddr() -
+ pin.get_key() <= object_data.get_reserved_data_base() + size);
+ auto pin_offset = pin.get_key() -
object_data.get_reserved_data_base();
- if ((pin.get_laddr() == (object_data.get_reserved_data_base() + size)) ||
+ if ((pin.get_key() == (object_data.get_reserved_data_base() + size)) ||
(pin.get_paddr().is_zero())) {
/* First pin is exactly at the boundary or is a zero pin. Either way,
* remove all pins and add a single zero pin to the end. */
to_write.emplace_back(
- pin.get_laddr(),
+ pin.get_key(),
object_data.get_reserved_data_len() - pin_offset);
return clear_iertr::now();
} else {
));
bl.append_zero(p2roundup(size, ctx.tm.get_block_size()) - size);
to_write.emplace_back(
- pin.get_laddr(),
+ pin.get_key(),
bl);
to_write.emplace_back(
object_data.get_reserved_data_base() +
offset,
bl.length());
ceph_assert(pins.size() >= 1);
- auto pin_begin = pins.front()->get_laddr();
+ auto pin_begin = pins.front()->get_key();
ceph_assert(pin_begin <= offset);
- auto pin_end = pins.back()->get_laddr() + pins.back()->get_length();
+ auto pin_end = pins.back()->get_key() + pins.back()->get_length();
ceph_assert(pin_end >= (offset + bl.length()));
return split_pin_left(
).si_then([ctx, loffset, len, &ret](auto _pins) {
// offset~len falls within reserved region and len > 0
ceph_assert(_pins.size() >= 1);
- ceph_assert((*_pins.begin())->get_laddr() <= loffset);
+ ceph_assert((*_pins.begin())->get_key() <= loffset);
return seastar::do_with(
std::move(_pins),
loffset,
-> read_iertr::future<> {
ceph_assert(current <= (loffset + len));
ceph_assert(
- (loffset + len) > pin->get_laddr());
+ (loffset + len) > pin->get_key());
laddr_t end = std::min(
- pin->get_laddr() + pin->get_length(),
+ pin->get_key() + pin->get_length(),
loffset + len);
if (pin->get_paddr().is_zero()) {
ceph_assert(end > current); // See LBAManager::get_mappings
len
).si_then([loffset, len, &object_data, &ret](auto &&pins) {
ceph_assert(pins.size() >= 1);
- ceph_assert((*pins.begin())->get_laddr() <= loffset);
+ ceph_assert((*pins.begin())->get_key() <= loffset);
for (auto &&i: pins) {
if (!(i->get_paddr().is_zero())) {
- auto ret_left = std::max(i->get_laddr(), loffset);
+ auto ret_left = std::max(i->get_key(), loffset);
auto ret_right = std::min(
- i->get_laddr() + i->get_length(),
+ i->get_key() + i->get_length(),
loffset + len);
assert(ret_right > ret_left);
ret.emplace(
};
/**
- * lba_root_t
+ * phy_tree_root_t
*/
-class __attribute__((packed)) lba_root_t {
+class __attribute__((packed)) phy_tree_root_t {
paddr_le_t root_addr;
depth_le_t depth = init_depth_le(0);
public:
-  lba_root_t() = default;
-  lba_root_t(paddr_t addr, depth_t depth)
+  phy_tree_root_t() = default;
+  phy_tree_root_t(paddr_t addr, depth_t depth)
: root_addr(addr), depth(init_depth_le(depth)) {}
- lba_root_t(const lba_root_t &o) = default;
- lba_root_t(lba_root_t &&o) = default;
- lba_root_t &operator=(const lba_root_t &o) = default;
- lba_root_t &operator=(lba_root_t &&o) = default;
+ phy_tree_root_t(const phy_tree_root_t &o) = default;
+ phy_tree_root_t(phy_tree_root_t &&o) = default;
+ phy_tree_root_t &operator=(const phy_tree_root_t &o) = default;
+ phy_tree_root_t &operator=(phy_tree_root_t &&o) = default;
paddr_t get_location() const {
return root_addr;
}
};
+using lba_root_t = phy_tree_root_t;
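With the rename, the packed root record is tree-agnostic, and the alias keeps every existing lba_root_t spelling compiling unchanged. A self-contained sketch of the pattern with demo_ stand-in types (the backref alias is hypothetical, not part of this diff):

#include <cstdint>
#include <iostream>

// simplified stand-ins for paddr_le_t / depth_le_t
class __attribute__((packed)) demo_phy_tree_root_t {
  uint64_t root_addr = 0;
  uint32_t depth = 0;
public:
  demo_phy_tree_root_t() = default;
  demo_phy_tree_root_t(uint64_t addr, uint32_t d) : root_addr(addr), depth(d) {}
  uint64_t get_location() const { return root_addr; }
  uint32_t get_depth() const { return depth; }
};

// the aliasing pattern from the diff: one packed root record, many trees
using demo_lba_root_t = demo_phy_tree_root_t;
using demo_backref_root_t = demo_phy_tree_root_t;  // hypothetical future use

int main() {
  demo_lba_root_t r{0x1000, 2};
  std::cout << sizeof(r) << " " << r.get_depth() << "\n";  // 12 2 (packed)
}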
/**
* root_t
return lba_manager->get_mapping(
t,
laddr).si_then([=, &t] (LBAPinRef pin) -> inner_ret {
- ceph_assert(pin->get_laddr() == laddr);
+ ceph_assert(pin->get_key() == laddr);
if (pin->get_paddr() == addr) {
if (pin->get_length() != (extent_len_t)len) {
ERRORT(
"Invalid pin {}~{} {} found for "
"extent {} {}~{} {}",
t,
- pin->get_laddr(),
+ pin->get_key(),
pin->get_length(),
pin->get_paddr(),
type,
std::map<laddr_t, lba_map_val_t> check;
auto get_op_context(Transaction &t) {
- return op_context_t{*cache, t};
+ return op_context_t<laddr_t>{*cache, t};
}
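op_context_t is now a template over the key bound type, so the test helper names op_context_t<laddr_t> explicitly. A simplified, self-contained analogue (Cache and Transaction here are stubs, and demo_paddr_t is a hypothetical future key type):

#include <cstdint>
#include <iostream>

struct Cache {};
struct Transaction { int id = 0; };

// stamped out per key type instead of being hard-wired to laddr_t
template <typename key_t>
struct op_context_t {
  Cache &cache;
  Transaction &trans;
};

using laddr_t = uint64_t;
using demo_paddr_t = uint32_t;  // stand-in; a backref tree would key on paddr

int main() {
  Cache c;
  Transaction t{7};
  op_context_t<laddr_t> lba_ctx{c, t};           // as in get_op_context above
  op_context_t<demo_paddr_t> backref_ctx{c, t};  // hypothetical future use
  std::cout << lba_ctx.trans.id + backref_ctx.trans.id << "\n";  // 14
}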
LBAManager::mkfs_ret test_structure_setup(Transaction &t) final {
}).unsafe_get0();
logger().debug("alloc'd: {}", *ret);
EXPECT_EQ(len, ret->get_length());
- auto [b, e] = get_overlap(t, ret->get_laddr(), len);
+ auto [b, e] = get_overlap(t, ret->get_key(), len);
EXPECT_EQ(b, e);
t.mappings.emplace(
std::make_pair(
- ret->get_laddr(),
+ ret->get_key(),
test_extent_t{
ret->get_paddr(),
ret->get_length(),
EXPECT_EQ(ret_list.size(), 1);
auto &ret = *ret_list.begin();
EXPECT_EQ(i.second.addr, ret->get_paddr());
- EXPECT_EQ(laddr, ret->get_laddr());
+ EXPECT_EQ(laddr, ret->get_key());
EXPECT_EQ(len, ret->get_length());
auto ret_pin = with_trans_intr(
t, laddr);
}).unsafe_get0();
EXPECT_EQ(i.second.addr, ret_pin->get_paddr());
- EXPECT_EQ(laddr, ret_pin->get_laddr());
+ EXPECT_EQ(laddr, ret_pin->get_key());
EXPECT_EQ(len, ret_pin->get_length());
}
with_trans_intr(
check_mappings(t);
check_mappings();
}
- incref_mapping(t, ret->get_laddr());
- decref_mapping(t, ret->get_laddr());
+ incref_mapping(t, ret->get_key());
+ decref_mapping(t, ret->get_key());
}
logger().debug("submitting transaction");
submit_test_transaction(std::move(t));