--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "crimson/common/log.h"
+
+#include "crimson/os/seastore/lba_manager/btree/btree_range_pin.h"
+
+namespace {
+  // File-local logger accessor for this translation unit.
+  // NOTE(review): logs under ceph_subsys_filestore -- presumably a
+  // placeholder until a dedicated seastore subsystem exists; confirm.
+  seastar::logger& logger() {
+    return crimson::get_logger(ceph_subsys_filestore);
+  }
+}
+
+namespace crimson::os::seastore::lba_manager::btree {
+
+btree_range_pin_t::~btree_range_pin_t()
+{
+  // Linked into a pin set iff `pins` is set -- both or neither.
+  assert(!pins == !is_linked());
+  // Must not be destroyed while still holding a ref to the extent.
+  assert(!ref);
+  if (pins) {
+    logger().debug("{}: removing {}", __func__, *this);
+    // true: removing this pin may allow the parent to drop its ref.
+    pins->remove_pin(*this, true);
+  }
+  extent = nullptr;
+}
+
+// Unlink `pin` from the set and clear its back-pointer.  When
+// do_check_parent is set, re-evaluate whether pin's parent still needs
+// to hold a reference now that this child is gone.
+void btree_pin_set_t::remove_pin(btree_range_pin_t &pin, bool do_check_parent)
+{
+  logger().debug("{}: {}", __func__, pin);
+  assert(pin.is_linked());
+  assert(pin.pins);
+  assert(!pin.ref);
+
+  pins.erase(pin);
+  pin.pins = nullptr;
+
+  if (do_check_parent) {
+    check_parent(pin);
+  }
+}
+
+// Find the pin one level above `meta` whose range contains it, or
+// nullptr if no such pin is present in the set.
+btree_range_pin_t *btree_pin_set_t::maybe_get_parent(
+  const lba_node_meta_t &meta)
+{
+  // Search at the would-be parent's depth with the same begin address.
+  auto cmeta = meta;
+  cmeta.depth++;
+  auto iter = pins.upper_bound(cmeta, btree_range_pin_t::meta_cmp_t());
+  if (iter == pins.begin()) {
+    return nullptr;
+  } else {
+    // Step back to the candidate at or before (depth+1, begin) and
+    // verify it really is a parent of meta.
+    --iter;
+    if (iter->range.is_parent_of(meta)) {
+      return &*iter;
+    } else {
+      return nullptr;
+    }
+  }
+}
+
+// Find the first (lowest begin) pin one level below `meta` that `meta`
+// is a parent of, or nullptr if `meta` has no children in the set.
+const btree_range_pin_t *btree_pin_set_t::maybe_get_first_child(
+  const lba_node_meta_t &meta) const
+{
+  // Depth-0 entries cannot have children.
+  if (meta.depth == 0) {
+    return nullptr;
+  }
+
+  // Search at the child depth starting from meta's own begin address.
+  auto cmeta = meta;
+  cmeta.depth--;
+
+  auto iter = pins.lower_bound(cmeta, btree_range_pin_t::meta_cmp_t());
+  if (iter == pins.end()) {
+    return nullptr;
+  } else if (meta.is_parent_of(iter->range)) {
+    return &*iter;
+  } else {
+    return nullptr;
+  }
+}
+
+// Drop pin's extent reference once it has no children left in the set.
+void btree_pin_set_t::release_if_no_children(btree_range_pin_t &pin)
+{
+  assert(pin.is_linked());
+  if (maybe_get_first_child(pin.range) == nullptr) {
+    pin.drop_ref();
+  }
+}
+
+// Insert a fresh pin into the set, acquiring references as needed to
+// maintain the invariant that any node with children present in the set
+// is itself pinned in cache.
+void btree_pin_set_t::add_pin(btree_range_pin_t &pin)
+{
+  // Pin must be fresh: unlinked, unowned, holding no extent ref.
+  assert(!pin.is_linked());
+  assert(!pin.pins);
+  assert(!pin.ref);
+
+  auto [prev, inserted] = pins.insert(pin);
+  if (!inserted) {
+    // A pin with the same (depth, begin) key already exists -- this
+    // violates the set invariant and should be impossible.
+    logger().error("{}: unable to add {}, found {}", __func__, pin, *prev);
+    assert(0 == "impossible");
+    return;
+  }
+  pin.pins = this;
+  if (!pin.is_root()) {
+    // Invariant: every non-root pin's parent must already be present.
+    auto *parent = maybe_get_parent(pin.range);
+    assert(parent);
+    if (!parent->has_ref()) {
+      // Log the pin itself, not its address: was `parent`, which printed
+      // a raw pointer instead of using btree_range_pin_t's operator<<
+      // (matches the *parent usage in check_parent).
+      logger().debug("{}: acquiring parent {}", __func__, *parent);
+      parent->acquire_ref();
+    } else {
+      logger().debug("{}: parent has ref {}", __func__, *parent);
+    }
+  }
+  if (maybe_get_first_child(pin.range) != nullptr) {
+    // The new pin already has a child in the set, so pin its extent too.
+    logger().debug("{}: acquiring self {}", __func__, pin);
+    pin.acquire_ref();
+  }
+}
+
+// Remove `pin` without re-checking its parent; per the header comment,
+// the caller must call check_parent after adding replacement extents to
+// restore the invariant.
+void btree_pin_set_t::retire(btree_range_pin_t &pin)
+{
+  pin.drop_ref();
+  remove_pin(pin, false);
+}
+
+// Re-establish the invariant for pin's parent: if a parent exists,
+// release its ref when it has no remaining children; only the root may
+// legitimately have no parent in the set.
+void btree_pin_set_t::check_parent(btree_range_pin_t &pin)
+{
+  auto parent = maybe_get_parent(pin.range);
+  if (parent) {
+    logger().debug("{}: releasing parent {}", __func__, *parent);
+    release_if_no_children(*parent);
+  } else {
+    assert(pin.is_root());
+  }
+}
+
+}
#pragma once
+#include <boost/intrusive/set.hpp>
+
#include "crimson/os/seastore/cached_extent.h"
#include "crimson/os/seastore/seastore_types.h"
namespace crimson::os::seastore::lba_manager::btree {
+class LBANode;
+using LBANodeRef = TCachedExtentRef<LBANode>;
+
struct lba_node_meta_t {
  laddr_t begin = 0;
  laddr_t end = 0;
  depth_t depth = 0;
+  // True when this range is exactly one level above `other` and fully
+  // contains other's [begin, end).
+  bool is_parent_of(const lba_node_meta_t &other) const {
+    return (depth == other.depth + 1) &&
+      (begin <= other.begin) &&
+      (end >= other.end);
+  }
+
  std::pair<lba_node_meta_t, lba_node_meta_t> split_into(laddr_t pivot) const {
    return std::make_pair(
      lba_node_meta_t{begin, pivot, depth},
      lba_node_meta_t{pivot, end, depth});
  }
-  static lba_node_meta_t merge_from(const lba_node_meta_t &lhs, const lba_node_meta_t &rhs) {
+  static lba_node_meta_t merge_from(
+    const lba_node_meta_t &lhs, const lba_node_meta_t &rhs) {
    assert(lhs.depth == rhs.depth);
    return lba_node_meta_t{lhs.begin, rhs.end, lhs.depth};
  }
    lba_node_meta_t{lhs.begin, pivot, lhs.depth},
    lba_node_meta_t{pivot, rhs.end, lhs.depth});
  }
+
+  // The root node's range covers the entire logical address space.
+  bool is_root() const {
+    return begin == 0 && end == L_ADDR_MAX;
+  }
};
inline std::ostream &operator<<(
<< ")";
}
-/* BtreeLBAPin
+/**
+ * btree_range_pin_t
+ *
+ * Element tracked by btree_pin_set_t below. Encapsulates the intrusive_set
+ * hook, the lba_node_meta_t representing the lba range covered by a node,
+ * and extent and ref members intended to hold a reference when the extent
+ * should be pinned.
+ */
+class btree_pin_set_t;
+class btree_range_pin_t : public boost::intrusive::set_base_hook<> {
+  friend class btree_pin_set_t;
+  // lba range covered by the tracked node/extent
+  lba_node_meta_t range;
+
+  // back-pointer to the owning set; null while unlinked
+  btree_pin_set_t *pins = nullptr;
+
+  // We need to be able to remember extent without holding a reference,
+  // but we can do it more compactly -- TODO
+  CachedExtent *extent = nullptr;
+  // held only while this pin must keep `extent` alive in cache
+  CachedExtentRef ref;
+
+  using index_t = boost::intrusive::set<btree_range_pin_t>;
+
+  // Ordering key for the set: (-depth, begin).
+  // NOTE(review): if depth_t is unsigned, -meta.depth wraps; the order
+  // is still deterministic, but confirm the intended depth ordering.
+  static auto get_tuple(const lba_node_meta_t &meta) {
+    return std::make_tuple(-meta.depth, meta.begin);
+  }
+
+  // Pin the extent in cache by taking a reference to it.
+  void acquire_ref() {
+    ref = CachedExtentRef(extent);
+  }
+
+  void drop_ref() {
+    ref.reset();
+  }
+
+public:
+  btree_range_pin_t() = default;
+  // NOTE(review): single-arg ctor is implicit; consider marking explicit.
+  btree_range_pin_t(CachedExtent *extent)
+    : extent(extent) {}
+  // Copy rhs's range but track a different extent (no ref acquired).
+  btree_range_pin_t(const btree_range_pin_t &rhs, CachedExtent *extent)
+    : range(rhs.range), extent(extent) {}
+
+  bool has_ref() const {
+    return !!ref;
+  }
+
+  bool is_root() const {
+    return range.is_root();
+  }
+
+  void set_range(const lba_node_meta_t &nrange) {
+    range = nrange;
+  }
+  // May only be set once: extent must currently be null.
+  void set_extent(CachedExtent *nextent) {
+    assert(!extent);
+    extent = nextent;
+  }
+
+  // All three comparisons delegate to the same get_tuple key so the set
+  // ordering stays internally consistent.
+  friend bool operator<(
+    const btree_range_pin_t &lhs, const btree_range_pin_t &rhs) {
+    return get_tuple(lhs.range) < get_tuple(rhs.range);
+  }
+  friend bool operator>(
+    const btree_range_pin_t &lhs, const btree_range_pin_t &rhs) {
+    return get_tuple(lhs.range) > get_tuple(rhs.range);
+  }
+  friend bool operator==(
+    const btree_range_pin_t &lhs, const btree_range_pin_t &rhs) {
+    // was: rhs.get_tuple(rhs.range) -- legal (static member invoked
+    // through an instance) but inconsistent with the operators above;
+    // call the static helper directly.
+    return get_tuple(lhs.range) == get_tuple(rhs.range);
+  }
+
+  // Transparent comparator: lets the set be searched by lba_node_meta_t
+  // directly, without constructing a btree_range_pin_t key.
+  struct meta_cmp_t {
+    bool operator()(
+      const btree_range_pin_t &lhs, const lba_node_meta_t &rhs) const {
+      return get_tuple(lhs.range) < get_tuple(rhs);
+    }
+    bool operator()(
+      const lba_node_meta_t &lhs, const btree_range_pin_t &rhs) const {
+      return get_tuple(lhs) < get_tuple(rhs.range);
+    }
+  };
+
+  friend std::ostream &operator<<(
+    std::ostream &lhs,
+    const btree_range_pin_t &rhs) {
+    return lhs << "btree_range_pin_t("
+               << "begin=" << rhs.range.begin
+               << ", end=" << rhs.range.end
+               << ", depth=" << rhs.range.depth
+               << ", extent=" << rhs.extent
+               << ")";
+  }
+
+  friend class BtreeLBAPin;
+  // Defined out of line; unlinks from the owning set if still linked.
+  ~btree_range_pin_t();
+};
+
+/**
+ * btree_pin_set_t
*
- * References leaf node
+ * Ensures that for every cached node, all parent LBANodes required
+ * to map it are present in cache. Relocating these nodes can
+ * therefore be done without further reads or cache space.
*
- * TODO: does not at this time actually keep the relevant
- * leaf resident in memory. This is actually a bit tricky
- * as we can mutate and therefore replace a leaf referenced
- * by other, uninvolved but cached extents. Will need to
- * come up with some kind of pinning mechanism that handles
- * that well.
+ * Contains a btree_range_pin_t for every clean or dirty LBANode
+ * or LogicalCachedExtent instance in cache at any point in time.
+ * For any LBANode, the contained btree_range_pin_t will hold
+ * a reference to that node pinning it in cache as long as that
+ * node has children in the set. This invariant can be violated
+ * only by calling retire_extent and is repaired by calling
+ * check_parent synchronously after adding any new extents.
*/
-struct BtreeLBAPin : LBAPin {
+class btree_pin_set_t {
+  friend class btree_range_pin_t;
+  using pins_t = btree_range_pin_t::index_t;
+  pins_t pins;
+
+  // Intrusive lookup: recover the set iterator for an already-linked pin.
+  pins_t::iterator get_iter(btree_range_pin_t &pin) {
+    return pins_t::s_iterator_to(pin);
+  }
+
+  /// Removes pin from set optionally checking whether parent has other children
+  void remove_pin(btree_range_pin_t &pin, bool check_parent);
+
+  /// Returns parent pin if one exists
+  btree_range_pin_t *maybe_get_parent(const lba_node_meta_t &pin);
+
+  /// Returns earliest child pin if one exists
+  const btree_range_pin_t *maybe_get_first_child(const lba_node_meta_t &pin) const;
+
+  /// Releases pin if it has no children
+  void release_if_no_children(btree_range_pin_t &pin);
+
+public:
+  /// Adds pin to set, assumes set is consistent
+  void add_pin(btree_range_pin_t &pin);
+
+  /**
+   * retire/check_parent
+   *
+   * See BtreeLBAManager::complete_transaction.
+   * retire removes the specified pin from the set, but does not
+   * check parents.  After any new extents are added to the set,
+   * the caller is required to call check_parent to restore the
+   * invariant.
+   */
+  void retire(btree_range_pin_t &pin);
+  void check_parent(btree_range_pin_t &pin);
+
+  // All pins must already have been removed when the set is destroyed.
+  ~btree_pin_set_t() {
+    assert(pins.empty());
+  }
+};
+
+class BtreeLBAPin : public LBAPin {
+  friend class BtreeLBAManager;
  paddr_t paddr;
-  laddr_t laddr = L_ADDR_NULL;
-  extent_len_t length = 0;
-  unsigned refcount = 0;
+  // Tracks the lba range and (once linked) the extent for this mapping.
+  btree_range_pin_t pin;
public:
+  BtreeLBAPin() = default;
+
  BtreeLBAPin(
    paddr_t paddr,
-    laddr_t laddr,
-    extent_len_t length)
-    : paddr(paddr), laddr(laddr), length(length) {}
+    lba_node_meta_t &&meta)
+    : paddr(paddr) {
+    // set_range takes a const&, so the previous std::move(meta) had no
+    // effect; pass the lvalue directly.
+    pin.set_range(meta);
+  }
+
+  // Attach the extent backing this mapping; pin.set_extent asserts that
+  // the extent is only set once.
+  void link_extent(LogicalCachedExtent *ref) final {
+    pin.set_extent(ref);
+  }
  extent_len_t get_length() const final {
-    return length;
+    assert(pin.range.end > pin.range.begin);
+    return pin.range.end - pin.range.begin;
  }
+
  paddr_t get_paddr() const final {
    return paddr;
  }
+
  laddr_t get_laddr() const final {
-    return laddr;
+    return pin.range.begin;
  }
+
  LBAPinRef duplicate() const final {
-    return LBAPinRef(new BtreeLBAPin(*this));
+    // Copies only range and paddr; the duplicate's extent stays null
+    // until link_extent is called on it.
+    auto ret = std::unique_ptr<BtreeLBAPin>(new BtreeLBAPin);
+    ret->pin.set_range(pin.range);
+    ret->paddr = paddr;
+    return ret;
  }
};