omap_manager.cc
omap_manager/btree/btree_omap_manager.cc
omap_manager/btree/omap_btree_node_impl.cc
+ btree/btree_range_pin.cc
+ btree/fixed_kv_node.cc
onode.cc
onode_manager/staged-fltree/node.cc
onode_manager/staged-fltree/node_extent_manager.cc
const_iterator insert(
const_iterator iter,
paddr_t key,
- backref_map_val_t val) final {
+ backref_map_val_t val,
+ LogicalCachedExtent*) final {
journal_insert(
iter,
key,
void update(
const_iterator iter,
- backref_map_val_t val) final {
+ backref_map_val_t val,
+ LogicalCachedExtent*) final {
return journal_update(
iter,
val,
c,
*state.insert_iter,
state.last_end,
- val
+ val,
+ nullptr
).si_then([&state, c, addr, len, key](auto &&p) {
LOG_PREFIX(BtreeBackrefManager::new_mapping);
auto [iter, inserted] = std::move(p);
BtreeBackrefPin() = default;
BtreeBackrefPin(
CachedExtentRef parent,
+ uint16_t pos,
backref_map_val_t &val,
backref_node_meta_t &&meta)
: BtreeNodePin(
parent,
+ pos,
val.laddr,
val.len,
std::forward<backref_node_meta_t>(meta)),
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "crimson/os/seastore/btree/btree_range_pin.h"
+#include "crimson/os/seastore/btree/fixed_kv_node.h"
+
+namespace crimson::os::seastore {
+
+template <typename key_t, typename val_t>
+void BtreeNodePin<key_t, val_t>::link_extent(LogicalCachedExtent *ref) {
+  // Attach `ref` to this pin.  If `ref` is neither pending nor exist-clean
+  // it is additionally registered as child `pos` of a stable node chosen
+  // from `parent` (see the three cases below); afterwards `pos` is reset
+  // to the "unset" sentinel.
+  using node_t = FixedKVNode<key_t>;
+  constexpr auto unset_pos = std::numeric_limits<uint16_t>::max();
+
+  assert(ref->is_valid());
+  // it's only when reading logical extents from disk that we need to
+  // link them to lba leaves
+  const bool needs_linking = !ref->is_pending() && !ref->is_exist_clean();
+  if (needs_linking) {
+    assert(parent);
+    assert(pos != unset_pos);
+
+    node_t *stable = nullptr;
+    if (parent->is_initial_pending()) {
+      // initial-pending parent: ask it for the stable node covering the
+      // beginning of this pin's range
+      stable = &static_cast<node_t&>(*parent).get_stable_for_key(
+        pin.range.begin);
+    } else if (parent->is_mutation_pending()) {
+      // mutation-pending parent: link against its prior instance
+      stable = &static_cast<node_t&>(*parent->get_prior_instance());
+    } else {
+      // otherwise the parent itself must be a valid, non-pending node
+      assert(!parent->is_pending() && parent->is_valid());
+      stable = &static_cast<node_t&>(*parent);
+    }
+    stable->link_child(ref, pos);
+    pos = unset_pos;
+  }
+  pin.set_extent(ref);
+}
+
+template void BtreeNodePin<laddr_t, paddr_t>::link_extent(LogicalCachedExtent*);
+template void BtreeNodePin<paddr_t, laddr_t>::link_extent(LogicalCachedExtent*);
+} // namespace crimson::os::seastore
val_t value;
extent_len_t len;
btree_range_pin_t<key_t> pin;
+ uint16_t pos = std::numeric_limits<uint16_t>::max();
public:
using val_type = val_t;
BtreeNodePin(
CachedExtentRef parent,
+ uint16_t pos,
val_t &value,
extent_len_t len,
fixed_kv_node_meta_t<key_t> &&meta)
- : parent(parent), value(value), len(len) {
+ : parent(parent), value(value), len(len), pos(pos) {
pin.set_range(std::move(meta));
}
+ CachedExtentRef get_parent() const final {
+ return parent;
+ }
+
btree_range_pin_t<key_t>& get_range_pin() {
return pin;
}
parent = pin;
}
- void link_extent(LogicalCachedExtent *ref) final {
- pin.set_extent(ref);
- }
+ void link_extent(LogicalCachedExtent *ref) final;
extent_len_t get_length() const final {
ceph_assert(pin.range.end > pin.range.begin);
#include "crimson/os/seastore/btree/btree_range_pin.h"
#include "crimson/os/seastore/root_block.h"
+#define RESERVATION_PTR reinterpret_cast<ChildableCachedExtent*>(0x1)
+
namespace crimson::os::seastore::lba_manager::btree {
struct lba_map_val_t;
}
namespace crimson::os::seastore {
+bool is_valid_child_ptr(ChildableCachedExtent* child);
+
template <typename T>
phy_tree_root_t& get_phy_tree_root(root_t& r);
auto key = get_key();
return std::make_unique<pin_t>(
leaf.node,
+ leaf.pos,
val,
fixed_kv_node_meta_t<node_key_t>{ key, key + val.len, 0 });
}
op_context_t<node_key_t> c,
iterator iter,
node_key_t laddr,
- node_val_t val
+ node_val_t val,
+ LogicalCachedExtent* nextent
) {
LOG_PREFIX(FixedKVBtree::insert);
SUBTRACET(
iter.is_end() ? min_max_t<node_key_t>::max : iter.get_key());
return seastar::do_with(
iter,
- [this, c, laddr, val](auto &ret) {
+ [this, c, laddr, val, nextent](auto &ret) {
return find_insertion(
c, laddr, ret
- ).si_then([this, c, laddr, val, &ret] {
+ ).si_then([this, c, laddr, val, &ret, nextent] {
if (!ret.at_boundary() && ret.get_key() == laddr) {
return insert_ret(
interruptible::ready_future_marker{},
++(get_tree_stats<self_type>(c.trans).num_inserts);
return handle_split(
c, ret
- ).si_then([c, laddr, val, &ret] {
+ ).si_then([c, laddr, val, &ret, nextent] {
if (!ret.leaf.node->is_mutable()) {
CachedExtentRef mut = c.cache.duplicate_for_write(
c.trans, ret.leaf.node
assert(iter == ret.leaf.node->end() || iter->get_key() > laddr);
assert(laddr >= ret.leaf.node->get_meta().begin &&
laddr < ret.leaf.node->get_meta().end);
- ret.leaf.node->insert(iter, laddr, val);
+ ret.leaf.node->insert(iter, laddr, val, nextent);
return insert_ret(
interruptible::ready_future_marker{},
std::make_pair(ret, true));
insert_ret insert(
op_context_t<node_key_t> c,
node_key_t laddr,
- node_val_t val) {
+ node_val_t val,
+ LogicalCachedExtent* nextent) {
return lower_bound(
c, laddr
- ).si_then([this, c, laddr, val](auto iter) {
- return this->insert(c, iter, laddr, val);
+ ).si_then([this, c, laddr, val, nextent](auto iter) {
+ return this->insert(c, iter, laddr, val, nextent);
});
}
update_ret update(
op_context_t<node_key_t> c,
iterator iter,
- node_val_t val)
+ node_val_t val,
+ LogicalCachedExtent* nextent)
{
LOG_PREFIX(FixedKVBtree::update);
SUBTRACET(
++(get_tree_stats<self_type>(c.trans).num_updates);
iter.leaf.node->update(
iter.leaf.node->iter_idx(iter.leaf.pos),
- val);
+ val,
+ nextent);
return update_ret(
interruptible::ready_future_marker{},
iter);
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "crimson/os/seastore/btree/fixed_kv_node.h"
+
+namespace crimson::os::seastore {
+
+bool is_valid_child_ptr(ChildableCachedExtent* child) {
+  // A slot holds a real child only when it is neither empty (nullptr)
+  // nor the RESERVATION_PTR placeholder.
+  if (child == nullptr) {
+    return false;
+  }
+  return child != RESERVATION_PTR;
+}
+
+} // namespace crimson::os::seastore
* Base class enabling recursive lookup between internal and leaf nodes.
*/
template <typename node_key_t>
-struct FixedKVNode : CachedExtent {
+struct FixedKVNode : ChildableCachedExtent {
using FixedKVNodeRef = TCachedExtentRef<FixedKVNode>;
- struct parent_tracker_t
- : public boost::intrusive_ref_counter<
- parent_tracker_t, boost::thread_unsafe_counter> {
- parent_tracker_t(FixedKVNodeRef parent)
- : parent(parent) {}
- parent_tracker_t(FixedKVNode* parent)
- : parent(parent) {}
- FixedKVNodeRef parent = nullptr;
- ~parent_tracker_t() {
- // this is parent's tracker, reset it
- if (parent->my_tracker == this) {
- parent->my_tracker = nullptr;
- }
- }
- };
-
- using parent_tracker_ref = boost::intrusive_ptr<parent_tracker_t>;
btree_range_pin_t<node_key_t> pin;
struct copy_source_cmp_t {
* its "prior_instance" if the node is the result of a rewrite), with which
* the lba range of this node overlaps.
*/
- std::vector<CachedExtent*> children;
+ std::vector<ChildableCachedExtent*> children;
std::set<FixedKVNodeRef, copy_source_cmp_t> copy_sources;
uint16_t capacity = 0;
parent_tracker_t* my_tracker = nullptr;
- parent_tracker_ref parent_tracker;
RootBlockRef root_block;
+  // True when this node holds either a parent tracker or a root_block
+  // reference.  The two are asserted to be mutually exclusive.
+  bool is_linked() {
+    const bool via_tracker = has_parent_tracker();
+    const bool via_root = static_cast<bool>(root_block);
+    assert(!(via_tracker && via_root));
+    return via_tracker || via_root;
+  }
+
FixedKVNode(uint16_t capacity, ceph::bufferptr &&ptr)
- : CachedExtent(std::move(ptr)),
+ : ChildableCachedExtent(std::move(ptr)),
pin(this),
children(capacity, nullptr),
capacity(capacity) {}
FixedKVNode(const FixedKVNode &rhs)
- : CachedExtent(rhs),
+ : ChildableCachedExtent(rhs),
pin(rhs.pin, this),
children(rhs.capacity, nullptr),
capacity(rhs.capacity) {}
set_child_ptracker(child);
}
+ virtual bool is_leaf_and_has_children() const = 0;
+
template<typename iter_t>
void insert_child_ptr(iter_t iter, ChildableCachedExtent* child) {
auto raw_children = children.data();
&raw_children[offset + 1],
&raw_children[offset],
(get_node_size() - offset) * sizeof(ChildableCachedExtent*));
- children[offset] = child;
- set_child_ptracker(child);
+ if (child) {
+ children[offset] = child;
+ set_child_ptracker(child);
+ } else {
+ // this can only happen when reserving lba spaces
+ ceph_assert(is_leaf_and_has_children());
+ // this is to avoid mistakenly copying pointers from
+ // copy sources when committing this lba node, because
+ // we rely on pointers' "nullness" to avoid copying
+ // pointers for updated values
+ children[offset] = RESERVATION_PTR;
+ }
}
template<typename iter_t>
: stable_parent(stable_parent), pos(pos) {}
};
- void link_child(FixedKVNode* child, uint16_t pos) {
+ void link_child(ChildableCachedExtent* child, uint16_t pos) {
assert(pos < get_node_size());
assert(child);
ceph_assert(!is_pending());
auto pos = iter.get_offset();
assert(children.capacity());
auto child = children[pos];
- if (child) {
+ if (is_valid_child_ptr(child)) {
return child_pos_t(child->get_transactional_view(t));
} else if (is_pending()) {
auto key = iter.get_key();
auto &sparent = get_stable_for_key(key);
auto spos = sparent.child_pos_for_key(key);
auto child = sparent.children[spos];
- if (child) {
+ if (is_valid_child_ptr(child)) {
return child_pos_t(child->get_transactional_view(t));
} else {
return child_pos_t(&sparent, spos);
return;
}
ceph_assert(!root_block);
- parent_tracker = prior.parent_tracker;
- auto &parent = parent_tracker->parent;
- assert(parent);
- assert(parent->is_valid());
+ take_prior_parent_tracker();
+ assert(is_parent_valid());
+ auto parent = get_parent_node<FixedKVNode>();
//TODO: can this search be avoided?
auto off = parent->lower_bound_offset(get_node_meta().begin);
assert(parent->get_key_from_idx(off) == get_node_meta().begin);
assert(prior.my_tracker || prior.is_children_empty());
if (prior.my_tracker) {
- prior.my_tracker->parent.reset(this);
+ prior.my_tracker->reset_parent(this);
my_tracker = prior.my_tracker;
// All my initial pending children is pointing to the original
// tracker which has been dropped by the above line, so need
ceph_assert(end <= children.end());
for (auto it = begin; it != end; it++) {
auto child = *it;
- if (child) {
- set_child_ptracker((FixedKVNode*)child);
+ if (is_valid_child_ptr(child)) {
+ set_child_ptracker(child);
}
}
}
}
void on_invalidated(Transaction &t) final {
- parent_tracker.reset();
+ reset_parent_tracker();
}
bool is_rewrite() {
void on_initial_write() final {
// All in-memory relative addrs are necessarily block-relative
resolve_relative_addrs(get_paddr());
- ceph_assert(
- parent_tracker
- ? (parent_tracker->parent && parent_tracker->parent->is_valid())
- : true);
+ if (pin.is_root()) {
+ reset_parent_tracker();
+ }
+ assert(has_parent_tracker() ? (is_parent_valid()) : true);
}
- void set_child_ptracker(FixedKVNode *child) {
- if (!my_tracker) {
- my_tracker = new parent_tracker_t(this);
+ void set_child_ptracker(ChildableCachedExtent *child) {
+ if (!this->my_tracker) {
+ this->my_tracker = new parent_tracker_t(this);
}
- child->parent_tracker.reset(my_tracker);
+ child->reset_parent_tracker(this->my_tracker);
}
void on_clean_read() final {
: FixedKVNode<NODE_KEY>(rhs),
node_layout_t(this->get_bptr().c_str()) {}
+ bool is_leaf_and_has_children() const final {
+ return false;
+ }
+
uint16_t get_node_split_pivot() final {
return this->get_split_pivot().get_offset();
}
ceph_assert(this->root_block);
unlink_phy_tree_root_node<NODE_KEY>(this->root_block);
} else {
- ceph_assert(this->parent_tracker);
- auto &parent = this->parent_tracker->parent;
- ceph_assert(parent);
+ ceph_assert(this->is_parent_valid());
+ auto parent = this->template get_parent_node<FixedKVNode<NODE_KEY>>();
auto off = parent->lower_bound_offset(this->get_meta().begin);
assert(parent->get_key_from_idx(off) == this->get_meta().begin);
assert(parent->children[off] == this);
}
}
- std::ostream &print_detail(std::ostream &out) const
+ std::ostream &_print_detail(std::ostream &out) const
{
out << ", size=" << this->get_size()
<< ", meta=" << this->get_meta()
- << ", parent_tracker=" << (void*)this->parent_tracker.get();
- if (this->parent_tracker) {
- out << ", parent=" << (void*)this->parent_tracker->parent.get();
- }
- out << ", my_tracker=" << (void*)this->my_tracker;
+ << ", my_tracker=" << (void*)this->my_tracker;
if (this->my_tracker) {
- out << ", my_tracker->parent=" << (void*)this->my_tracker->parent.get();
+ out << ", my_tracker->parent=" << (void*)this->my_tracker->get_parent().get();
}
return out << ", root_block=" << (void*)this->root_block.get();
}
VAL,
VAL_LE>;
using internal_const_iterator_t = typename node_layout_t::const_iterator;
+ using this_type_t = FixedKVLeafNode<
+ CAPACITY,
+ NODE_KEY,
+ NODE_KEY_LE,
+ VAL,
+ VAL_LE,
+ node_size,
+ node_type_t,
+ has_children>;
+ using base_t = FixedKVNode<NODE_KEY>;
FixedKVLeafNode(ceph::bufferptr &&ptr)
- : FixedKVNode<NODE_KEY>(0, std::move(ptr)),
+ : FixedKVNode<NODE_KEY>(has_children ? CAPACITY : 0, std::move(ptr)),
node_layout_t(this->get_bptr().c_str()) {}
FixedKVLeafNode(const FixedKVLeafNode &rhs)
: FixedKVNode<NODE_KEY>(rhs),
static constexpr bool do_has_children = has_children;
+ bool is_leaf_and_has_children() const final {
+ return has_children;
+ }
+
uint16_t get_node_split_pivot() final {
return this->get_split_pivot().get_offset();
}
- bool validate_stable_children() final {
+ bool validate_stable_children() override {
return true;
}
ceph_assert(this->root_block);
unlink_phy_tree_root_node<NODE_KEY>(this->root_block);
} else {
- ceph_assert(this->parent_tracker);
- auto &parent = this->parent_tracker->parent;
- ceph_assert(parent);
+ ceph_assert(this->is_parent_valid());
+ auto parent = this->template get_parent_node<FixedKVNode<NODE_KEY>>();
auto off = parent->lower_bound_offset(this->get_meta().begin);
assert(parent->get_key_from_idx(off) == this->get_meta().begin);
assert(parent->children[off] == this);
}
}
- void on_replace_prior(Transaction &t) final {
- this->set_parent_tracker();
- assert(this->mutate_state.empty());
+  // For leaves that track children (has_children) and are initial-pending:
+  // pull child pointers from the prior instance (rewrite only) and from
+  // the stable copy sources, fix up tracker/prior-instance state, then
+  // drop copy_sources.  Leaves without children do no work here.
+  void prepare_write() final {
+    if constexpr (has_children) {
+      if (this->is_initial_pending()) {
+        if (this->is_rewrite()) {
+          this->set_children_from_prior_instance();
+        }
+        this->copy_children_from_stable_sources(
+          [this](base_t &source, uint16_t idx) {
+            // copy sources must be leaves of the very same extent type
+            ceph_assert(source.get_type() == this->get_type());
+            return static_cast<this_type_t&>(source).iter_idx(idx);
+          }
+        );
+        if (this->is_rewrite()) {
+          this->reset_prior_instance();
+        } else {
+          this->adjust_ptracker_for_children();
+        }
+        assert(this->validate_stable_children());
+        this->copy_sources.clear();
+      }
+    }
+    // an initial-pending leaf must not carry copy sources past this point
+    assert(!this->is_initial_pending() || this->copy_sources.empty());
+  }
+
+  // Invoked when this (non-rewrite) leaf replaces its prior instance.
+  // Child-tracking leaves first take over the prior instance's child
+  // pointers; every leaf then takes the parent tracker from the prior
+  // instance.
+  void on_replace_prior(Transaction&) final {
+    ceph_assert(!this->is_rewrite());
+    if constexpr (has_children) {
+      this->set_children_from_prior_instance();
+      auto &prev = static_cast<this_type_t&>(*this->get_prior_instance());
+      const auto num_copied = this->copy_children_from_stable_source(
+        prev,
+        prev.begin(),
+        prev.end(),
+        this->begin());
+      ceph_assert(num_copied <= this->get_node_size());
+      assert(this->validate_stable_children());
+    }
+    // common tail of both configurations
+    this->set_parent_tracker_from_prior_instance();
+  }
uint16_t lower_bound_offset(NODE_KEY key) const final {
virtual void update(
internal_const_iterator_t iter,
- VAL val) = 0;
+ VAL val,
+ LogicalCachedExtent* nextent) = 0;
virtual internal_const_iterator_t insert(
internal_const_iterator_t iter,
NODE_KEY addr,
- VAL val) = 0;
+ VAL val,
+ LogicalCachedExtent* nextent) = 0;
virtual void remove(internal_const_iterator_t iter) = 0;
std::tuple<Ref, Ref, NODE_KEY>
c.trans, node_size, placement_hint_t::HOT, INIT_GENERATION);
auto right = c.cache.template alloc_new_extent<node_type_t>(
c.trans, node_size, placement_hint_t::HOT, INIT_GENERATION);
+ if constexpr (has_children) {
+ this->split_child_ptrs(*left, *right);
+ }
auto pivot = this->split_into(*left, *right);
left->pin.set_range(left->get_meta());
right->pin.set_range(right->get_meta());
Ref &right) {
auto replacement = c.cache.template alloc_new_extent<node_type_t>(
c.trans, node_size, placement_hint_t::HOT, INIT_GENERATION);
+ if constexpr (has_children) {
+ replacement->merge_child_ptrs(*this, *right);
+ }
replacement->merge_from(*this, *right->template cast<node_type_t>());
replacement->pin.set_range(replacement->get_meta());
return replacement;
prefer_left,
*replacement_left,
*replacement_right);
+ if constexpr (has_children) {
+ this->balance_child_ptrs(
+ *this,
+ right,
+ prefer_left,
+ *replacement_left,
+ *replacement_right);
+ }
replacement_left->pin.set_range(replacement_left->get_meta());
replacement_right->pin.set_range(replacement_right->get_meta());
this->resolve_relative_addrs(base);
}
- std::ostream &print_detail(std::ostream &out) const
+ std::ostream &_print_detail(std::ostream &out) const
{
- out << ", size=" << this->get_size()
- << ", meta=" << this->get_meta()
- << ", parent_tracker=" << (void*)this->parent_tracker.get();
- if (this->parent_tracker) {
- out << ", parent=" << (void*)this->parent_tracker->parent.get();
- }
- return out;
+ return out << ", size=" << this->get_size()
+ << ", meta=" << this->get_meta();
}
constexpr static size_t get_min_capacity() {
#include "crimson/common/log.h"
+#include "crimson/os/seastore/btree/fixed_kv_node.h"
+
namespace {
[[maybe_unused]] seastar::logger& logger() {
return crimson::get_logger(ceph_subsys_seastore_tm);
}
}
-std::ostream &LogicalCachedExtent::print_detail(std::ostream &out) const
+// Prints the tracker's own address followed by the address of the parent
+// extent it refers to.
+std::ostream &operator<<(std::ostream &out, const parent_tracker_t &tracker) {
+  out << "parent_tracker=" << static_cast<const void*>(&tracker);
+  // get_parent() is only consulted after the tracker address is written
+  const auto parent_ref = tracker.get_parent();
+  out << ", parent=" << static_cast<const void*>(parent_ref.get());
+  return out;
+}
+
+// Appends the parent-tracker description to `out`, then lets the derived
+// class append its own fields through _print_detail().
+//
+// Every detail field is written as ", key=value".  The tracker's stream
+// operator does not emit the leading separator itself, so it is added here;
+// without it the tracker text would be glued to whatever was printed
+// before, unlike the nullptr branch below.
+std::ostream &ChildableCachedExtent::print_detail(std::ostream &out) const {
+  if (parent_tracker) {
+    out << ", " << *parent_tracker;
+  } else {
+    out << ", parent_tracker=" << (void*)nullptr;
+  }
+  _print_detail(out);
+  return out;
+}
+
+std::ostream &LogicalCachedExtent::_print_detail(std::ostream &out) const
{
out << ", laddr=" << laddr;
if (pin) {
on_invalidated(t);
}
+// On destruction, an extent that holds a parent tracker and is valid and
+// not pending clears the slot pointing at it in its parent node.
+LogicalCachedExtent::~LogicalCachedExtent() {
+  const bool must_unlink =
+    has_parent_tracker() && is_valid() && !is_pending();
+  if (!must_unlink) {
+    return;
+  }
+  assert(get_parent_node());
+  auto leaf = get_parent_node<FixedKVNode<laddr_t>>();
+  auto slot = leaf->lower_bound_offset(laddr);
+  // the slot found for our laddr must be exactly ours
+  assert(leaf->get_key_from_idx(slot) == laddr);
+  assert(leaf->children[slot] == this);
+  leaf->children[slot] = nullptr;
+}
+
+// Called on a mutation-pending extent: takes the prior instance's parent
+// tracker and points the parent's child slot for `laddr` at this extent.
+void LogicalCachedExtent::on_replace_prior(Transaction &t) {
+  assert(is_mutation_pending());
+  take_prior_parent_tracker();
+  assert(get_parent_node());
+  auto leaf = get_parent_node<FixedKVNode<laddr_t>>();
+  //TODO: can this search be avoided?
+  auto slot = leaf->lower_bound_offset(laddr);
+  assert(leaf->get_key_from_idx(slot) == laddr);
+  leaf->children[slot] = this;
+}
+
+// If the tracked parent still records this object as its own tracker,
+// clear that back-pointer so it does not dangle.
+// NOTE(review): the parent is always viewed as FixedKVNode<laddr_t> here;
+// confirm this is intended for trackers owned by nodes with another key type.
+parent_tracker_t::~parent_tracker_t() {
+  auto &owner = static_cast<FixedKVNode<laddr_t>&>(*parent);
+  const bool owner_points_here = (owner.my_tracker == this);
+  if (owner_points_here) {
+    // this is parent's tracker, reset it
+    owner.my_tracker = nullptr;
+  }
+}
+
std::ostream &operator<<(std::ostream &out, const LBAPin &rhs)
{
return out << "LBAPin(" << rhs.get_key() << "~" << rhs.get_length()
size_t node_size,
bool leaf_has_children>
class FixedKVBtree;
+template <typename, typename>
+class BtreeNodePin;
// #define DEBUG_CACHED_EXTENT_REF
#ifdef DEBUG_CACHED_EXTENT_REF
void set_invalid(Transaction &t);
+ // a rewrite extent has an invalid prior_instance,
+ // and a mutation_pending extent has a valid prior_instance
CachedExtentRef get_prior_instance() {
return prior_instance;
}
friend class crimson::os::seastore::SegmentedAllocator;
friend class crimson::os::seastore::TransactionManager;
friend class crimson::os::seastore::ExtentPlacementManager;
+ template <typename, typename>
+ friend class BtreeNodePin;
};
std::ostream &operator<<(std::ostream &, CachedExtent::extent_state_t);
virtual key_t get_key() const = 0;
virtual PhysicalNodePinRef<key_t, val_t> duplicate() const = 0;
virtual bool has_been_invalidated() const = 0;
+ virtual CachedExtentRef get_parent() const = 0;
virtual ~PhysicalNodePin() {}
};
}
};
+// Reference-counted (non-thread-safe counter) holder of a reference to a
+// parent extent.  Child extents keep a parent_tracker_ref to one of these.
+class parent_tracker_t
+  : public boost::intrusive_ref_counter<
+      parent_tracker_t, boost::thread_unsafe_counter> {
+public:
+  parent_tracker_t(CachedExtentRef parent)
+    : parent(parent) {}
+  parent_tracker_t(CachedExtent* parent)
+    : parent(parent) {}
+  ~parent_tracker_t();
+
+  // True when a parent is set and that parent reports itself valid.
+  bool is_valid() const {
+    if (!parent) {
+      return false;
+    }
+    return parent->is_valid();
+  }
+
+  // Replace the tracked parent.
+  void reset_parent(CachedExtentRef p) {
+    parent = p;
+  }
+
+  // Return the tracked parent, cast to T unless T is plain CachedExtent.
+  // Asserts that a parent is set.
+  template <typename T = CachedExtent>
+  TCachedExtentRef<T> get_parent() const {
+    ceph_assert(parent);
+    if constexpr (!std::is_same_v<T, CachedExtent>) {
+      return parent->template cast<T>();
+    } else {
+      return parent;
+    }
+  }
+private:
+  CachedExtentRef parent;
+};
+
+std::ostream &operator<<(std::ostream &, const parent_tracker_t &);
+
+using parent_tracker_ref = boost::intrusive_ptr<parent_tracker_t>;
+
+// A CachedExtent that can hold a reference to a parent_tracker_t, and
+// through it reach its parent extent.
+class ChildableCachedExtent : public CachedExtent {
+public:
+  template <typename... T>
+  ChildableCachedExtent(T&&... t) : CachedExtent(std::forward<T>(t)...) {}
+
+  // Whether a tracker is currently attached.
+  bool has_parent_tracker() const {
+    return static_cast<bool>(parent_tracker);
+  }
+
+  // Attach tracker `p`; with no argument the current tracker is dropped.
+  void reset_parent_tracker(parent_tracker_t *p = nullptr) {
+    parent_tracker.reset(p);
+  }
+
+  // True only when a tracker is attached and it reports a valid parent.
+  bool is_parent_valid() const {
+    if (!parent_tracker) {
+      return false;
+    }
+    return parent_tracker->is_valid();
+  }
+
+  // Parent extent as seen through the tracker, cast to T.
+  template <typename T = CachedExtent>
+  TCachedExtentRef<T> get_parent_node() const {
+    assert(parent_tracker);
+    return parent_tracker->template get_parent<T>();
+  }
+
+  // Share the tracker held by this extent's prior instance.
+  void take_prior_parent_tracker() {
+    auto &prev = static_cast<ChildableCachedExtent&>(*get_prior_instance());
+    parent_tracker = prev.parent_tracker;
+  }
+
+  std::ostream &print_detail(std::ostream &out) const final;
+private:
+  parent_tracker_ref parent_tracker;
+
+  // Hook for derived classes to append their own fields; default adds none.
+  virtual std::ostream &_print_detail(std::ostream &out) const {
+    return out;
+  }
+};
/**
* LogicalCachedExtent
*
* Users of TransactionManager should be using extents derived from
* LogicalCachedExtent.
*/
-class LogicalCachedExtent : public CachedExtent {
+class LogicalCachedExtent : public ChildableCachedExtent {
public:
template <typename... T>
- LogicalCachedExtent(T&&... t) : CachedExtent(std::forward<T>(t)...) {}
+ LogicalCachedExtent(T&&... t)
+ : ChildableCachedExtent(std::forward<T>(t)...)
+ {}
void set_pin(LBAPinRef &&npin) {
assert(!pin);
return true;
}
- std::ostream &print_detail(std::ostream &out) const final;
+ std::ostream &_print_detail(std::ostream &out) const final;
+
+ void on_replace_prior(Transaction &t) final;
+
+ virtual ~LogicalCachedExtent();
protected:
+
virtual void apply_delta(const ceph::bufferlist &bl) = 0;
virtual std::ostream &print_detail_l(std::ostream &out) const {
return out;
private:
laddr_t laddr = L_ADDR_NULL;
LBAPinRef pin;
+
+ template <
+ typename node_key_t,
+ typename node_val_t,
+ typename internal_node_t,
+ typename leaf_node_t,
+ typename pin_t,
+ size_t node_size,
+ bool leaf_has_children>
+ friend class FixedKVBtree;
};
using LogicalCachedExtentRef = TCachedExtentRef<LogicalCachedExtent>;
t,
extent->get_laddr(),
extent->get_prior_paddr_and_reset(),
- extent->get_paddr()
+ extent->get_paddr(),
+ nullptr // all the extents should have already been
+ // added to the fixed_kv_btree
);
});
}
-template <bool leaf_has_children>
LBAManagerRef lba_manager::create_lba_manager(Cache &cache) {
- return LBAManagerRef(new btree::BtreeLBAManager<leaf_has_children>(cache));
+ return LBAManagerRef(new btree::BtreeLBAManager(cache));
}
-template LBAManagerRef lba_manager::create_lba_manager<true>(Cache &cache);
-template LBAManagerRef lba_manager::create_lba_manager<false>(Cache &cache);
-
}
Transaction &t,
laddr_t hint,
extent_len_t len,
- paddr_t addr) = 0;
+ paddr_t addr,
+ LogicalCachedExtent *nextent) = 0;
struct ref_update_result_t {
unsigned refcount = 0;
Transaction& t,
laddr_t laddr,
paddr_t prev_addr,
- paddr_t paddr) = 0;
+ paddr_t paddr,
+ LogicalCachedExtent *nextent) = 0;
/**
* update_mappings
class Cache;
namespace lba_manager {
-template <bool leaf_has_children>
LBAManagerRef create_lba_manager(Cache &cache);
}
Transaction &t,
laddr_t hint,
extent_len_t len,
- paddr_t addr)
+ paddr_t addr,
+ LogicalCachedExtent* nextent)
{
struct state_t {
laddr_t last_end;
cache,
c,
hint,
- [this, FNAME, c, hint, len, addr, lookup_attempts, &t](auto &btree, auto &state) {
+ [this, FNAME, c, hint, len, addr, lookup_attempts,
+ &t, nextent](auto &btree, auto &state) {
return LBABtree::iterate_repeat(
c,
btree.upper_bound_right(c, hint),
interruptible::ready_future_marker{},
seastar::stop_iteration::no);
}
- }).si_then([FNAME, c, addr, len, hint, &btree, &state] {
+ }).si_then([FNAME, c, addr, len, hint, &btree, &state, nextent] {
return btree.insert(
c,
*state.insert_iter,
state.last_end,
- lba_map_val_t{len, addr, 1, 0}
+ lba_map_val_t{len, addr, 1, 0},
+ nextent
).si_then([&state, FNAME, c, addr, len, hint](auto &&p) {
auto [iter, inserted] = std::move(p);
TRACET("{}~{}, hint={}, inserted at {}",
Transaction& t,
laddr_t laddr,
paddr_t prev_addr,
- paddr_t addr)
+ paddr_t addr,
+ LogicalCachedExtent *nextent)
{
LOG_PREFIX(BtreeLBAManager::update_mapping);
TRACET("laddr={}, paddr {} => {}", t, laddr, prev_addr, addr);
ceph_assert(in.paddr == prev_addr);
ret.paddr = addr;
return ret;
- }
+ },
+ nextent
).si_then([&t, laddr, prev_addr, addr, FNAME](auto result) {
DEBUGT("laddr={}, paddr {} => {} done -- {}",
t, laddr, prev_addr, addr, result);
ceph_assert((int)out.refcount + delta >= 0);
out.refcount += delta;
return out;
- }
+ },
+ nullptr
).si_then([&t, addr, delta, FNAME](auto result) {
DEBUGT("laddr={}, delta={} done -- {}", t, addr, delta, result);
return ref_update_result_t{
BtreeLBAManager::_update_mapping(
Transaction &t,
laddr_t addr,
- update_func_t &&f)
+ update_func_t &&f,
+ LogicalCachedExtent* nextent)
{
auto c = get_context(t);
return with_btree_ret<LBABtree, lba_map_val_t>(
cache,
c,
- [f=std::move(f), c, addr](auto &btree) mutable {
+ [f=std::move(f), c, addr, nextent](auto &btree) mutable {
return btree.lower_bound(
c, addr
- ).si_then([&btree, f=std::move(f), c, addr](auto iter)
+ ).si_then([&btree, f=std::move(f), c, addr, nextent](auto iter)
-> _update_mapping_ret {
if (iter.is_end() || iter.get_key() != addr) {
LOG_PREFIX(BtreeLBAManager::_update_mapping);
return btree.update(
c,
iter,
- ret
+ ret,
+ nextent
).si_then([ret](auto) {
return ret;
});
BtreeLBAPin() = default;
BtreeLBAPin(
CachedExtentRef parent,
+ uint16_t pos,
lba_map_val_t &val,
lba_node_meta_t &&meta)
: BtreeNodePin(
parent,
+ pos,
val.paddr,
val.len,
std::forward<lba_node_meta_t>(meta))
Transaction &t,
laddr_t hint,
extent_len_t len,
- paddr_t addr) final;
+ paddr_t addr,
+ LogicalCachedExtent*) final;
ref_ret decref_extent(
Transaction &t,
Transaction& t,
laddr_t laddr,
paddr_t prev_addr,
- paddr_t paddr) final;
+ paddr_t paddr,
+ LogicalCachedExtent*) final;
get_physical_extent_if_live_ret get_physical_extent_if_live(
Transaction &t,
_update_mapping_ret _update_mapping(
Transaction &t,
laddr_t addr,
- update_func_t &&f);
+ update_func_t &&f,
+ LogicalCachedExtent*);
};
using BtreeLBAManagerRef = std::unique_ptr<BtreeLBAManager>;
<< ")";
}
-std::ostream &LBALeafNode::print_detail(std::ostream &out) const
+std::ostream &LBALeafNode::_print_detail(std::ostream &out) const
{
- out << ", size=" << get_size()
- << ", meta=" << get_meta()
- << ", parent_tracker=" << (void*)parent_tracker.get();
- if (parent_tracker) {
- return out << ", parent=" << (void*)parent_tracker->parent.get();
+ out << ", size=" << this->get_size()
+ << ", meta=" << this->get_meta()
+ << ", my_tracker=" << (void*)this->my_tracker;
+ if (this->my_tracker) {
+ out << ", my_tracker->parent=" << (void*)this->my_tracker->get_parent().get();
}
- return out << ", root_block=" << (void*)root_block.get();
+ return out << ", root_block=" << (void*)this->root_block.get();
}
void LBALeafNode::resolve_relative_addrs(paddr_t base)
LBALeafNode,
true> {
using Ref = TCachedExtentRef<LBALeafNode>;
- using internal_iterator_t = const_iterator;
+ using parent_type_t = FixedKVLeafNode<
+ LEAF_NODE_CAPACITY,
+ laddr_t, laddr_le_t,
+ lba_map_val_t, lba_map_val_le_t,
+ LBA_BLOCK_SIZE,
+ LBALeafNode,
+ true>;
+ using internal_const_iterator_t =
+ typename parent_type_t::node_layout_t::const_iterator;
+ using internal_iterator_t =
+ typename parent_type_t::node_layout_t::iterator;
template <typename... T>
LBALeafNode(T&&... t) :
- FixedKVLeafNode(std::forward<T>(t)...) {}
+ parent_type_t(std::forward<T>(t)...) {}
static constexpr extent_types_t TYPE = extent_types_t::LADDR_LEAF;
+  // Checks that every real child pointer recorded in this leaf refers to an
+  // extent whose laddr equals the key stored at the same position.
+  // Returns false when the children vector is empty; a mismatch is logged
+  // and aborts.
+  bool validate_stable_children() final {
+    LOG_PREFIX(LBALeafNode::validate_stable_children);
+    if (this->children.empty()) {
+      return false;
+    }
+
+    for (auto entry : *this) {
+      auto *slot = this->children[entry.get_offset()];
+      if (!is_valid_child_ptr(slot)) {
+        // empty or reserved slot: nothing to validate
+        continue;
+      }
+      auto *child = static_cast<LogicalCachedExtent*>(slot);
+      if (child->get_laddr() == entry.get_key()) {
+        continue;
+      }
+      SUBERROR(seastore_fixedkv_tree,
+        "stable child not valid: child {}, key {}",
+        *child,
+        entry.get_key());
+      ceph_abort();
+      return false;
+    }
+    return true;
+  }
+
void update(
- const_iterator iter,
- lba_map_val_t val) final {
- val.paddr = maybe_generate_relative(val.paddr);
- return journal_update(
+ internal_const_iterator_t iter,
+ lba_map_val_t val,
+ LogicalCachedExtent* nextent) final {
+ LOG_PREFIX(LBALeafNode::update);
+ if (nextent) {
+ SUBTRACE(seastore_fixedkv_tree, "trans.{}, pos {}, {}",
+ this->pending_for_transaction,
+ iter.get_offset(),
+ *nextent);
+ // child-ptr may already be correct, see LBAManager::update_mappings()
+ this->update_child_ptr(iter, nextent);
+ }
+ val.paddr = this->maybe_generate_relative(val.paddr);
+ return this->journal_update(
iter,
val,
- maybe_get_delta_buffer());
+ this->maybe_get_delta_buffer());
}
- const_iterator insert(
- const_iterator iter,
+ internal_const_iterator_t insert(
+ internal_const_iterator_t iter,
laddr_t addr,
- lba_map_val_t val) final {
- val.paddr = maybe_generate_relative(val.paddr);
- journal_insert(
+ lba_map_val_t val,
+ LogicalCachedExtent* nextent) final {
+ LOG_PREFIX(LBALeafNode::insert);
+ SUBTRACE(seastore_fixedkv_tree, "trans.{}, pos {}, key {}, extent {}",
+ this->pending_for_transaction,
+ iter.get_offset(),
+ addr,
+ (void*)nextent);
+ this->insert_child_ptr(iter, nextent);
+ val.paddr = this->maybe_generate_relative(val.paddr);
+ this->journal_insert(
iter,
addr,
val,
- maybe_get_delta_buffer());
+ this->maybe_get_delta_buffer());
return iter;
}
- void remove(const_iterator iter) final {
- return journal_remove(
+ void remove(internal_const_iterator_t iter) final {
+ LOG_PREFIX(LBALeafNode::remove);
+ SUBTRACE(seastore_fixedkv_tree, "trans.{}, pos {}, key {}",
+ this->pending_for_transaction,
+ iter.get_offset(),
+ iter.get_key());
+ assert(iter != this->end());
+ this->remove_child_ptr(iter);
+ return this->journal_remove(
iter,
- maybe_get_delta_buffer());
+ this->maybe_get_delta_buffer());
}
// See LBAInternalNode, same concept
void resolve_relative_addrs(paddr_t base);
- void node_resolve_vals(iterator from, iterator to) const final {
- if (is_initial_pending()) {
+ void node_resolve_vals(
+ internal_iterator_t from,
+ internal_iterator_t to) const final
+ {
+ if (this->is_initial_pending()) {
for (auto i = from; i != to; ++i) {
auto val = i->get_val();
if (val.paddr.is_relative()) {
assert(val.paddr.is_block_relative());
- val.paddr = get_paddr().add_relative(val.paddr);
+ val.paddr = this->get_paddr().add_relative(val.paddr);
i->set_val(val);
}
}
}
}
- void node_unresolve_vals(iterator from, iterator to) const final {
- if (is_initial_pending()) {
+ void node_unresolve_vals(
+ internal_iterator_t from,
+ internal_iterator_t to) const final
+ {
+ if (this->is_initial_pending()) {
for (auto i = from; i != to; ++i) {
auto val = i->get_val();
if (val.paddr.is_relative()) {
auto val = i->get_val();
assert(val.paddr.is_record_relative());
- val.paddr = val.paddr.block_relative_to(get_paddr());
+ val.paddr = val.paddr.block_relative_to(this->get_paddr());
i->set_val(val);
}
}
return TYPE;
}
- std::ostream &print_detail(std::ostream &out) const final;
+ std::ostream &_print_detail(std::ostream &out) const final;
};
using LBALeafNodeRef = TCachedExtentRef<LBALeafNode>;
return out << "LADDR_INTERNAL";
case extent_types_t::LADDR_LEAF:
return out << "LADDR_LEAF";
+ case extent_types_t::DINK_LADDR_LEAF:
+ return out << "LADDR_LEAF";
case extent_types_t::ONODE_BLOCK_STAGED:
return out << "ONODE_BLOCK_STAGED";
case extent_types_t::OMAP_INNER:
ROOT = 0,
LADDR_INTERNAL = 1,
LADDR_LEAF = 2,
- DINK_LADDR_LEAF = 3,
+ DINK_LADDR_LEAF = 3, // should only be used for unit tests
OMAP_INNER = 4,
OMAP_LEAF = 5,
ONODE_BLOCK_STAGED = 6,
t,
lextent->get_laddr(),
lextent->get_paddr(),
- nlextent->get_paddr());
+ nlextent->get_paddr(),
+ nlextent.get());
}
TransactionManager::rewrite_extent_ret TransactionManager::rewrite_extent(
assert(!extent.has_pin());
assert(!extent.has_been_invalidated());
assert(!pin->has_been_invalidated());
+ assert(pin->get_parent());
extent.set_pin(std::move(pin));
lba_manager->add_pin(extent.get_pin());
}
t,
laddr_hint,
len,
- ext->get_paddr()
+ ext->get_paddr(),
+ ext.get()
).si_then([ext=std::move(ext), laddr_hint, &t, FNAME](auto &&ref) mutable {
ext->set_pin(std::move(ref));
SUBDEBUGT(seastore_tm, "new extent: {}, laddr_hint: {}", t, *ext, laddr_hint);
t,
laddr_hint,
length,
- existing_paddr
+ existing_paddr,
+ ext.get()
).si_then([ext=std::move(ext), laddr_hint, this](auto &&ref) {
ceph_assert(laddr_hint == ref->get_key());
ext->set_pin(std::move(ref));
t,
hint,
len,
- P_ADDR_ZERO);
+ P_ADDR_ZERO,
+ nullptr);
}
/* alloc_extents
check.emplace(addr, get_map_val(len));
lba_btree_update([=, this](auto &btree, auto &t) {
return btree.insert(
- get_op_context(t), addr, get_map_val(len)
+ get_op_context(t), addr, get_map_val(len), nullptr
).si_then([](auto){});
});
}
}
struct btree_lba_manager_test : btree_test_base {
- BtreeLBAManagerRef<false> lba_manager;
+ BtreeLBAManagerRef lba_manager;
btree_lba_manager_test() = default;
auto ret = with_trans_intr(
*t.t,
[=, this](auto &t) {
- return lba_manager->alloc_extent(t, hint, len, paddr);
+ return lba_manager->alloc_extent(t, hint, len, paddr, nullptr);
}).unsafe_get0();
logger().debug("alloc'd: {}", *ret);
EXPECT_EQ(len, ret->get_length());