Also removes LBANode::depth.
Signed-off-by: Samuel Just <sjust@redhat.com>
*/
template <
size_t CAPACITY,
+ typename Meta,
+ typename MetaInt,
typename K,
typename KINT,
typename V,
class FixedKVNodeLayout {
char *buf = nullptr;
- using L = absl::container_internal::Layout<ceph_le32, KINT, VINT>;
- static constexpr L layout{1, CAPACITY, CAPACITY};
+ using L = absl::container_internal::Layout<ceph_le32, MetaInt, KINT, VINT>;
+ static constexpr L layout{1, 1, CAPACITY, CAPACITY};
public:
template <bool is_const>
*layout.template Pointer<0>(buf) = size;
}
+ /**
+ * get_meta/set_meta
+ *
+ * Enables stashing a templated type within the layout.
+ * Cannot be modified after initial write as it is not represented
+ * in delta_t
+ */
+ Meta get_meta() const {
+ MetaInt &metaint = *layout.template Pointer<1>(buf);
+ return Meta(metaint);
+ }
+ void set_meta(const Meta &meta) {
+ *layout.template Pointer<1>(buf) = MetaInt(meta);
+ }
+
constexpr static size_t get_capacity() {
return CAPACITY;
}
right.copy_from_foreign(right.begin(), piviter, end());
right.set_size(end() - piviter);
+ auto [lmeta, rmeta] = get_meta().split_into(piviter->get_key());
+ left.set_meta(lmeta);
+ right.set_meta(rmeta);
+
return piviter->get_key();
}
right.begin(),
right.end());
set_size(left.get_size() + right.get_size());
+ set_meta(Meta::merge_from(left.get_meta(), right.get_meta()));
}
/**
if (total % 2 && prefer_left) {
pivot_idx++;
}
- auto replacement_pivot = pivot_idx > left.get_size() ?
+ auto replacement_pivot = pivot_idx >= left.get_size() ?
right.iter_idx(pivot_idx - left.get_size())->get_key() :
left.iter_idx(pivot_idx)->get_key();
replacement_right.set_size(total - pivot_idx);
}
+ auto [lmeta, rmeta] = Meta::rebalance(
+ left.get_meta(), right.get_meta(), replacement_pivot);
+ replacement_left.set_meta(lmeta);
+ replacement_right.set_meta(rmeta);
return replacement_pivot;
}
* Get pointer to start of key array
*/
KINT *get_key_ptr() {
- return layout.template Pointer<1>(buf);
+ return layout.template Pointer<2>(buf);
}
const KINT *get_key_ptr() const {
- return layout.template Pointer<1>(buf);
+ return layout.template Pointer<2>(buf);
}
/**
* Get pointer to start of val array
*/
VINT *get_val_ptr() {
- return layout.template Pointer<2>(buf);
+ return layout.template Pointer<3>(buf);
}
const VINT *get_val_ptr() const {
- return layout.template Pointer<2>(buf);
+ return layout.template Pointer<3>(buf);
}
/**
auto root_leaf = cache.alloc_new_extent<LBALeafNode>(
t,
LBA_BLOCK_SIZE);
- root_leaf->set_depth(1);
root_leaf->set_size(0);
+ root_leaf->set_meta({0, L_ADDR_MAX, 1});
croot->get_lba_root() =
root_t{
1,
croot = mut_croot->cast<RootBlock>();
}
auto nroot = cache.alloc_new_extent<LBAInternalNode>(t, LBA_BLOCK_SIZE);
- nroot->set_depth(root->depth + 1);
+ nroot->set_meta({0, L_ADDR_MAX, croot->root.lba_depth + 1});
nroot->journal_insert(
nroot->begin(),
L_ADDR_MIN,
root->get_paddr(),
nullptr);
croot->get_lba_root().lba_root_addr = nroot->get_paddr();
- croot->get_lba_root().lba_depth = root->depth + 1;
+ croot->get_lba_root().lba_depth = root->get_node_meta().depth + 1;
return nroot->split_entry(
get_context(t),
laddr, nroot->begin(), root);
namespace crimson::os::seastore::lba_manager::btree {
+/**
+ * lba_node_meta_t
+ *
+ * In-memory metadata for an lba btree node: the laddr range
+ * [begin, end) addressed by the node and the node's depth in
+ * the tree.
+ */
+struct lba_node_meta_t {
+  laddr_t begin = 0;
+  laddr_t end = 0;
+  depth_t depth = 0;
+
+  // Splits the range at pivot into ({begin, pivot}, {pivot, end});
+  // both halves keep the same depth.  Used when a node is split.
+  std::pair<lba_node_meta_t, lba_node_meta_t> split_into(laddr_t pivot) const {
+    return std::make_pair(
+      lba_node_meta_t{begin, pivot, depth},
+      lba_node_meta_t{pivot, end, depth});
+  }
+
+  // Joins the ranges of two adjacent same-depth nodes being merged:
+  // the result covers [lhs.begin, rhs.end).
+  static lba_node_meta_t merge_from(const lba_node_meta_t &lhs, const lba_node_meta_t &rhs) {
+    assert(lhs.depth == rhs.depth);
+    return lba_node_meta_t{lhs.begin, rhs.end, lhs.depth};
+  }
+
+  // Recomputes both ranges after entries are rebalanced between two
+  // adjacent same-depth nodes around the new pivot key.
+  static std::pair<lba_node_meta_t, lba_node_meta_t>
+  rebalance(const lba_node_meta_t &lhs, const lba_node_meta_t &rhs, laddr_t pivot) {
+    assert(lhs.depth == rhs.depth);
+    return std::make_pair(
+      lba_node_meta_t{lhs.begin, pivot, lhs.depth},
+      lba_node_meta_t{pivot, rhs.end, lhs.depth});
+  }
+};
+
+// Debug formatter for lba_node_meta_t.
+// NOTE(review): the printed label is "btree_node_meta_t" while the
+// type is lba_node_meta_t -- confirm the mismatch is intentional
+// (the string is runtime output, so it is left untouched here).
+inline std::ostream &operator<<(
+  std::ostream &lhs,
+  const lba_node_meta_t &rhs)
+{
+  return lhs << "btree_node_meta_t("
+	     << "begin=" << rhs.begin
+	     << ", end=" << rhs.end
+	     << ", depth=" << rhs.depth
+	     << ")";
+}
+
/* BtreeLBAPin
*
* References leaf node
#include <string.h>
#include "crimson/common/log.h"
+#include "crimson/os/seastore/lba_manager/btree/btree_range_pin.h"
namespace crimson::os::seastore::lba_manager::btree {
using lookup_range_ertr = LBAManager::get_mapping_ertr;
using lookup_range_ret = LBAManager::get_mapping_ret;
- depth_t depth = 0;
LBANode(ceph::bufferptr &&ptr) : CachedExtent(std::move(ptr)) {}
- LBANode(const LBANode &rhs) = default;
+ LBANode(const LBANode &rhs)
+ : CachedExtent(rhs) {}
- void set_depth(depth_t _depth) { depth = _depth; }
+ virtual lba_node_meta_t get_node_meta() const = 0;
/**
* lookup_range
+// Appends size and node meta (replacing the removed depth field) to
+// this extent's debug output.
std::ostream &LBAInternalNode::print_detail(std::ostream &out) const
{
  return out << ", size=" << get_size()
-	     << ", depth=" << depth;
+	     << ", meta=" << get_meta();
}
LBAInternalNode::lookup_range_ret LBAInternalNode::lookup_range(
[this, c, &result, addr, len](const auto &val) mutable {
return get_lba_btree_extent(
c,
- depth-1,
+ get_meta().depth - 1,
val.get_val(),
get_paddr()).safe_then(
[c, &result, addr, len](auto extent) mutable {
auto insertion_pt = get_containing_child(laddr);
return get_lba_btree_extent(
c,
- depth-1,
+ get_meta().depth - 1,
insertion_pt->get_val(),
get_paddr()).safe_then(
[this, insertion_pt, c, laddr, val=std::move(val)](
{
return get_lba_btree_extent(
c,
- depth-1,
+ get_meta().depth - 1,
get_containing_child(laddr)->get_val(),
get_paddr()
).safe_then([this, c, laddr](LBANodeRef extent) {
}
return get_lba_btree_extent(
c,
- depth-1,
+ get_meta().depth - 1,
i->get_val(),
get_paddr()
).safe_then([c, &i, len](auto extent) mutable {
auto donor_iter = donor_is_left ? iter - 1 : iter + 1;
return get_lba_btree_extent(
c,
- depth - 1,
+ get_meta().depth - 1,
donor_iter->get_val(),
get_paddr()
).safe_then([this, c, addr, iter, entry, donor_iter, donor_is_left](
+// Appends size and node meta (replacing the removed depth field) to
+// this extent's debug output.
std::ostream &LBALeafNode::print_detail(std::ostream &out) const
{
  return out << ", size=" << get_size()
-	     << ", depth=" << depth;
+	     << ", meta=" << get_meta();
}
LBALeafNode::lookup_range_ret LBALeafNode::lookup_range(
c.trans,
offset,
LBA_BLOCK_SIZE).safe_then([depth](auto ret) {
- ret->set_depth(depth);
+ auto meta = ret->get_meta();
+ if (ret->get_size()) {
+ ceph_assert(meta.begin <= ret->begin()->get_key());
+ ceph_assert(meta.end > (ret->end() - 1)->get_key());
+ }
return LBANodeRef(ret.detach(), /* add_ref = */ false);
});
-
} else {
logger().debug(
"get_lba_btree_extent: reading leaf at offset {}, depth {}",
logger().debug(
"get_lba_btree_extent: read leaf at offset {}",
offset);
- ret->set_depth(depth);
+ auto meta = ret->get_meta();
+ if (ret->get_size()) {
+ ceph_assert(meta.begin <= ret->begin()->get_key());
+ ceph_assert(meta.end > (ret->end() - 1)->get_key());
+ }
return LBANodeRef(ret.detach(), /* add_ref = */ false);
});
}
constexpr size_t LBA_BLOCK_SIZE = 4096;
+/**
+ * lba_node_meta_le_t
+ *
+ * On disk layout for lba_node_meta_t
+ */
+struct lba_node_meta_le_t {
+  laddr_le_t begin = init_le64(0);
+  laddr_le_t end = init_le64(0);
+  depth_le_t depth = init_les32(0);
+
+  lba_node_meta_le_t() = default;
+  lba_node_meta_le_t(const lba_node_meta_le_t &) = default;
+  // Explicit conversion from the in-memory form, used when the meta
+  // is stashed into the node layout (FixedKVNodeLayout::set_meta).
+  explicit lba_node_meta_le_t(const lba_node_meta_t &val)
+    : begin(init_le64(val.begin)),
+      end(init_le64(val.end)),
+      depth(init_les32(val.depth)) {}
+
+  // Implicit conversion back to the in-memory form on read
+  // (FixedKVNodeLayout::get_meta).
+  operator lba_node_meta_t() const {
+    return lba_node_meta_t{ begin, end, depth };
+  }
+};
+
+
/**
* LBAInternalNode
*
* LBA Tree.
*
* Layout (4k):
- * size : uint32_t[1] (1*4)b
- * keys : laddr_t[255] (255*8)b
- * values : paddr_t[255] (255*8)b
- * = 4084
+ * size : uint32_t[1] 4b
+ * (padding) : 4b
+ * meta       : lba_node_meta_le_t[1]      (1*24)b
+ * keys       : laddr_t[254]               (254*8)b
+ * values     : paddr_t[254]               (254*8)b
+ * = 4096
* TODO: make the above capacity calculation part of FixedKVNodeLayout
+ * TODO: the above alignment probably isn't portable without further work
*/
-constexpr size_t INTERNAL_NODE_CAPACITY = 255;
+constexpr size_t INTERNAL_NODE_CAPACITY = 254;
struct LBAInternalNode
: LBANode,
common::FixedKVNodeLayout<
INTERNAL_NODE_CAPACITY,
+ lba_node_meta_t, lba_node_meta_le_t,
laddr_t, laddr_le_t,
paddr_t, paddr_le_t> {
using internal_iterator_t = const_iterator;
static constexpr extent_types_t type = extent_types_t::LADDR_INTERNAL;
+ lba_node_meta_t get_node_meta() const final { return get_meta(); }
+
CachedExtentRef duplicate_for_write() final {
assert(delta_buffer.empty());
return CachedExtentRef(new LBAInternalNode(*this));
* LBA Tree.
*
* Layout (4k):
- * num_entries: uint32_t 4b
- * keys : laddr_t[170] (146*8)b
- * values : lba_map_val_t[170] (146*20)b
- * = 4090
+ * size : uint32_t[1] 4b
+ * (padding) : 4b
+ * meta       : lba_node_meta_le_t[1]      (1*24)b
+ * keys       : laddr_t[145]               (145*8)b
+ * values     : lba_map_val_t[145]         (145*20)b
+ * = 4092
*
* TODO: update FixedKVNodeLayout to handle the above calculation
+ * TODO: the above alignment probably isn't portable without further work
*/
-constexpr size_t LEAF_NODE_CAPACITY = 146;
+constexpr size_t LEAF_NODE_CAPACITY = 145;
/**
* lba_map_val_le_t
: LBANode,
common::FixedKVNodeLayout<
LEAF_NODE_CAPACITY,
+ lba_node_meta_t, lba_node_meta_le_t,
laddr_t, laddr_le_t,
lba_map_val_t, lba_map_val_le_t> {
using internal_iterator_t = const_iterator;
static constexpr extent_types_t type = extent_types_t::LADDR_LEAF;
+ lba_node_meta_t get_node_meta() const final { return get_meta(); }
+
CachedExtentRef duplicate_for_write() final {
assert(delta_buffer.empty());
return CachedExtentRef(new LBALeafNode(*this));
namespace crimson::os::seastore {
-using depth_t = uint32_t;
-
/**
* root_t
*
namespace crimson::os::seastore {
+using depth_t = int32_t;
+using depth_le_t = ceph_les32;
+
using checksum_t = uint32_t;
// Identifies segment location on disk, see SegmentManager,
operator test_val_t() const {
return test_val_t{t1, t2};
}
+};
- bool operator==(const test_val_t &rhs) const {
+// Test analogue of lba_node_meta_t: a [t1, t2) range stashed in the
+// layout, providing the split_into/merge_from/rebalance hooks that
+// FixedKVNodeLayout expects from its Meta parameter.
+struct test_meta_t {
+  uint32_t t1 = 0;
+  uint32_t t2 = 0;
+
+  bool operator==(const test_meta_t &rhs) const {
    return rhs.t1 == t1 && rhs.t2 == t2;
  }
-  bool operator!=(const test_val_t &rhs) const {
+  bool operator!=(const test_meta_t &rhs) const {
    return !(*this == rhs);
  }
+
+  // Split [t1, t2) at pivot into ({t1, pivot}, {pivot, t2}).
+  std::pair<test_meta_t, test_meta_t> split_into(uint32_t pivot) const {
+    return std::make_pair(
+      test_meta_t{t1, pivot},
+      test_meta_t{pivot, t2});
+  }
+
+  // Merge adjacent ranges into [lhs.t1, rhs.t2).
+  static test_meta_t merge_from(const test_meta_t &lhs, const test_meta_t &rhs) {
+    return test_meta_t{lhs.t1, rhs.t2};
+  }
+
+  // Recompute both ranges around the post-rebalance pivot.
+  static std::pair<test_meta_t, test_meta_t>
+  rebalance(const test_meta_t &lhs, const test_meta_t &rhs, uint32_t pivot) {
+    return std::make_pair(
+      test_meta_t{lhs.t1, pivot},
+      test_meta_t{pivot, rhs.t2});
+  }
};
-constexpr size_t CAPACITY = 341;
+// Little-endian on-disk form of test_meta_t, mirroring
+// lba_node_meta_le_t.
+// NOTE(review): unlike lba_node_meta_le_t, the converting
+// constructor here is not explicit -- confirm that is intentional.
+struct test_meta_le_t {
+  ceph_le32 t1 = init_le32(0);
+  ceph_le32 t2 = init_le32(0);
+
+  test_meta_le_t() = default;
+  test_meta_le_t(const test_meta_le_t &) = default;
+  test_meta_le_t(const test_meta_t &nv)
+    : t1(init_le32(nv.t1)), t2(init_le32(nv.t2)) {}
+
+  // Conversion back to the in-memory form on read.
+  operator test_meta_t() const {
+    return test_meta_t{t1, t2};
+  }
+};
+
+constexpr size_t CAPACITY = 339;
struct TestNode : FixedKVNodeLayout<
CAPACITY,
+ test_meta_t, test_meta_le_t,
uint32_t, ceph_le32,
test_val_t, test_val_le_t> {
char buf[4096];
TestNode() : FixedKVNodeLayout(buf) {
memset(buf, 0, sizeof(buf));
+ set_meta({0, std::numeric_limits<uint32_t>::max()});
}
TestNode(const TestNode &rhs)
: FixedKVNodeLayout(buf) {
node.split_into(split_left, split_right);
ASSERT_EQ(split_left.get_size() + split_right.get_size(), CAPACITY);
+ ASSERT_EQ(split_left.get_meta().t1, split_left.begin()->get_key());
+ ASSERT_EQ(split_left.get_meta().t2, split_right.get_meta().t1);
+ ASSERT_EQ(split_right.get_meta().t2, std::numeric_limits<uint32_t>::max());
+
num = 0;
for (auto &i : split_left) {
ASSERT_EQ(i.get_key(), num);
++num;
++iter;
}
+ node.set_meta({0, num});
+ node2.set_meta({num, std::numeric_limits<uint32_t>::max()});
iter = node2.begin();
while (num < (2 * (CAPACITY / 2))) {
node2.journal_insert(iter, num, test_val_t{num, num}, nullptr);
auto node_merged = TestNode();
node_merged.merge_from(node, node2);
+ ASSERT_EQ(
+ node_merged.get_meta(),
+ (test_meta_t{0, std::numeric_limits<uint32_t>::max()}));
+
ASSERT_EQ(node_merged.get_size(), total);
num = 0;
for (auto &i : node_merged) {
++num;
++iter;
}
+ node.set_meta({0, num});
+ node2.set_meta({num, std::numeric_limits<uint32_t>::max()});
iter = node2.begin();
while (num < (left + right)) {
node2.journal_insert(iter, num, test_val_t{num, num}, nullptr);
auto node_balanced = TestNode();
auto node_balanced2 = TestNode();
- TestNode::balance_into_new_nodes(
+ auto pivot = TestNode::balance_into_new_nodes(
node,
node2,
prefer_left,
ASSERT_EQ(total, node_balanced.get_size() + node_balanced2.get_size());
+ unsigned left_size, right_size;
if (total % 2) {
if (prefer_left) {
- ASSERT_EQ(node_balanced.get_size(), node_balanced2.get_size() + 1);
+ left_size = (total/2) + 1;
+ right_size = total/2;
} else {
- ASSERT_EQ(node_balanced.get_size() + 1, node_balanced2.get_size());
+ left_size = total/2;
+ right_size = (total/2) + 1;
}
} else {
- ASSERT_EQ(node_balanced.get_size(), node_balanced2.get_size());
+ left_size = right_size = total/2;
}
+ ASSERT_EQ(pivot, left_size);
+ ASSERT_EQ(left_size, node_balanced.get_size());
+ ASSERT_EQ(right_size, node_balanced2.get_size());
+
+ ASSERT_EQ(
+ node_balanced.get_meta(),
+ (test_meta_t{0, left_size}));
+ ASSERT_EQ(
+ node_balanced2.get_meta(),
+ (test_meta_t{left_size, std::numeric_limits<uint32_t>::max()}));
num = 0;
for (auto &i: node_balanced) {