lba_manager/btree/btree_lba_manager.cc
lba_manager/btree/lba_btree_node_impl.cc
lba_manager/btree/btree_range_pin.cc
+ omap_manager.cc
+ omap_manager/btree/btree_omap_manager.cc
+ omap_manager/btree/omap_btree_node_impl.cc
onode.cc
onode_manager/simple-fltree/onode_block.cc
onode_manager/simple-fltree/onode_delta.cc
// included for get_extent_by_type
#include "crimson/os/seastore/extentmap_manager/btree/extentmap_btree_node_impl.h"
#include "crimson/os/seastore/lba_manager/btree/lba_btree_node_impl.h"
+#include "crimson/os/seastore/omap_manager/btree/omap_btree_node_impl.h"
#include "crimson/os/seastore/onode_manager/simple-fltree/onode_block.h"
#include "crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/seastore.h"
#include "test/crimson/seastore/test_block.h"
return alloc_new_extent<extentmap_manager::ExtMapInnerNode>(t, length);
case extent_types_t::EXTMAP_LEAF:
return alloc_new_extent<extentmap_manager::ExtMapLeafNode>(t, length);
+ case extent_types_t::OMAP_INNER:
+ return alloc_new_extent<omap_manager::OMapInnerNode>(t, length);
+ case extent_types_t::OMAP_LEAF:
+ return alloc_new_extent<omap_manager::OMapLeafNode>(t, length);
case extent_types_t::TEST_BLOCK:
return alloc_new_extent<TestBlock>(t, length);
case extent_types_t::TEST_BLOCK_PHYSICAL:
).safe_then([](auto extent) {
return CachedExtentRef(extent.detach(), false /* add_ref */);
});
+ case extent_types_t::OMAP_INNER:
+ return get_extent<omap_manager::OMapInnerNode>(offset, length
+ ).safe_then([](auto extent) {
+ return CachedExtentRef(extent.detach(), false /* add_ref */);
+ });
+ case extent_types_t::OMAP_LEAF:
+ return get_extent<omap_manager::OMapLeafNode>(offset, length
+ ).safe_then([](auto extent) {
+ return CachedExtentRef(extent.detach(), false /* add_ref */);
+ });
case extent_types_t::ONODE_BLOCK:
return get_extent<OnodeBlock>(offset, length
).safe_then([](auto extent) {
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+#include <experimental/iterator>
+#include <iostream>
+
+#include "crimson/os/seastore/transaction_manager.h"
+#include "crimson/os/seastore/omap_manager.h"
+#include "crimson/os/seastore/omap_manager/btree/btree_omap_manager.h"
+
+namespace crimson::os::seastore::omap_manager {
+
+/// Factory for the btree-backed OMapManager.
+/// The returned manager is constructed from (and refers to) trans_manager.
+OMapManagerRef create_omap_manager(TransactionManager &trans_manager)
+{
+  return std::make_unique<BtreeOMapManager>(trans_manager);
+}
+
+}
+
+namespace std {
+// Streams a single key/value pair as "key_value_map (<key>-><value>)".
+// NOTE(review): this adds an overload to namespace std, which the C++
+// standard reserves; living here makes it reachable through ADL for
+// std::pair arguments. Check for users before relocating it.
+std::ostream &operator<<(std::ostream &out, const std::pair<std::string, std::string> &rhs)
+{
+ return out << "key_value_map (" << rhs.first<< "->" << rhs.second << ")";
+}
+}
+
+namespace crimson::os::seastore {
+
+/// Print a list of strings as "[a, b, c]"; ", " appears between elements only.
+std::ostream &operator<<(std::ostream &out, const std::list<std::string> &rhs)
+{
+  out << '[';
+  const char *separator = "";
+  for (const auto &entry : rhs) {
+    out << separator << entry;
+    separator = ", ";
+  }
+  out << ']';
+  return out;
+}
+
+/// Print key/value pairs as "[key_value_map (k->v), key_value_map (k->v), ]".
+/// Every element, including the last, is followed by ", ".
+/// The elements are formatted here directly so that this printer does not
+/// depend on an operator<< for std::pair being discoverable by ADL inside
+/// std::ostream_iterator.
+std::ostream &operator<<(std::ostream &out, const std::vector<std::pair<std::string, std::string>> &rhs)
+{
+  out << '[';
+  for (const auto &[key, value] : rhs) {
+    out << "key_value_map (" << key << "->" << value << ")" << ", ";
+  }
+  return out << ']';
+}
+
+}
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include <iostream>
+
+#include <boost/intrusive_ptr.hpp>
+#include <boost/smart_ptr/intrusive_ref_counter.hpp>
+
+#include <seastar/core/future.hh>
+
+#include "crimson/osd/exceptions.h"
+#include "crimson/os/seastore/seastore_types.h"
+#include "crimson/os/seastore/transaction_manager.h"
+
+// Length passed to alloc_extent/alloc_extents for every omap btree node
+// allocated in this patch (presumably bytes -- confirm against alloc_extent).
+#define OMAP_BLOCK_SIZE 4096
+
+namespace crimson::os::seastore {
+
+// Tracks whether an omap_root_t has been changed since it was constructed.
+enum class omap_root_state_t : uint8_t {
+ INITIAL = 0, // value assigned by the omap_root_t constructor
+ MUTATED = 1, // set by the btree manager when the root address/depth changes
+ NONE = 0xFF
+};
+
+/// Describes where an omap tree is rooted: the root's logical address, the
+/// tree depth, and whether the description has been mutated.
+struct omap_root_t {
+  depth_t depth = 0;
+  omap_root_state_t state;
+  laddr_t omap_root_laddr;
+
+  /// A freshly constructed root always starts in the INITIAL state.
+  omap_root_t(depth_t dep, laddr_t laddr)
+    : depth(dep),
+      state(omap_root_state_t::INITIAL),
+      omap_root_laddr(laddr) {}
+};
+
+// Result of a key listing.
+struct list_keys_result_t {
+ std::vector<std::string> keys; // keys collected by the listing
+ std::string next; // key to continue from; the inner-node listing sets "" when nothing is left
+};
+
+// Result of a key/value listing.
+struct list_kvs_result_t {
+ std::vector<std::pair<std::string, std::string>> kvs; // (key, value) pairs collected by the listing
+ std::string next; // key to continue from; the inner-node listing sets "" when nothing is left
+};
+// Default max_result_size for the list APIs: effectively "no limit".
+constexpr size_t MAX_SIZE = std::numeric_limits<size_t>::max();
+// Debug printers for containers of omap strings.
+// NOTE(review): omap_manager.cc in this patch defines a printer for
+// std::vector<std::pair<std::string, std::string>>, not for std::map --
+// confirm which overload is actually intended/defined.
+std::ostream &operator<<(std::ostream &out, const std::list<std::string> &rhs);
+std::ostream &operator<<(std::ostream &out, const std::map<std::string, std::string> &rhs);
+
+// Abstract interface for a per-object omap (string key -> string value) store.
+// Every operation takes the omap_root_t describing the tree and the current
+// Transaction, and returns an errorator future.
+class OMapManager {
+ /* All OMapManager APIs take their string parameters by reference;
+ * the caller must keep the referenced strings alive (not freed)
+ * until the returned future has resolved.
+ */
+public:
+ /* allocate omap tree root node
+ *
+ * input: Transaction &t, current transaction
+ * return: the omap_root_t structure describing the new tree.
+ */
+ using initialize_omap_ertr = TransactionManager::alloc_extent_ertr;
+ using initialize_omap_ret = initialize_omap_ertr::future<omap_root_t>;
+ virtual initialize_omap_ret initialize_omap(Transaction &t) = 0;
+
+ /* get value(string) by key(string)
+ *
+ * input: omap_root_t omap_root, omap btree root information
+ * input: Transaction &t, current transaction
+ * input: string &key, omap string key
+ * return: string key->string value mapping pair.
+ */
+ using omap_get_value_ertr = TransactionManager::read_extent_ertr;
+ using omap_get_value_ret = omap_get_value_ertr::future<std::pair<std::string, std::string>>;
+ virtual omap_get_value_ret omap_get_value(omap_root_t &omap_root, Transaction &t,
+ const std::string &key) = 0;
+
+ /* set key value mapping in omap
+ *
+ * input: omap_root_t &omap_root, omap btree root information
+ * input: Transaction &t, current transaction
+ * input: string &key, omap string key
+ * input: string &value, mapped value corresponding key
+ * return: bool (the future carries a bool, not a mutation_result_t);
+ * the btree implementation in this patch yields true on success.
+ */
+ using omap_set_key_ertr = TransactionManager::read_extent_ertr;
+ using omap_set_key_ret = omap_set_key_ertr::future<bool>;
+ virtual omap_set_key_ret omap_set_key(omap_root_t &omap_root, Transaction &t,
+ const std::string &key, const std::string &value) = 0;
+
+ /* remove key value mapping in omap tree
+ *
+ * input: omap_root_t &omap_root, omap btree root information
+ * input: Transaction &t, current transaction
+ * input: string &key, omap string key
+ * return: true if the removal succeeded, false otherwise.
+ */
+ using omap_rm_key_ertr = TransactionManager::read_extent_ertr;
+ using omap_rm_key_ret = omap_rm_key_ertr::future<bool>;
+ virtual omap_rm_key_ret omap_rm_key(omap_root_t &omap_root, Transaction &t,
+ const std::string &key) = 0;
+
+ /* get all keys or partial keys in omap tree
+ *
+ * input: omap_root_t &omap_root, omap btree root information
+ * input: Transaction &t, current transaction
+ * input: string &start, the listed key range begins from start,
+ * if start is "", list from the first omap key
+ * input: max_result_size, the maximum number of keys to list,
+ * if it is not set, list all keys after string start
+ * return: list_keys_result_t, listed keys and next key
+ */
+ using omap_list_keys_ertr = TransactionManager::read_extent_ertr;
+ using omap_list_keys_ret = omap_list_keys_ertr::future<list_keys_result_t>;
+ virtual omap_list_keys_ret omap_list_keys(omap_root_t &omap_root, Transaction &t,
+ std::string &start,
+ size_t max_result_size = MAX_SIZE) = 0;
+
+ /* get all or partial key->value mappings in omap tree
+ *
+ * input: omap_root_t &omap_root, omap btree root information
+ * input: Transaction &t, current transaction
+ * input: string &start, the listed key range begins from start,
+ * if start is "", list from the first omap key
+ * input: max_result_size, the maximum number of mappings to list,
+ * if it is not set, list all keys after string start.
+ * return: list_kvs_result_t, listed key->value mapping and next key.
+ */
+ using omap_list_ertr = TransactionManager::read_extent_ertr;
+ using omap_list_ret = omap_list_ertr::future<list_kvs_result_t>;
+ virtual omap_list_ret omap_list(omap_root_t &omap_root, Transaction &t,
+ std::string &start,
+ size_t max_result_size = MAX_SIZE) = 0;
+
+ /* clear all omap tree key->value mapping
+ *
+ * input: omap_root_t &omap_root, omap btree root information
+ * input: Transaction &t, current transaction
+ */
+ using omap_clear_ertr = TransactionManager::read_extent_ertr;
+ using omap_clear_ret = omap_clear_ertr::future<>;
+ virtual omap_clear_ret omap_clear(omap_root_t &omap_root, Transaction &t) = 0;
+
+ virtual ~OMapManager() {}
+};
+// Owning handle to an OMapManager implementation.
+using OMapManagerRef = std::unique_ptr<OMapManager>;
+
+namespace omap_manager {
+
+// Factory for an OMapManager built on the given TransactionManager; the
+// definition in omap_manager.cc returns a BtreeOMapManager.
+OMapManagerRef create_omap_manager (
+ TransactionManager &trans_manager);
+}
+
+}
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <string.h>
+
+#include "crimson/common/log.h"
+
+#include "crimson/os/seastore/seastore_types.h"
+#include "crimson/os/seastore/omap_manager/btree/btree_omap_manager.h"
+#include "crimson/os/seastore/omap_manager/btree/omap_btree_node_impl.h"
+
+namespace {
+ // File-local accessor for the logger used by this translation unit
+ // (obtained for the ceph_subsys_filestore subsystem).
+ seastar::logger& logger() {
+ return crimson::get_logger(ceph_subsys_filestore);
+ }
+}
+
+namespace crimson::os::seastore::omap_manager {
+
+// Binds the manager to the TransactionManager it will use for all extent
+// allocation, loading and ref-count operations (tm is a reference member).
+BtreeOMapManager::BtreeOMapManager(
+ TransactionManager &tm)
+ : tm(tm) {}
+
+/// Create a new omap tree: allocate one leaf extent, mark it empty with
+/// depth 1, and hand back an omap_root_t pointing at it.
+BtreeOMapManager::initialize_omap_ret
+BtreeOMapManager::initialize_omap(Transaction &t)
+{
+  logger().debug("{}", __func__);
+  auto allocated = tm.alloc_extent<OMapLeafNode>(t, L_ADDR_MIN, OMAP_BLOCK_SIZE);
+  return allocated.safe_then([](auto &&leaf) {
+    // The new tree consists of a single, empty leaf.
+    leaf->set_size(0);
+    omap_node_meta_t leaf_meta{1};
+    leaf->set_meta(leaf_meta);
+    omap_root_t new_root(1, leaf->get_laddr());
+    return initialize_omap_ertr::make_ready_future<omap_root_t>(new_root);
+  });
+}
+
+/// Load the root node extent described by omap_root.
+/// The root address must not be L_ADDR_NULL.
+BtreeOMapManager::get_root_ret
+BtreeOMapManager::get_omap_root(omap_root_t &omap_root, Transaction &t)
+{
+  assert(omap_root.omap_root_laddr != L_ADDR_NULL);
+  const laddr_t root_addr = omap_root.omap_root_laddr;
+  auto ctx = get_omap_context(omap_root, t);
+  return omap_load_extent(ctx, root_addr, omap_root.depth);
+}
+
+/// Grow the tree by one level after the root has been split.
+///
+/// Allocates a new inner node, makes the two split halves its children
+/// (left under the empty key "", right under the pivot key), then points
+/// oc.omap_root at the new node, bumps the depth and marks the root MUTATED.
+///
+/// mresult must carry split_tuple (i.e. come from a SPLITTED mutation).
+/// Resolves to true.
+/// NOTE(review): initialize_omap calls set_size(0) on its fresh extent while
+/// this path does not -- confirm a newly allocated inner node needs no
+/// explicit size reset.
+BtreeOMapManager::handle_root_split_ret
+BtreeOMapManager::handle_root_split(omap_context_t oc, OMapNode::mutation_result_t mresult)
+{
+  // Dereferencing an empty optional is undefined; fail loudly in debug builds.
+  assert(mresult.split_tuple.has_value());
+  return oc.tm.alloc_extent<OMapInnerNode>(oc.t, L_ADDR_MIN, OMAP_BLOCK_SIZE)
+    .safe_then([oc, mresult = std::move(mresult)](auto&& nroot) {
+    // Bind by reference: no need to copy two node refs and the pivot string.
+    const auto &[left, right, pivot] = *mresult.split_tuple;
+    omap_node_meta_t meta{oc.omap_root.depth + 1};
+    nroot->set_meta(meta);
+    nroot->journal_inner_insert(nroot->iter_begin(), left->get_laddr(),
+                                "", nroot->maybe_get_delta_buffer());
+    nroot->journal_inner_insert(nroot->iter_begin() + 1, right->get_laddr(),
+                                pivot, nroot->maybe_get_delta_buffer());
+    // oc holds omap_root by reference, so the caller's root is updated.
+    oc.omap_root.omap_root_laddr = nroot->get_laddr();
+    oc.omap_root.depth += 1;
+    oc.omap_root.state = omap_root_state_t::MUTATED;
+    return handle_root_split_ertr::make_ready_future<bool>(true);
+  });
+}
+
+/// Shrink the tree by one level: the first child of the current (inner) root
+/// becomes the new root, the depth is decremented, the root is marked
+/// MUTATED, and the old root extent's reference is dropped.
+///
+/// mresult must carry need_merge (the current root node). Resolves to true.
+BtreeOMapManager::handle_root_merge_ret
+BtreeOMapManager::handle_root_merge(omap_context_t oc, OMapNode::mutation_result_t mresult)
+{
+  // Dereferencing an empty optional is undefined; fail loudly in debug builds.
+  assert(mresult.need_merge.has_value());
+  auto root = *(mresult.need_merge);
+  auto iter = root->cast<OMapInnerNode>()->iter_begin();
+  // oc holds omap_root by reference, so the caller's root is updated.
+  oc.omap_root.omap_root_laddr = iter->get_node_key().laddr;
+  oc.omap_root.depth -= 1;
+  oc.omap_root.state = omap_root_state_t::MUTATED;
+  return oc.tm.dec_ref(oc.t, root->get_laddr()).safe_then([] (auto &&) {
+    return handle_root_merge_ertr::make_ready_future<bool>(true);
+  });
+}
+
+
+/// Look up key in the omap tree rooted at omap_root.
+///
+/// Loads the root node and delegates to its get_value(); resolves to the
+/// pair produced by the node. omap_root, t and key are captured by reference
+/// and must outlive the returned future.
+BtreeOMapManager::omap_get_value_ret
+BtreeOMapManager::omap_get_value(omap_root_t &omap_root, Transaction &t,
+                                 const std::string &key)
+{
+  logger().debug("{}: {}", __func__, key);
+  return get_omap_root(omap_root, t).safe_then([this, &omap_root, &t, &key](auto&& extent) {
+    return extent->get_value(get_omap_context(omap_root, t), key);
+  }).safe_then([](auto &&e) {
+    // __func__ inside a lambda names the closure's operator(), so the
+    // enclosing function is spelled out to keep the log line meaningful.
+    logger().debug("{}: {} -> {}", "omap_get_value", e.first, e.second);
+    return omap_get_value_ret(
+      omap_get_value_ertr::ready_future_marker{},
+      std::move(e));
+  });
+}
+
+/// Insert or update key -> value in the tree rooted at omap_root.
+///
+/// Resolves to true when the root reports SUCCESS, to the outcome of
+/// handle_root_split() when the root itself was split, and to false for any
+/// other status. All reference arguments must outlive the returned future.
+BtreeOMapManager::omap_set_key_ret
+BtreeOMapManager::omap_set_key(omap_root_t &omap_root, Transaction &t,
+                               const std::string &key, const std::string &value)
+{
+  logger().debug("{}: {} -> {}", __func__, key, value);
+  return get_omap_root(omap_root, t).safe_then(
+    [this, &omap_root, &t, &key, &value](auto root) {
+    return root->insert(get_omap_context(omap_root, t), key, value);
+  }).safe_then([this, &omap_root, &t](auto mresult) {
+    switch (mresult.status) {
+    case mutation_status_t::SUCCESS:
+      return omap_set_key_ertr::make_ready_future<bool>(true);
+    case mutation_status_t::SPLITTED:
+      // The root split: a new root one level higher is required.
+      return handle_root_split(get_omap_context(omap_root, t), mresult);
+    default:
+      return omap_set_key_ertr::make_ready_future<bool>(false);
+    }
+  });
+}
+
+/// Remove key from the tree rooted at omap_root.
+///
+/// Status handling at the root:
+///  - SUCCESS: resolves to true
+///  - SPLITTED: the root split, a new root is installed
+///  - NEED_MERGE: if the root has exactly one entry and the tree is deeper
+///    than one level, the root level is removed; otherwise resolves to true
+///  - anything else: resolves to false
+/// All reference arguments must outlive the returned future.
+BtreeOMapManager::omap_rm_key_ret
+BtreeOMapManager::omap_rm_key(omap_root_t &omap_root, Transaction &t, const std::string &key)
+{
+  logger().debug("{}: {}", __func__, key);
+  return get_omap_root(omap_root, t).safe_then(
+    [this, &omap_root, &t, &key](auto root) {
+    return root->rm_key(get_omap_context(omap_root, t), key);
+  }).safe_then([this, &omap_root, &t](auto mresult) {
+    switch (mresult.status) {
+    case mutation_status_t::SUCCESS:
+      return omap_rm_key_ertr::make_ready_future<bool>(true);
+    case mutation_status_t::SPLITTED:
+      return handle_root_split(get_omap_context(omap_root, t), mresult);
+    case mutation_status_t::NEED_MERGE: {
+      auto merge_root = *(mresult.need_merge);
+      if (merge_root->get_node_size() == 1 && omap_root.depth != 1) {
+        return handle_root_merge(get_omap_context(omap_root, t), mresult);
+      }
+      return omap_rm_key_ertr::make_ready_future<bool>(true);
+    }
+    default:
+      return omap_rm_key_ertr::make_ready_future<bool>(false);
+    }
+  });
+}
+
+/// List up to max_result_size keys starting from start by delegating to the
+/// root node's list_keys(). omap_root, t and start are captured by reference
+/// and must outlive the returned future.
+BtreeOMapManager::omap_list_keys_ret
+BtreeOMapManager::omap_list_keys(omap_root_t &omap_root, Transaction &t,
+                                 std::string &start, size_t max_result_size)
+{
+  logger().debug("{}", __func__);
+  return get_omap_root(omap_root, t).safe_then(
+    [this, &omap_root, &t, &start, max_result_size](auto root) {
+    return root->list_keys(get_omap_context(omap_root, t), start, max_result_size);
+  }).safe_then([](auto &&listing) {
+    return omap_list_keys_ret(
+      omap_list_keys_ertr::ready_future_marker{},
+      std::move(listing));
+  });
+}
+
+/// List up to max_result_size key/value pairs starting from start by
+/// delegating to the root node's list(). omap_root, t and start are captured
+/// by reference and must outlive the returned future.
+BtreeOMapManager::omap_list_ret
+BtreeOMapManager::omap_list(omap_root_t &omap_root, Transaction &t,
+                            std::string &start, size_t max_result_size)
+{
+  logger().debug("{}", __func__);
+  return get_omap_root(omap_root, t).safe_then(
+    [this, &omap_root, &t, &start, max_result_size](auto root) {
+    return root->list(get_omap_context(omap_root, t), start, max_result_size);
+  }).safe_then([](auto &&listing) {
+    return omap_list_ret(
+      omap_list_ertr::ready_future_marker{},
+      std::move(listing));
+  });
+}
+
+/// Remove the whole omap tree: clear the root node, drop the reference on
+/// the root extent, then reset omap_root to depth 0 / L_ADDR_NULL and mark it
+/// MUTATED. omap_root and t must outlive the returned future.
+BtreeOMapManager::omap_clear_ret
+BtreeOMapManager::omap_clear(omap_root_t &omap_root, Transaction &t)
+{
+  logger().debug("{}", __func__);
+  return get_omap_root(omap_root, t).safe_then([this, &omap_root, &t](auto root) {
+    return root->clear(get_omap_context(omap_root, t));
+  }).safe_then([this, &omap_root, &t] {
+    auto released = tm.dec_ref(t, omap_root.omap_root_laddr);
+    return released.safe_then([&omap_root](auto) {
+      // Nothing is left of the tree; invalidate the caller's root.
+      omap_root.state = omap_root_state_t::MUTATED;
+      omap_root.depth = 0;
+      omap_root.omap_root_laddr = L_ADDR_NULL;
+      return omap_clear_ertr::now();
+    });
+  });
+}
+
+}
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+#include <boost/intrusive_ptr.hpp>
+#include <boost/smart_ptr/intrusive_ref_counter.hpp>
+#include <seastar/core/future.hh>
+
+#include "include/ceph_assert.h"
+#include "crimson/osd/exceptions.h"
+
+#include "crimson/os/seastore/omap_manager.h"
+#include "crimson/os/seastore/omap_manager/btree/omap_btree_node.h"
+#include "crimson/os/seastore/seastore_types.h"
+#include "crimson/os/seastore/transaction_manager.h"
+
+namespace crimson::os::seastore::omap_manager {
+/**
+ * BtreeOMapManager
+ *
+ * Uses a btree to track :
+ * string -> string mapping for each onode omap
+ */
+
+// Btree-backed implementation of the OMapManager interface.
+class BtreeOMapManager : public OMapManager {
+ // Transaction manager used for every extent operation.
+ TransactionManager &tm;
+
+ // Bundle the root description, the transaction manager and the current
+ // transaction into the context object passed to node operations.
+ omap_context_t get_omap_context(omap_root_t &omap_root, Transaction &t) {
+ return omap_context_t{omap_root, tm, t};
+ }
+
+ /* get_omap_root
+ *
+ * load omap tree root node
+ */
+ using get_root_ertr = TransactionManager::read_extent_ertr;
+ using get_root_ret = get_root_ertr::future<OMapNodeRef>;
+ get_root_ret get_omap_root(omap_root_t &omap_root, Transaction &t);
+
+ /* handle_root_split
+ *
+ * the root has been split and omap_root_t needs to be updated
+ */
+ using handle_root_split_ertr = TransactionManager::read_extent_ertr;
+ using handle_root_split_ret = handle_root_split_ertr::future<bool>;
+ handle_root_split_ret handle_root_split(omap_context_t oc,
+ OMapNode:: mutation_result_t mresult);
+
+ /* handle_root_merge
+ *
+ * the root node has only one item and is not a leaf node, so one layer
+ * of the tree needs to be removed
+ */
+ using handle_root_merge_ertr = TransactionManager::read_extent_ertr;
+ using handle_root_merge_ret = handle_root_merge_ertr::future<bool>;
+ handle_root_merge_ret handle_root_merge(omap_context_t oc,
+ OMapNode:: mutation_result_t mresult);
+
+public:
+ explicit BtreeOMapManager(TransactionManager &tm);
+
+ // OMapManager interface; see omap_manager.h for the per-call contracts.
+ initialize_omap_ret initialize_omap(Transaction &t) final;
+
+ omap_get_value_ret omap_get_value(omap_root_t &omap_root, Transaction &t,
+ const std::string &key) final;
+
+ omap_set_key_ret omap_set_key(omap_root_t &omap_root, Transaction &t,
+ const std::string &key, const std::string &value) final;
+
+ omap_rm_key_ret omap_rm_key(omap_root_t &omap_root, Transaction &t,
+ const std::string &key) final;
+
+ omap_list_keys_ret omap_list_keys(omap_root_t &omap_root, Transaction &t,
+ std::string &start,
+ size_t max_result_size = MAX_SIZE) final;
+
+ omap_list_ret omap_list(omap_root_t &omap_root, Transaction &t,
+ std::string &start,
+ size_t max_result_size = MAX_SIZE) final;
+
+ omap_clear_ret omap_clear(omap_root_t &omap_root, Transaction &t) final;
+
+};
+// Owning handle to a BtreeOMapManager.
+using BtreeOMapManagerRef = std::unique_ptr<BtreeOMapManager>;
+
+}
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+#pragma once
+
+#include <string>
+#include <vector>
+
+//#include <boost/iterator/counting_iterator.hpp>
+
+#include "crimson/common/log.h"
+#include "crimson/os/seastore/seastore_types.h"
+#include "crimson/os/seastore/transaction_manager.h"
+#include "crimson/os/seastore/omap_manager.h"
+#include "crimson/os/seastore/omap_manager/btree/omap_types.h"
+
+namespace crimson::os::seastore::omap_manager{
+
+// Per-call context handed to every node operation. All members are
+// references, so copies of the context still refer to the caller's objects.
+struct omap_context_t {
+ omap_root_t &omap_root; // description of the tree being operated on
+ TransactionManager &tm; // used for extent allocation/loading and dec_ref
+ Transaction &t; // the current transaction
+};
+
+// Outcome of a node mutation (insert / rm_key), reported to the parent level.
+enum class mutation_status_t : uint8_t {
+ SUCCESS = 0,
+ SPLITTED = 1, // the node was split; mutation_result_t::split_tuple is populated
+ NEED_MERGE = 2, // mutation_result_t::need_merge names the node to merge
+ FAIL = 3
+};
+
+/// Abstract base of all omap btree nodes (inner and leaf).
+///
+/// Declares the operations the tree performs on a node; each takes the
+/// omap_context_t of the current call and returns an errorator future.
+struct OMapNode : LogicalCachedExtent {
+  using OMapNodeRef = TCachedExtentRef<OMapNode>;
+
+  /// Result of insert()/rm_key(), consumed by the parent level.
+  struct mutation_result_t {
+    mutation_status_t status;
+    /// Only populated if SPLITTED, indicates the newly created left and right nodes
+    /// from splitting the target entry during insertion.
+    std::optional<std::tuple<OMapNodeRef, OMapNodeRef, std::string>> split_tuple;
+    /// Only populated if a merge is needed; indicates which entry the upper
+    /// layer has to merge.
+    std::optional<OMapNodeRef> need_merge;
+
+    /// The optionals are taken by value and moved into place, so callers
+    /// passing temporaries pay no extra copy of the node refs or pivot string.
+    mutation_result_t(mutation_status_t s, std::optional<std::tuple<OMapNodeRef,
+                      OMapNodeRef, std::string>> tuple, std::optional<OMapNodeRef> n_merge)
+      : status(s),
+        split_tuple(std::move(tuple)),
+        need_merge(std::move(n_merge)) {}
+  };
+
+  OMapNode(ceph::bufferptr &&ptr) : LogicalCachedExtent(std::move(ptr)) {}
+  OMapNode(const OMapNode &other)
+    : LogicalCachedExtent(other) {}
+
+  /// Look up key in the subtree below this node.
+  using get_value_ertr = OMapManager::omap_get_value_ertr;
+  using get_value_ret = OMapManager::omap_get_value_ret;
+  virtual get_value_ret get_value(omap_context_t oc, const std::string &key) = 0;
+
+  /// Insert key -> value into the subtree below this node.
+  using insert_ertr = TransactionManager::alloc_extent_ertr;
+  using insert_ret = insert_ertr::future<mutation_result_t>;
+  virtual insert_ret insert(omap_context_t oc, const std::string &key, const std::string &value) = 0;
+
+  /// Remove key from the subtree below this node.
+  using rm_key_ertr = TransactionManager::alloc_extent_ertr;
+  using rm_key_ret = rm_key_ertr::future<mutation_result_t>;
+  virtual rm_key_ret rm_key(omap_context_t oc, const std::string &key) = 0;
+
+  /// List at most max_result_size keys beginning at start.
+  using list_keys_ertr = OMapManager::omap_list_keys_ertr;
+  using list_keys_ret = OMapManager::omap_list_keys_ret;
+  virtual list_keys_ret list_keys(omap_context_t oc, std::string &start,
+                                  size_t max_result_size) = 0;
+
+  /// List at most max_result_size key/value pairs beginning at start.
+  using list_ertr = OMapManager::omap_list_ertr;
+  using list_ret = OMapManager::omap_list_ret;
+  virtual list_ret list(omap_context_t oc, std::string &start, size_t max_result_size) = 0;
+
+  /// Clear the subtree below this node.
+  using clear_ertr = OMapManager::omap_clear_ertr;
+  using clear_ret = clear_ertr::future<>;
+  virtual clear_ret clear(omap_context_t oc) = 0;
+
+  /// Merge this node with right into one replacement node.
+  using full_merge_ertr = TransactionManager::alloc_extent_ertr;
+  using full_merge_ret = full_merge_ertr::future<OMapNodeRef>;
+  virtual full_merge_ret make_full_merge(omap_context_t oc, OMapNodeRef right) = 0;
+
+  /// Rebalance this node and _right into two replacement nodes; resolves to
+  /// (left, right, pivot key).
+  using make_balanced_ertr = TransactionManager::alloc_extent_ertr;
+  using make_balanced_ret = make_balanced_ertr::future
+    <std::tuple<OMapNodeRef, OMapNodeRef, std::string>>;
+  virtual make_balanced_ret make_balanced(omap_context_t oc, OMapNodeRef _right) = 0;
+
+  virtual omap_node_meta_t get_node_meta() const = 0;
+  virtual bool extent_will_overflow(size_t ksize, std::optional<size_t> vsize) const = 0;
+  virtual bool extent_is_below_min() const = 0;
+  virtual uint32_t get_node_size() = 0;
+
+  virtual ~OMapNode() = default;
+};
+
+// Namespace-level shorthand for the node reference type.
+using OMapNodeRef = OMapNode::OMapNodeRef;
+
+// Load the omap node stored at laddr. Callers in this patch pass the depth
+// of the node being loaded (root depth, or parent depth - 1 for a child).
+TransactionManager::read_extent_ertr::future<OMapNodeRef>
+omap_load_extent(omap_context_t oc, laddr_t laddr, depth_t depth);
+
+}
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <string.h>
+
+#include "include/buffer.h"
+#include "include/byteorder.h"
+#include "crimson/os/seastore/transaction_manager.h"
+#include "crimson/os/seastore/omap_manager/btree/omap_btree_node.h"
+#include "crimson/os/seastore/omap_manager/btree/omap_btree_node_impl.h"
+#include "seastar/core/thread.hh"
+
+namespace {
+ // File-local accessor for the logger used by this translation unit
+ // (obtained for the ceph_subsys_filestore subsystem).
+ seastar::logger& logger() {
+ return crimson::get_logger(ceph_subsys_filestore);
+ }
+}
+
+namespace crimson::os::seastore::omap_manager {
+
+/// Debug printer: "omap_inner_key (<key_off> - <key_len> - <laddr>)".
+std::ostream &operator<<(std::ostream &out, const omap_inner_key_t &rhs)
+{
+  out << "omap_inner_key (" << rhs.key_off;
+  out << " - " << rhs.key_len;
+  out << " - " << rhs.laddr << ")";
+  return out;
+}
+
+/// Debug printer:
+/// "omap_leaf_key_t (<key_off> - <key_len> <val_off> - <val_len>)".
+std::ostream &operator<<(std::ostream &out, const omap_leaf_key_t &rhs)
+{
+  out << "omap_leaf_key_t (" << rhs.key_off;
+  out << " - " << rhs.key_len;
+  out << " " << rhs.val_off;
+  out << " - " << rhs.val_len << ")";
+  return out;
+}
+
+/// Append this inner node's entry count and depth to out.
+std::ostream &OMapInnerNode::print_detail_l(std::ostream &out) const
+{
+  out << ", size=" << get_size();
+  out << ", depth=" << get_meta().depth;
+  return out;
+}
+
+/**
+ * make_split_insert
+ *
+ * Split this node into two new children, then insert (key, laddr) into
+ * whichever half the key belongs to: the left one when the pivot is greater
+ * than key, otherwise the right one (with iter's index rebased by the left
+ * node's size). The returned mutation_result_t is always SPLITTED and
+ * carries the (left, right, pivot) tuple.
+ */
+OMapInnerNode::make_split_insert_ret
+OMapInnerNode::make_split_insert(omap_context_t oc, internal_iterator_t iter,
+                                 std::string key, laddr_t laddr)
+{
+  return make_split_children(oc).safe_then(
+    [iter, key = std::move(key), laddr](auto halves) {
+    auto &[left, right, pivot] = halves;
+    const bool belongs_left = pivot > key;
+    if (belongs_left) {
+      auto pos = left->iter_idx(iter.get_index());
+      left->journal_inner_insert(pos, laddr, key,
+                                 left->maybe_get_delta_buffer());
+    } else {
+      // Positions in the right half are relative to the end of the left half.
+      auto pos = right->iter_idx(iter.get_index() - left->get_node_size());
+      right->journal_inner_insert(pos, laddr, key,
+                                  right->maybe_get_delta_buffer());
+    }
+    return make_split_insert_ret(
+      make_split_insert_ertr::ready_future_marker{},
+      mutation_result_t(mutation_status_t::SPLITTED, halves, std::nullopt));
+  });
+}
+
+
+/// React to a child split reported in mresult.
+///
+/// If this node is not pending, the work is redone on its mutable copy.
+/// Otherwise the entry at iter is repointed at the left half and the right
+/// half is inserted right after it under the pivot key. When that insert
+/// would overflow this node, the node itself is split (make_split_insert),
+/// its own reference is dropped, and the SPLITTED result is passed upward.
+OMapInnerNode::handle_split_ret
+OMapInnerNode::handle_split(omap_context_t oc, internal_iterator_t iter,
+                            mutation_result_t mresult)
+{
+  logger().debug("{}: {}","OMapInnerNode", __func__);
+  if (!is_pending()) {
+    auto writable = oc.tm.get_mutable_extent(oc.t, this)->cast<OMapInnerNode>();
+    return writable->handle_split(oc, writable->iter_idx(iter.get_index()), mresult);
+  }
+
+  auto [left, right, pivot] = *(mresult.split_tuple);
+  // Updating an existing entry cannot overflow the node, so do it first.
+  journal_inner_update(iter, left->get_laddr(), maybe_get_delta_buffer());
+
+  if (extent_will_overflow(pivot.size() + 1, std::nullopt)) {
+    return make_split_insert(oc, iter + 1, pivot, right->get_laddr())
+      .safe_then([this, oc] (auto m_result) {
+      return oc.tm.dec_ref(oc.t, get_laddr())
+        .safe_then([m_result = std::move(m_result)] (auto) {
+        return insert_ret(
+          insert_ertr::ready_future_marker{},
+          m_result);
+      });
+    });
+  }
+
+  journal_inner_insert(iter + 1, right->get_laddr(), pivot,
+                       maybe_get_delta_buffer());
+  return insert_ret(
+    insert_ertr::ready_future_marker{},
+    mutation_result_t(mutation_status_t::SUCCESS, std::nullopt, std::nullopt));
+}
+
+/// Descend into the child that contains key and forward the lookup to it.
+/// A reference to this node is held until the lookup has finished; key is
+/// captured by reference and must outlive the returned future.
+OMapInnerNode::get_value_ret
+OMapInnerNode::get_value(omap_context_t oc, const std::string &key)
+{
+  logger().debug("{}: {} key = {}", "OMapInnerNode", __func__, key);
+  auto child_pt = get_containing_child(key);
+  const auto child_addr = child_pt->get_node_key().laddr;
+  auto child_loaded = omap_load_extent(oc, child_addr, get_meta().depth - 1);
+  return child_loaded.safe_then([oc, &key] (auto child) {
+    return child->get_value(oc, key);
+  }).finally([self = OMapNodeRef(this)] {});
+}
+
+/// Insert key -> value through the child that contains key.
+///
+/// A SUCCESS result from the child is passed through unchanged; a SPLITTED
+/// result is absorbed by handle_split(); any other child status is reported
+/// upward as SUCCESS. key and value are captured by reference and must
+/// outlive the returned future.
+OMapInnerNode::insert_ret
+OMapInnerNode::insert(omap_context_t oc, const std::string &key, const std::string &value)
+{
+  logger().debug("{}: {} {}->{}", "OMapInnerNode", __func__, key, value);
+  auto child_pt = get_containing_child(key);
+  assert(child_pt != iter_end());
+  const auto child_addr = child_pt->get_node_key().laddr;
+  return omap_load_extent(oc, child_addr, get_meta().depth - 1).safe_then(
+    [oc, &key, &value] (auto child) {
+    return child->insert(oc, key, value);
+  }).safe_then([this, oc, child_pt] (auto mresult) {
+    switch (mresult.status) {
+    case mutation_status_t::SUCCESS:
+      return insert_ertr::make_ready_future<mutation_result_t>(mresult);
+    case mutation_status_t::SPLITTED:
+      return handle_split(oc, child_pt, mresult);
+    default:
+      return insert_ret(
+        insert_ertr::ready_future_marker{},
+        mutation_result_t(mutation_status_t::SUCCESS, std::nullopt, std::nullopt));
+    }
+  });
+}
+
+/// Remove key through the child that contains it and react to the child's
+/// status:
+///  - NEED_MERGE: merge the child with a sibling when this node has more
+///    than one entry, otherwise report SUCCESS
+///  - SPLITTED: absorb the split via handle_split()
+///  - SUCCESS, FAIL and anything else: pass the child's result through
+/// The child extent is kept alive until its result has been processed; key
+/// is captured by reference and must outlive the returned future.
+OMapInnerNode::rm_key_ret
+OMapInnerNode::rm_key(omap_context_t oc, const std::string &key)
+{
+  logger().debug("{}: {}","OMapInnerNode", __func__);
+  auto child_pt = get_containing_child(key);
+  const auto child_addr = child_pt->get_node_key().laddr;
+  return omap_load_extent(oc, child_addr, get_meta().depth - 1).safe_then(
+    [this, oc, &key, child_pt] (auto extent) {
+    return extent->rm_key(oc, key)
+      .safe_then([this, oc, child_pt, extent = std::move(extent)] (auto mresult) {
+      switch (mresult.status) {
+      case mutation_status_t::NEED_MERGE:
+        if (get_node_size() > 1) {
+          return merge_entry(oc, child_pt, *(mresult.need_merge));
+        }
+        return rm_key_ret(
+          rm_key_ertr::ready_future_marker{},
+          mutation_result_t(mutation_status_t::SUCCESS,
+                            std::nullopt, std::nullopt));
+      case mutation_status_t::SPLITTED:
+        return handle_split(oc, child_pt, mresult);
+      case mutation_status_t::SUCCESS:
+      case mutation_status_t::FAIL:
+      default:
+        return rm_key_ertr::make_ready_future<mutation_result_t>(mresult);
+      }
+    });
+  });
+}
+
+// List up to max_result_size keys beginning at start, walking the children
+// of this node from the one containing start onwards.
+// The loop state (current/end child iterators and the accumulated result)
+// is held by seastar::do_with for the duration of the asynchronous loop.
+OMapInnerNode::list_keys_ret
+OMapInnerNode::list_keys(omap_context_t oc, std::string &start, size_t max_result_size)
+{
+ logger().debug("{}: {}","OMapInnerNode", __func__);
+ auto child_iter = get_containing_child(start);
+
+ return seastar::do_with(child_iter, iter_end(), list_keys_result_t(), [=, &start]
+ (auto &biter, auto &eiter, auto &result) {
+ // result.next doubles as the start key handed to each child.
+ result.next = start;
+ return crimson::do_until([=, &biter, &eiter, &result] ()
+ -> list_keys_ertr::future<bool> {
+ // Stop once every child was visited or the quota is filled.
+ if (biter == eiter || result.keys.size() == max_result_size)
+ return list_keys_ertr::make_ready_future<bool>(true);
+
+ auto laddr = biter->get_node_key().laddr;
+ return omap_load_extent(oc, laddr, get_meta().depth - 1).safe_then(
+ [=, &biter, &eiter, &result] (auto &&extent) {
+ // Ask the child only for what is still missing from the quota.
+ return extent->list_keys(oc, result.next, max_result_size - result.keys.size())
+ .safe_then([&biter, &eiter, &result] (auto &&list) mutable {
+ if (!list.keys.empty())
+ result.keys.insert(result.keys.end(), list.keys.begin(),list.keys.end());
+
+ biter++;
+ // Next start key: the child's own continuation if it has one,
+ // else the value of the following entry, else "" (nothing left).
+ if (list.next != "")
+ result.next = list.next;
+ else if (biter != eiter)
+ result.next = biter->get_node_val();
+ else
+ result.next = "";
+
+ return list_keys_ertr::make_ready_future<bool>(false);
+ });
+ });
+ // ref keeps a reference to this node inside the final continuation.
+ }).safe_then([&result, ref = OMapNodeRef(this)] {
+ return list_keys_ertr::make_ready_future<list_keys_result_t>(std::move(result));
+ });
+ });
+}
+
+// List up to max_result_size key/value pairs beginning at start, walking the
+// children of this node from the one containing start onwards.
+// The loop state (current/end child iterators and the accumulated result)
+// is held by seastar::do_with for the duration of the asynchronous loop.
+OMapInnerNode::list_ret
+OMapInnerNode::list(omap_context_t oc, std::string &start, size_t max_result_size)
+{
+ logger().debug("{}: {}","OMapInnerNode", __func__);
+ auto child_iter = get_containing_child(start);
+
+ return seastar::do_with(child_iter, iter_end(), list_kvs_result_t(), [=, &start]
+ (auto &biter, auto &eiter, auto &result) {
+ // result.next doubles as the start key handed to each child.
+ result.next = start;
+ return crimson::do_until([=, &biter, &eiter, &result] ()
+ -> list_ertr::future<bool> {
+ // Stop once every child was visited or the quota is filled.
+ if (biter == eiter || result.kvs.size() == max_result_size)
+ return list_ertr::make_ready_future<bool>(true);
+
+ auto laddr = biter->get_node_key().laddr;
+ return omap_load_extent(oc, laddr, get_meta().depth - 1).safe_then(
+ [=, &biter, &eiter, &result] (auto &&extent) {
+ // Ask the child only for what is still missing from the quota.
+ return extent->list(oc, result.next, max_result_size - result.kvs.size())
+ .safe_then([&biter, &eiter, &result] (auto &&list) mutable {
+ if (!list.kvs.empty())
+ result.kvs.insert(result.kvs.end(), list.kvs.begin(),list.kvs.end());
+
+ biter++;
+ // Next start key: the child's own continuation if it has one,
+ // else the value of the following entry, else "" (nothing left).
+ if (list.next != "")
+ result.next = list.next;
+ else if (biter != eiter)
+ result.next = biter->get_node_val();
+ else
+ result.next = "";
+
+ return list_ertr::make_ready_future<bool>(false);
+ });
+ });
+ // ref keeps a reference to this node inside the final continuation.
+ }).safe_then([&result, ref = OMapNodeRef(this)] {
+ return list_ertr::make_ready_future<list_kvs_result_t>(std::move(result));
+ });
+ });
+}
+
+// Clear the subtree below this node: for every entry, load the child,
+// clear it recursively, then drop the reference on the child's extent.
+// This node's own extent is not released here.
+OMapInnerNode::clear_ret
+OMapInnerNode::clear(omap_context_t oc)
+{
+ logger().debug("{}: {}","OMapInnerNode", __func__);
+ return crimson::do_for_each(iter_begin(), iter_end(), [this, oc] (auto iter) {
+ auto laddr = iter->get_node_key().laddr;
+ return omap_load_extent(oc, laddr, get_meta().depth - 1).safe_then(
+ [oc] (auto &&extent) {
+ return extent->clear(oc);
+ }).safe_then([oc, laddr] {
+ return oc.tm.dec_ref(oc.t, laddr);
+ // ref keeps a reference to this node inside the last continuation;
+ // the value produced by dec_ref is ignored.
+ }).safe_then([ref = OMapNodeRef(this)] (auto ret){
+ return clear_ertr::now();
+ });
+ });
+}
+
+/// Allocate two new inner nodes and distribute this node's entries between
+/// them via split_into(); resolves to (left, right, pivot).
+OMapInnerNode::split_children_ret
+OMapInnerNode::make_split_children(omap_context_t oc)
+{
+  logger().debug("{}: {}","OMapInnerNode", __func__);
+  auto allocated = oc.tm.alloc_extents<OMapInnerNode>(
+    oc.t, L_ADDR_MIN, OMAP_BLOCK_SIZE, 2);
+  return allocated.safe_then([this] (auto &&halves) {
+    auto left = halves.front();
+    auto right = halves.back();
+    auto pivot = split_into(*left, *right);
+    return split_children_ret(
+      split_children_ertr::ready_future_marker{},
+      std::make_tuple(left, right, std::move(pivot)));
+  });
+}
+
+/// Allocate one replacement inner node and fill it with the merged contents
+/// of this node and right (merge_from); resolves to the replacement.
+OMapInnerNode::full_merge_ret
+OMapInnerNode::make_full_merge(omap_context_t oc, OMapNodeRef right)
+{
+  logger().debug("{}: {}","OMapInnerNode", __func__);
+  auto allocated = oc.tm.alloc_extent<OMapInnerNode>(
+    oc.t, L_ADDR_MIN, OMAP_BLOCK_SIZE);
+  return allocated.safe_then([this, right] (auto &&merged) {
+    merged->merge_from(*this, *right->cast<OMapInnerNode>());
+    return full_merge_ret(
+      full_merge_ertr::ready_future_marker{},
+      std::move(merged));
+  });
+}
+
+/// Allocate two replacement inner nodes and redistribute the entries of this
+/// node and _right between them (balance_into_new_nodes); resolves to
+/// (new left, new right, pivot). _right must have the same extent type as
+/// this node.
+OMapInnerNode::make_balanced_ret
+OMapInnerNode::make_balanced(omap_context_t oc, OMapNodeRef _right)
+{
+  logger().debug("{}: {}","OMapInnerNode", __func__);
+  ceph_assert(_right->get_type() == type);
+  auto allocated = oc.tm.alloc_extents<OMapInnerNode>(
+    oc.t, L_ADDR_MIN, OMAP_BLOCK_SIZE, 2);
+  return allocated.safe_then([this, _right] (auto &&replacements) {
+    auto new_left = replacements.front();
+    auto new_right = replacements.back();
+    auto pivot = balance_into_new_nodes(
+      *this, *_right->cast<OMapInnerNode>(), *new_left, *new_right);
+    return make_balanced_ret(
+      make_balanced_ertr::ready_future_marker{},
+      std::make_tuple(new_left, new_right, std::move(pivot)));
+  });
+}
+
+// Combines the child `entry` (referenced by `iter`) with an adjacent sibling
+// (the "donor"). If the donor is itself below the minimum fill, the two are
+// fully merged into one new node; otherwise their entries are rebalanced
+// across two new nodes. This node's entries are journaled to point at the
+// replacement(s) and dec_ref is called on the old children's laddrs.
+OMapInnerNode::merge_entry_ret
+OMapInnerNode::merge_entry(omap_context_t oc, internal_iterator_t iter, OMapNodeRef entry)
+{
+ logger().debug("{}: {}","OMapInnerNode", __func__);
+ // not pending: obtain the mutable extent and redo the call on it, using an
+ // iterator at the same index
+ if (!is_pending()) {
+ auto mut = oc.tm.get_mutable_extent(oc.t, this)->cast<OMapInnerNode>();
+ auto mut_iter = mut->iter_idx(iter->get_index());
+ return mut->merge_entry(oc, mut_iter, entry);
+ }
+ // when `iter` is the last entry the donor is the previous sibling,
+ // otherwise it is the next one
+ auto is_left = (iter + 1) == iter_end();
+ auto donor_iter = is_left ? iter - 1 : iter + 1;
+ return omap_load_extent(oc, donor_iter->get_node_key().laddr, get_meta().depth - 1)
+ .safe_then([=] (auto &&donor) mutable {
+ // order the pair so that l/liter is the left node and r/riter the right one
+ auto [l, r] = is_left ?
+ std::make_pair(donor, entry) : std::make_pair(entry, donor);
+ auto [liter, riter] = is_left ?
+ std::make_pair(donor_iter, iter) : std::make_pair(iter, donor_iter);
+ if (donor->extent_is_below_min()) {
+ logger().debug("{}::merge_entry make_full_merge l {} r {}", __func__, *l, *r);
+ assert(entry->extent_is_below_min());
+ return l->make_full_merge(oc, r).safe_then([=] (auto &&replacement){
+ // the left slot now points at the merged node; the right slot goes away
+ journal_inner_update(liter, replacement->get_laddr(), maybe_get_delta_buffer());
+ journal_inner_remove(riter, maybe_get_delta_buffer());
+ // retire the two old child extents
+ std::list<laddr_t> dec_laddrs {l->get_laddr(), r->get_laddr()};
+ return oc.tm.dec_ref(oc.t, dec_laddrs).safe_then([this, oc] (auto &&ret) {
+ // losing an entry may leave this node below min; report it upwards
+ if (extent_is_below_min()) {
+ return merge_entry_ret(
+ merge_entry_ertr::ready_future_marker{},
+ mutation_result_t(mutation_status_t::NEED_MERGE, std::nullopt,
+ this->cast<OMapNode>()));
+ } else {
+ return merge_entry_ret(
+ merge_entry_ertr::ready_future_marker{},
+ mutation_result_t(mutation_status_t::SUCCESS, std::nullopt, std::nullopt));
+ }
+ });
+ });
+ } else {
+ logger().debug("{}::merge_entry balanced l {} r {}", __func__, *l, *r);
+ return l->make_balanced(oc, r).safe_then([=] (auto tuple) {
+ auto [replacement_l, replacement_r, replacement_pivot] = tuple;
+ // the update will not cause an overflow, so do it first
+ journal_inner_update(liter, replacement_l->get_laddr(), maybe_get_delta_buffer());
+ if (!extent_will_overflow(replacement_pivot.size() + 1, std::nullopt)) {
+ journal_inner_replace(riter, replacement_r->get_laddr(),
+ replacement_pivot, maybe_get_delta_buffer());
+ std::list<laddr_t> dec_laddrs{l->get_laddr(), r->get_laddr()};
+ return oc.tm.dec_ref(oc.t, dec_laddrs).safe_then([] (auto &&ret) {
+ return merge_entry_ret(
+ merge_entry_ertr::ready_future_marker{},
+ mutation_result_t(mutation_status_t::SUCCESS, std::nullopt, std::nullopt));
+ });
+ } else {
+ logger().debug("{}::merge_entry balanced and split {} r {}", __func__, *l, *r);
+ // use remove + insert instead of replace; the remove cannot cause a
+ // split, so do it first
+ journal_inner_remove(riter, maybe_get_delta_buffer());
+ return make_split_insert(oc, riter, replacement_pivot, replacement_r->get_laddr())
+ .safe_then([this, oc, l = l, r = r] (auto mresult) {
+ // dec_ref both old children and this node as well
+ std::list<laddr_t> dec_laddrs{l->get_laddr(), r->get_laddr(), get_laddr()};
+ return oc.tm.dec_ref(oc.t, dec_laddrs)
+ .safe_then([mresult = std::move(mresult)] (auto &&ret){
+ return merge_entry_ret(
+ merge_entry_ertr::ready_future_marker{},
+ mresult);
+ });
+ });
+ }
+ });
+ }
+ });
+
+}
+
+// Returns an iterator to the first entry for which contains(key) is true.
+// Asserts if no entry matches.
+OMapInnerNode::internal_iterator_t
+OMapInnerNode::get_containing_child(const std::string &key)
+{
+  auto cursor = iter_begin();
+  while (cursor != iter_end()) {
+    if (cursor.contains(key)) {
+      return cursor;
+    }
+    ++cursor;
+  }
+  ceph_assert( 0 == "invalid");
+  return iter_end();
+}
+
+// Appends this leaf's entry count and tree depth to `out`.
+std::ostream &OMapLeafNode::print_detail_l(std::ostream &out) const
+{
+  out << ", size=" << get_size();
+  out << ", depth=" << get_meta().depth;
+  return out;
+}
+
+// Looks `key` up in this leaf. Resolves to (key, value) when the key is
+// present and to (key, "") when it is not.
+OMapLeafNode::get_value_ret
+OMapLeafNode::get_value(omap_context_t oc, const std::string &key)
+{
+  logger().debug("{}: {} key = {}","OMapLeafNode", __func__, key);
+  auto hit = find_string_key(key);
+  const bool found = hit != iter_end();
+  if (!found) {
+    return get_value_ret(
+      get_value_ertr::ready_future_marker{},
+      std::make_pair(key, ""));
+  }
+  auto stored = hit->get_string_val();
+  return get_value_ret(
+    get_value_ertr::ready_future_marker{},
+    std::make_pair(key, stored));
+}
+
+// Inserts key -> value into this leaf, or updates the value when the key
+// already exists. If the entry fits, the leaf is mutated in place and the
+// result is SUCCESS. Otherwise the leaf is split into two new leaves, the
+// change is applied to whichever half holds the key, dec_ref is called on
+// this leaf's laddr, and the result is SPLITTED carrying (left, right, pivot).
+OMapLeafNode::insert_ret
+OMapLeafNode::insert(omap_context_t oc, const std::string &key, const std::string &value)
+{
+ logger().debug("{}: {}, {} -> {}","OMapLeafNode", __func__, key, value);
+ // the "+ 1" matches the "+ 1" used for stored string lengths elsewhere in
+ // this patch (see key_len in the inner layout)
+ if (!extent_will_overflow(key.size() + 1, value.size() + 1)) {
+ // not pending: redo the call on the mutable extent
+ if (!is_pending()) {
+ auto mut = oc.tm.get_mutable_extent(oc.t, this)->cast<OMapLeafNode>();
+ return mut->insert(oc, key, value);
+ }
+ auto replace_pt = find_string_key(key);
+ if (replace_pt != iter_end()) {
+ journal_leaf_update(replace_pt, key, value, maybe_get_delta_buffer());
+ } else {
+ auto insert_pt = string_lower_bound(key);
+ journal_leaf_insert(insert_pt, key, value, maybe_get_delta_buffer());
+
+ logger().debug(
+ "{}: {} inserted {}->{} {}"," OMapLeafNode", __func__,
+ insert_pt.get_node_key(),
+ insert_pt.get_node_val(),
+ insert_pt.get_string_val());
+ }
+ return insert_ret(
+ insert_ertr::ready_future_marker{},
+ mutation_result_t(mutation_status_t::SUCCESS, std::nullopt, std::nullopt));
+ } else {
+ // NOTE(review): key and value are captured by reference in this
+ // continuation; the caller presumably keeps them alive until the returned
+ // future resolves -- confirm.
+ return make_split_children(oc).safe_then([this, oc, &key, &value] (auto tuple) {
+ auto [left, right, pivot] = tuple;
+ // positions are looked up in this (pre-split) leaf; indices that land in
+ // the right half are rebased by the number of entries in the left half
+ auto replace_pt = find_string_key(key);
+ if (replace_pt != iter_end()) {
+ if (key < pivot) { //left
+ auto mut_iter = left->iter_idx(replace_pt->get_index());
+ left->journal_leaf_update(mut_iter, key, value, left->maybe_get_delta_buffer());
+ } else if (key >= pivot) { //right
+ auto mut_iter = right->iter_idx(replace_pt->get_index() - left->get_node_size());
+ right->journal_leaf_update(mut_iter, key, value, right->maybe_get_delta_buffer());
+ }
+ } else {
+ auto insert_pt = string_lower_bound(key);
+ if (key < pivot) { //left
+ auto mut_iter = left->iter_idx(insert_pt->get_index());
+ left->journal_leaf_insert(mut_iter, key, value, left->maybe_get_delta_buffer());
+ } else {
+ auto mut_iter = right->iter_idx(insert_pt->get_index() - left->get_node_size());
+ right->journal_leaf_insert(mut_iter, key, value, right->maybe_get_delta_buffer());
+ }
+ }
+ // this leaf has been replaced by the two new leaves
+ return oc.tm.dec_ref(oc.t, get_laddr())
+ .safe_then([tuple = std::move(tuple)] (auto ret) {
+ return insert_ret(
+ insert_ertr::ready_future_marker{},
+ mutation_result_t(mutation_status_t::SPLITTED, tuple, std::nullopt));
+ });
+ });
+ }
+}
+
+// Removes `key` from this leaf. Resolves to FAIL when the key is absent, to
+// NEED_MERGE (carrying this node) when the removal leaves the leaf below the
+// minimum fill, and to SUCCESS otherwise.
+OMapLeafNode::rm_key_ret
+OMapLeafNode::rm_key(omap_context_t oc, const std::string &key)
+{
+  logger().debug("{}: {} : {}","OMapLeafNode", __func__, key);
+  if (!is_pending()) {
+    // redo the call on the mutable extent
+    auto writable = oc.tm.get_mutable_extent(oc.t, this)->cast<OMapLeafNode>();
+    return writable->rm_key(oc, key);
+  }
+
+  auto victim = find_string_key(key);
+  const bool found = victim != iter_end();
+  if (!found) {
+    return rm_key_ret(
+      rm_key_ertr::ready_future_marker{},
+      mutation_result_t(mutation_status_t::FAIL, std::nullopt, std::nullopt));
+  }
+
+  journal_leaf_remove(victim, maybe_get_delta_buffer());
+  logger().debug(
+    "{}: removed {}->{} {}", __func__,
+    victim->get_node_key(),
+    victim->get_node_val(),
+    victim->get_string_val());
+
+  if (!extent_is_below_min()) {
+    return rm_key_ret(
+      rm_key_ertr::ready_future_marker{},
+      mutation_result_t(mutation_status_t::SUCCESS, std::nullopt, std::nullopt));
+  }
+  return rm_key_ret(
+    rm_key_ertr::ready_future_marker{},
+    mutation_result_t(mutation_status_t::NEED_MERGE, std::nullopt,
+                      this->cast<OMapNode>()));
+}
+
+// Lists keys stored in this leaf, in iteration order, beginning at
+// string_lower_bound(start) (or at the first entry when `start` is empty).
+//
+// At most max_result_size keys are returned. result.next is set to the key
+// of the first entry that was not returned, or to "" when the end of the
+// leaf was reached.
+OMapLeafNode::list_keys_ret
+OMapLeafNode::list_keys(omap_context_t oc, std::string &start, size_t max_result_size)
+{
+  logger().debug("{}: {}","OMapLeafNode", __func__);
+  auto result = list_keys_result_t();
+  iterator iter = start.empty() ? iter_begin() : string_lower_bound(start);
+  // strict '<': the previous '<=' collected max_result_size + 1 keys
+  for (; iter != iter_end() && result.keys.size() < max_result_size; iter++) {
+    result.keys.push_back(iter->get_node_val());
+  }
+  if (iter == iter_end())
+    result.next = "";
+  else
+    result.next = iter->get_node_val();
+
+  return list_keys_ertr::make_ready_future<list_keys_result_t>(std::move(result));
+}
+
+// Lists key/value pairs stored in this leaf, in iteration order, beginning
+// at string_lower_bound(start) (or at the first entry when `start` is empty).
+//
+// At most max_result_size pairs are returned. result.next is set to the key
+// of the first entry that was not returned, or to "" when the end of the
+// leaf was reached.
+OMapLeafNode::list_ret
+OMapLeafNode::list(omap_context_t oc, std::string &start, size_t max_result_size)
+{
+  logger().debug("{}: {}", "OMapLeafNode", __func__);
+  auto result = list_kvs_result_t();
+  iterator iter = start.empty() ? iter_begin() : string_lower_bound(start);
+  // strict '<': the previous '<=' collected max_result_size + 1 pairs
+  for (; iter != iter_end() && result.kvs.size() < max_result_size; iter++) {
+    result.kvs.push_back({iter->get_node_val(), iter->get_string_val()});
+  }
+  if (iter == iter_end())
+    result.next = "";
+  else
+    result.next = iter->get_node_val();
+
+  return list_ertr::make_ready_future<list_kvs_result_t>(std::move(result));
+}
+
+// A leaf has no children to recurse into, so clearing it is an immediate
+// no-op here.
+OMapLeafNode::clear_ret
+OMapLeafNode::clear(omap_context_t oc)
+{
+ return clear_ertr::now();
+}
+
+// Allocates two new leaf extents and distributes this leaf's entries between
+// them with split_into(). Resolves to (left, right, pivot), where pivot is
+// the value returned by split_into().
+OMapLeafNode::split_children_ret
+OMapLeafNode::make_split_children(omap_context_t oc)
+{
+  logger().debug("{}: {}","OMapLeafNode", __func__);
+  return oc.tm.alloc_extents<OMapLeafNode>(oc.t, L_ADDR_MIN, OMAP_BLOCK_SIZE, 2)
+    .safe_then([this] (auto &&new_leaves) {
+      auto lhs = new_leaves.front();
+      auto rhs = new_leaves.back();
+      auto pivot = split_into(*lhs, *rhs);
+      return split_children_ret(
+        split_children_ertr::ready_future_marker{},
+        std::make_tuple(lhs, rhs, std::move(pivot)));
+    });
+}
+
+// Allocates one new leaf extent and fills it by merging this leaf (left)
+// with `right`, which must also be a leaf. Resolves to the merged leaf.
+OMapLeafNode::full_merge_ret
+OMapLeafNode::make_full_merge(omap_context_t oc, OMapNodeRef right)
+{
+  ceph_assert(right->get_type() == type);
+  logger().debug("{}: {}","OMapLeafNode", __func__);
+  return oc.tm.alloc_extent<OMapLeafNode>(oc.t, L_ADDR_MIN, OMAP_BLOCK_SIZE)
+    .safe_then([this, right] (auto &&merged) {
+      // `right` keeps the node alive; the reference below only aliases it
+      auto &rhs = *right->cast<OMapLeafNode>();
+      merged->merge_from(*this, rhs);
+      return full_merge_ret(
+        full_merge_ertr::ready_future_marker{},
+        std::move(merged));
+    });
+}
+
+// Allocates two new leaf extents and redistributes the entries of this leaf
+// (left) and `_right` across them with balance_into_new_nodes().
+// Resolves to (new_left, new_right, pivot).
+OMapLeafNode::make_balanced_ret
+OMapLeafNode::make_balanced(omap_context_t oc, OMapNodeRef _right)
+{
+  ceph_assert(_right->get_type() == type);
+  logger().debug("{}: {}", "OMapLeafNode", __func__);
+  return oc.tm.alloc_extents<OMapLeafNode>(oc.t, L_ADDR_MIN, OMAP_BLOCK_SIZE, 2)
+    .safe_then([this, _right] (auto &&fresh_leaves) {
+      auto new_left = fresh_leaves.front();
+      auto new_right = fresh_leaves.back();
+      auto &sibling = *_right->cast<OMapLeafNode>();
+      auto pivot = balance_into_new_nodes(
+        *this, sibling, *new_left, *new_right);
+      return make_balanced_ret(
+        make_balanced_ertr::ready_future_marker{},
+        std::make_tuple(new_left, new_right, std::move(pivot)));
+    });
+}
+
+
+// Reads the omap node stored at `laddr`. `depth` selects the concrete type:
+// depth 1 is read as an OMapLeafNode, anything deeper as an OMapInnerNode.
+// Exactly one extent is expected back from the read.
+TransactionManager::read_extent_ertr::future<OMapNodeRef>
+omap_load_extent(omap_context_t oc, laddr_t laddr, depth_t depth)
+{
+  ceph_assert(depth > 0);
+  if (!(depth > 1)) {
+    return oc.tm.read_extents<OMapLeafNode>(oc.t, laddr, OMAP_BLOCK_SIZE).safe_then(
+      [](auto&& loaded) {
+        assert(loaded.size() == 1);
+        [[maybe_unused]] auto [addr, node] = loaded.front();
+        return TransactionManager::read_extent_ertr::make_ready_future<OMapNodeRef>(
+          std::move(node));
+      });
+  }
+  return oc.tm.read_extents<OMapInnerNode>(oc.t, laddr, OMAP_BLOCK_SIZE).safe_then(
+    [](auto&& loaded) {
+      assert(loaded.size() == 1);
+      [[maybe_unused]] auto [addr, node] = loaded.front();
+      return TransactionManager::read_extent_ertr::make_ready_future<OMapNodeRef>(
+        std::move(node));
+    });
+}
+}
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include <string.h>
+
+#include "include/buffer.h"
+
+#include "crimson/common/errorator.h"
+#include "crimson/os/seastore/omap_manager.h"
+#include "crimson/os/seastore/seastore_types.h"
+#include "crimson/os/seastore/omap_manager/btree/string_kv_node_layout.h"
+#include "crimson/os/seastore/omap_manager/btree/omap_types.h"
+#include "crimson/os/seastore/omap_manager/btree/omap_btree_node.h"
+
+namespace crimson::os::seastore::omap_manager {
+
+/**
+ * OMapInnerNode
+ *
+ * Abstracts operations on and layout of internal nodes for the
+ * omap Tree.
+ *
+ * Layout (4k):
+ * num_entries: meta : keys : values :
+ */
+
+struct OMapInnerNode
+ : OMapNode,
+ StringKVInnerNodeLayout<
+ omap_node_meta_t, omap_node_meta_le_t> {
+ using OMapInnerNodeRef = TCachedExtentRef<OMapInnerNode>;
+ using internal_iterator_t = const_iterator;
+ // forwards all arguments to OMapNode and points the layout at this
+ // extent's buffer
+ template <typename... T>
+ OMapInnerNode(T&&... t) :
+ OMapNode(std::forward<T>(t)...),
+ StringKVInnerNodeLayout(get_bptr().c_str()) {}
+
+ static constexpr extent_types_t type = extent_types_t::OMAP_INNER;
+
+ omap_node_meta_t get_node_meta() const final { return get_meta(); }
+ // inner nodes store keys only, so vsize is ignored
+ bool extent_will_overflow(size_t ksize, std::optional<size_t> vsize) const {
+ return is_overflow(ksize);
+ }
+ bool extent_is_below_min() const { return below_min(); }
+ uint32_t get_node_size() { return get_size(); }
+
+ // copy-constructs a new node for mutation; no delta may be buffered yet
+ CachedExtentRef duplicate_for_write() final {
+ assert(delta_buffer.empty());
+ return CachedExtentRef(new OMapInnerNode(*this));
+ }
+
+ // mutations recorded for this node; handed to the journal_inner_* helpers
+ delta_inner_buffer_t delta_buffer;
+ // returns the delta buffer only while is_mutation_pending(), else nullptr
+ delta_inner_buffer_t *maybe_get_delta_buffer() {
+ return is_mutation_pending() ? &delta_buffer : nullptr;
+ }
+
+ get_value_ret get_value(omap_context_t oc, const std::string &key) final;
+
+ insert_ret insert(omap_context_t oc, const std::string &key, const std::string &value) final;
+
+ rm_key_ret rm_key(omap_context_t oc, const std::string &key) final;
+
+ list_keys_ret list_keys(omap_context_t oc, std::string &start, size_t max_result_size) final;
+
+ list_ret list(omap_context_t oc, std::string &start, size_t max_result_size) final;
+
+ clear_ret clear(omap_context_t oc) final;
+
+ // resolves to (left, right, pivot) for two newly allocated inner nodes
+ using split_children_ertr = TransactionManager::alloc_extent_ertr;
+ using split_children_ret = split_children_ertr::future
+ <std::tuple<OMapInnerNodeRef, OMapInnerNodeRef, std::string>>;
+ split_children_ret make_split_children(omap_context_t oc);
+
+ full_merge_ret make_full_merge(omap_context_t oc, OMapNodeRef right) final;
+
+ make_balanced_ret
+ make_balanced(omap_context_t oc, OMapNodeRef right) final;
+
+ using make_split_insert_ertr = TransactionManager::alloc_extent_ertr;
+ using make_split_insert_ret = make_split_insert_ertr::future<mutation_result_t>;
+ make_split_insert_ret make_split_insert(omap_context_t oc, internal_iterator_t iter,
+ std::string key, laddr_t laddr);
+
+ using merge_entry_ertr = TransactionManager::read_extent_ertr;
+ using merge_entry_ret = merge_entry_ertr::future<mutation_result_t>;
+ merge_entry_ret merge_entry(omap_context_t oc,
+ internal_iterator_t iter, OMapNodeRef entry);
+
+ using handle_split_ertr = TransactionManager::read_extent_ertr;
+ using handle_split_ret = handle_split_ertr::future<mutation_result_t>;
+ handle_split_ret handle_split(omap_context_t oc, internal_iterator_t iter,
+ mutation_result_t mresult);
+
+ std::ostream &print_detail_l(std::ostream &out) const final;
+
+ extent_types_t get_type() const final {
+ return type;
+ }
+
+ // serializes the buffered deltas into a bufferlist
+ ceph::bufferlist get_delta() final {
+ ceph::bufferlist bl;
+ delta_buffer.encode(bl);
+ return bl;
+ }
+
+ // decodes a delta produced by get_delta() and replays it onto this node
+ void apply_delta(const ceph::bufferlist &bl) final {
+ assert(bl.length());
+ delta_inner_buffer_t buffer;
+ buffer.decode(bl);
+ buffer.replay(*this);
+ }
+
+ // returns the entry for which contains(key) holds; asserts if none does
+ internal_iterator_t get_containing_child(const std::string &key);
+
+};
+using OMapInnerNodeRef = OMapInnerNode::OMapInnerNodeRef;
+/**
+ * OMapLeafNode
+ *
+ * Abstracts operations on and layout of leaf nodes for the
+ * OMap Tree.
+ *
+ * Layout (4k):
+ * num_entries: meta : keys : values :
+ */
+
+struct OMapLeafNode
+  : OMapNode,
+    StringKVLeafNodeLayout<
+      omap_node_meta_t, omap_node_meta_le_t> {
+
+  using OMapLeafNodeRef = TCachedExtentRef<OMapLeafNode>;
+  using internal_iterator_t = const_iterator;
+  // forwards all arguments to OMapNode and points the layout at this
+  // extent's buffer
+  template <typename... T>
+  OMapLeafNode(T&&... t) :
+    OMapNode(std::forward<T>(t)...),
+    StringKVLeafNodeLayout(get_bptr().c_str()) {}
+
+  static constexpr extent_types_t type = extent_types_t::OMAP_LEAF;
+
+  omap_node_meta_t get_node_meta() const final { return get_meta(); }
+  // A leaf stores both key and value, so vsize must be provided; make the
+  // precondition explicit instead of dereferencing an empty optional.
+  bool extent_will_overflow(size_t ksize, std::optional<size_t> vsize) const {
+    assert(vsize);
+    return is_overflow(ksize, *vsize);
+  }
+  bool extent_is_below_min() const { return below_min(); }
+  uint32_t get_node_size() { return get_size(); }
+
+  // copy-constructs a new node for mutation; no delta may be buffered yet
+  CachedExtentRef duplicate_for_write() final {
+    assert(delta_buffer.empty());
+    return CachedExtentRef(new OMapLeafNode(*this));
+  }
+
+  // mutations recorded for this node; handed to the journal_leaf_* helpers
+  delta_leaf_buffer_t delta_buffer;
+  // returns the delta buffer only while is_mutation_pending(), else nullptr
+  delta_leaf_buffer_t *maybe_get_delta_buffer() {
+    return is_mutation_pending() ? &delta_buffer : nullptr;
+  }
+
+  get_value_ret get_value(omap_context_t oc, const std::string &key) final;
+
+  insert_ret insert(omap_context_t oc, const std::string &key, const std::string &value) final;
+
+  rm_key_ret rm_key(omap_context_t oc, const std::string &key) final;
+
+  list_keys_ret list_keys(omap_context_t oc, std::string &start, size_t max_result_size) final;
+
+  list_ret list(omap_context_t oc, std::string &start, size_t max_result_size) final;
+
+  clear_ret clear(omap_context_t oc) final;
+
+  // resolves to (left, right, pivot) for two newly allocated leaves
+  using split_children_ertr = TransactionManager::alloc_extent_ertr;
+  using split_children_ret = split_children_ertr::future
+    <std::tuple<OMapLeafNodeRef, OMapLeafNodeRef, std::string>>;
+  split_children_ret make_split_children(omap_context_t oc);
+
+  full_merge_ret make_full_merge(omap_context_t oc, OMapNodeRef right) final;
+
+  make_balanced_ret make_balanced(omap_context_t oc, OMapNodeRef _right) final;
+
+  extent_types_t get_type() const final {
+    return type;
+  }
+
+  // serializes the buffered deltas into a bufferlist
+  ceph::bufferlist get_delta() final {
+    ceph::bufferlist bl;
+    delta_buffer.encode(bl);
+    return bl;
+  }
+
+  // decodes a delta produced by get_delta() and replays it onto this node;
+  // a private copy of the bufferlist is rebuilt before decoding
+  void apply_delta(const ceph::bufferlist &_bl) final {
+    assert(_bl.length());
+    ceph::bufferlist bl = _bl;
+    bl.rebuild();
+    delta_leaf_buffer_t buffer;
+    buffer.decode(bl);
+    buffer.replay(*this);
+  }
+
+  std::ostream &print_detail_l(std::ostream &out) const final;
+
+  std::pair<internal_iterator_t, internal_iterator_t>
+  get_leaf_entries(std::string &key);
+
+};
+using OMapLeafNodeRef = OMapLeafNode::OMapLeafNodeRef;
+
+std::ostream &operator<<(std::ostream &out, const omap_inner_key_t &rhs);
+std::ostream &operator<<(std::ostream &out, const omap_leaf_key_t &rhs);
+}
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+#include "crimson/os/seastore/seastore_types.h"
+
+namespace crimson::os::seastore::omap_manager {
+
+// Per-node metadata kept in the node header: the node's depth in the tree.
+// Splitting, merging and rebalancing all preserve the depth.
+struct omap_node_meta_t {
+  depth_t depth = 0;
+
+  // both halves of a split stay at this node's depth
+  std::pair<omap_node_meta_t, omap_node_meta_t> split_into() const {
+    const omap_node_meta_t half{depth};
+    return std::make_pair(half, half);
+  }
+
+  // merging requires both inputs to be at the same depth
+  static omap_node_meta_t merge_from(
+    const omap_node_meta_t &lhs, const omap_node_meta_t &rhs) {
+    assert(lhs.depth == rhs.depth);
+    const omap_node_meta_t merged{lhs.depth};
+    return merged;
+  }
+
+  // rebalancing requires both inputs to be at the same depth
+  static std::pair<omap_node_meta_t, omap_node_meta_t>
+  rebalance(const omap_node_meta_t &lhs, const omap_node_meta_t &rhs) {
+    assert(lhs.depth == rhs.depth);
+    const omap_node_meta_t balanced{lhs.depth};
+    return std::make_pair(balanced, balanced);
+  }
+};
+
+// Stored counterpart of omap_node_meta_t: holds the depth as depth_le_t and
+// converts to and from omap_node_meta_t.
+struct omap_node_meta_le_t {
+ depth_le_t depth = init_les32(0);
+
+ omap_node_meta_le_t() = default;
+ omap_node_meta_le_t(const omap_node_meta_le_t &) = default;
+ explicit omap_node_meta_le_t(const omap_node_meta_t &val)
+ : depth(init_les32(val.depth)) {}
+
+ // implicit conversion back to omap_node_meta_t
+ operator omap_node_meta_t() const {
+ return omap_node_meta_t{ depth };
+ }
+};
+
+// Fixed-size slot of an inner node: where the key string lives and which
+// child it leads to.
+struct omap_inner_key_t {
+ // offset of the key string, counted back from the end of the node buffer
+ // (see get_node_val_ptr in the inner layout)
+ uint16_t key_off = 0;
+ // stored length of the key string
+ uint16_t key_len = 0;
+ // logical address of the child node
+ laddr_t laddr = 0;
+
+ omap_inner_key_t() = default;
+ omap_inner_key_t(uint16_t off, uint16_t len, laddr_t addr)
+ : key_off(off), key_len(len), laddr(addr) {}
+};
+
+// Stored counterpart of omap_inner_key_t, using ceph_le16 / laddr_le_t
+// fields; converts to and from omap_inner_key_t.
+struct omap_inner_key_le_t {
+ ceph_le16 key_off = init_le16(0);
+ ceph_le16 key_len = init_le16(0);
+ laddr_le_t laddr = laddr_le_t(0);
+
+ omap_inner_key_le_t() = default;
+ omap_inner_key_le_t(const omap_inner_key_le_t &) = default;
+ explicit omap_inner_key_le_t(const omap_inner_key_t &key)
+ : key_off(init_le16(key.key_off)),
+ key_len(init_le16(key.key_len)),
+ laddr(laddr_le_t(key.laddr)) {}
+
+ // implicit conversion back to omap_inner_key_t
+ operator omap_inner_key_t() const {
+ return omap_inner_key_t{uint16_t(key_off), uint16_t(key_len), laddr_t(laddr)};
+ }
+
+ // assignment from an omap_inner_key_t
+ omap_inner_key_le_t& operator=(omap_inner_key_t key) {
+ key_off = init_le16(key.key_off);
+ key_len = init_le16(key.key_len);
+ laddr = laddr_le_t(key.laddr);
+ return *this;
+ }
+
+ // field-wise equality
+ inline bool operator==(const omap_inner_key_le_t b) const {
+ return key_off == b.key_off && key_len == b.key_len && laddr == b.laddr;
+ }
+};
+
+// Fixed-size slot of a leaf node: offset and length of the key string and
+// of the value string. All fields default to zero.
+struct omap_leaf_key_t {
+  uint16_t key_off = 0;
+  uint16_t key_len = 0;
+  uint16_t val_off = 0;
+  uint16_t val_len = 0;
+
+  omap_leaf_key_t() = default;
+  omap_leaf_key_t(uint16_t k_off, uint16_t k_len, uint16_t v_off, uint16_t v_len)
+    : key_off{k_off},
+      key_len{k_len},
+      val_off{v_off},
+      val_len{v_len} {}
+};
+
+// Stored counterpart of omap_leaf_key_t, using ceph_le16 fields; converts
+// to and from omap_leaf_key_t.
+struct omap_leaf_key_le_t {
+ ceph_le16 key_off = init_le16(0);
+ ceph_le16 key_len = init_le16(0);
+ ceph_le16 val_off = init_le16(0);
+ ceph_le16 val_len = init_le16(0);
+
+ omap_leaf_key_le_t() = default;
+ omap_leaf_key_le_t(const omap_leaf_key_le_t &) = default;
+ explicit omap_leaf_key_le_t(const omap_leaf_key_t &key)
+ : key_off(init_le16(key.key_off)),
+ key_len(init_le16(key.key_len)),
+ val_off(init_le16(key.val_off)),
+ val_len(init_le16(key.val_len)) {}
+
+ // implicit conversion back to omap_leaf_key_t
+ operator omap_leaf_key_t() const {
+ return omap_leaf_key_t{uint16_t(key_off), uint16_t(key_len),
+ uint16_t(val_off), uint16_t(val_len)};
+ }
+
+ // assignment from an omap_leaf_key_t
+ omap_leaf_key_le_t& operator=(omap_leaf_key_t key) {
+ key_off = init_le16(key.key_off);
+ key_len = init_le16(key.key_len);
+ val_off = init_le16(key.val_off);
+ val_len = init_le16(key.val_len);
+ return *this;
+ }
+
+ // field-wise equality
+ inline bool operator==(const omap_leaf_key_le_t b) const {
+ return key_off == b.key_off && key_len == b.key_len &&
+ val_off == b.val_off && val_len == b.val_len;
+ }
+};
+
+}
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include <iostream>
+#include <string>
+
+#include "include/byteorder.h"
+
+#include "crimson/common/layout.h"
+#include "crimson/common/fixed_kv_node_layout.h"
+#include "crimson/os/seastore/omap_manager/btree/omap_types.h"
+
+#define BlockSize 4096
+namespace crimson::os::seastore::omap_manager {
+
+template <
+ typename Meta,
+ typename MetaInt,
+ bool VALIDATE_INVARIANTS=true> class StringKVInnerNodeLayout;
+
+template <
+ typename Meta,
+ typename MetaInt,
+ bool VALIDATE_INVARIANTS=true> class StringKVLeafNodeLayout;
+
+
+/**
+ * StringKVInnerNodeLayout
+ *
+ * Reusable implementation of a fixed size key mapping
+ * omap_inner_key_t(fixed) -> V(string) with internal representations omap_inner_key_le_t.
+ *
+ * Uses absl::container_internal::Layout for the actual key memory layout.
+ *
+ * The primary interface exposed is centered on the iterator
+ * and related methods.
+ *
+ * Also included are helpers for doing splits and merges as for a btree.
+ */
+template <
+ typename Meta,
+ typename MetaInt,
+ bool VALIDATE_INVARIANTS>
+class StringKVInnerNodeLayout {
+ char *buf = nullptr;
+
+ using L = absl::container_internal::Layout<ceph_le32, MetaInt, omap_inner_key_le_t>;
+ static constexpr L layout{1, 1, 1}; // = L::Partial(1, 1, 1);
+
+public:
+  /**
+   * iter_t
+   *
+   * Index-based cursor over the entries of a StringKVInnerNodeLayout.
+   * It holds a pointer to the layout and an entry index; dereferencing
+   * yields the iterator itself, so entry accessors are called directly
+   * on the iterator.
+   */
+  template <bool is_const>
+  struct iter_t {
+    friend class StringKVInnerNodeLayout;
+    using parent_t = typename crimson::common::maybe_const_t<StringKVInnerNodeLayout, is_const>::type;
+
+    parent_t node;
+    uint16_t index;
+
+    iter_t(
+      parent_t parent,
+      uint16_t index) : node(parent), index(index) {}
+
+    iter_t(const iter_t &) = default;
+    iter_t(iter_t &&) = default;
+    iter_t &operator=(const iter_t &) = default;
+    iter_t &operator=(iter_t &&) = default;
+
+    // mutable -> const conversion only (enforced by the static_assert)
+    operator iter_t<!is_const>() const {
+      static_assert(!is_const);
+      return iter_t<!is_const>(node, index);
+    }
+
+    // Work nicely with for loops without requiring a nested type.
+    iter_t &operator*() { return *this; }
+    iter_t *operator->() { return this; }
+
+    iter_t operator++(int) {
+      auto ret = *this;
+      ++index;
+      return ret;
+    }
+
+    iter_t &operator++() {
+      ++index;
+      return *this;
+    }
+
+    // distance in entries between two iterators of the same node
+    uint16_t operator-(const iter_t &rhs) const {
+      assert(rhs.node == node);
+      return index - rhs.index;
+    }
+
+    iter_t operator+(uint16_t off) const {
+      return iter_t(
+        node,
+        index + off);
+    }
+    iter_t operator-(uint16_t off) const {
+      return iter_t(
+        node,
+        index - off);
+    }
+
+    // ordering by index; returns bool (previously declared as uint16_t)
+    bool operator<(const iter_t &rhs) const {
+      assert(rhs.node == node);
+      return index < rhs.index;
+    }
+
+    bool operator==(const iter_t &rhs) const {
+      assert(node == rhs.node);
+      return rhs.index == index;
+    }
+
+    bool operator!=(const iter_t &rhs) const {
+      return !(*this == rhs);
+    }
+
+    // decoded copy of the fixed-size key slot at this index
+    omap_inner_key_t get_node_key() const {
+      omap_inner_key_le_t kint = node->get_node_key_ptr()[index];
+      return omap_inner_key_t(kint);
+    }
+
+    // Key strings are addressed from the end of the block: the string for
+    // this entry starts key_off bytes before the end. The end iterator maps
+    // to the end of the block itself.
+    char *get_node_val_ptr() {
+      auto tail = node->buf + BlockSize;
+      if (*this == node->iter_end())
+        return tail;
+      else {
+        return tail - static_cast<uint32_t>(get_node_key().key_off);
+      }
+    }
+
+    const char *get_node_val_ptr() const {
+      auto tail = node->buf + BlockSize;
+      if ( *this == node->iter_end())
+        return tail;
+      else {
+        return tail - static_cast<uint32_t>(get_node_key().key_off);
+      }
+    }
+
+    // writes val, including its terminating NUL, at this entry's string slot
+    void set_node_val(const std::string &val) {
+      static_assert(!is_const);
+      std::strcpy((char*)get_node_val_ptr(), val.c_str()); //copy char* to char* include "\0"
+    }
+
+    // reads the NUL-terminated key string of this entry
+    std::string get_node_val(){
+      std::string s(get_node_val_ptr());
+      return s;
+    }
+    std::string get_node_val() const{
+      std::string s(get_node_val_ptr());
+      return s;
+    }
+
+    // true when key falls in [this entry's key, next entry's key); the last
+    // entry has no upper bound
+    bool contains(const std::string &key) const {
+      auto next = *this + 1;
+      if (next == node->iter_end())
+        return get_node_val() <= key;
+
+      return (get_node_val() <= key) && (next->get_node_val() > key);
+    }
+
+    uint16_t get_index() const {
+      return index;
+    }
+
+  private:
+    // stores the encoded form of _lb into the key slot at this index
+    void set_node_key(omap_inner_key_t _lb) const {
+      static_assert(!is_const);
+      omap_inner_key_le_t lb;
+      lb = _lb;
+      node->get_node_key_ptr()[index] = lb;
+    }
+
+    typename crimson::common::maybe_const_t<char, is_const>::type get_node_key_ptr() const {
+      return reinterpret_cast<
+        typename crimson::common::maybe_const_t<char, is_const>::type>(
+          node->get_node_key_ptr() + index);
+    }
+
+  };
+ using const_iterator = iter_t<true>;
+ using iterator = iter_t<false>;
+
+ // One recorded mutation of an inner node: the operation, the encoded key
+ // slot and the key string it applies to.
+ struct delta_inner_t {
+ enum class op_t : uint8_t {
+ INSERT,
+ UPDATE,
+ REMOVE,
+ } op;
+ omap_inner_key_le_t key;
+ std::string val;
+
+ // Re-applies this mutation to l. The target entry is located by the key
+ // string val: lower bound for INSERT, exact match for UPDATE and REMOVE.
+ void replay(StringKVInnerNodeLayout &l) {
+ switch (op) {
+ case op_t::INSERT: {
+ l.inner_insert(l.string_lower_bound(val), key, val);
+ break;
+ }
+ case op_t::UPDATE: {
+ auto iter = l.find_string_key(val);
+ assert(iter != l.iter_end());
+ l.inner_update(iter, key);
+ break;
+ }
+ case op_t::REMOVE: {
+ auto iter = l.find_string_key(val);
+ assert(iter != l.iter_end());
+ l.inner_remove(iter);
+ break;
+ }
+ default:
+ assert(0 == "Impossible");
+ }
+ }
+
+ bool operator==(const delta_inner_t &rhs) const {
+ return op == rhs.op &&
+ key == rhs.key &&
+ val == rhs.val;
+ }
+ };
+
+public:
+  /**
+   * delta_inner_buffer_t
+   *
+   * Ordered list of delta_inner_t mutations recorded against an inner
+   * node. It can be encoded into a bufferlist, decoded from one, and
+   * replayed onto a layout.
+   */
+  class delta_inner_buffer_t {
+    std::vector<delta_inner_t> buffer;
+  public:
+    bool empty() const {
+      return buffer.empty();
+    }
+    // records an INSERT of (key, val)
+    void insert(
+      const omap_inner_key_t &key,
+      const std::string val) {
+      omap_inner_key_le_t k;
+      k = key;
+      buffer.push_back(
+        delta_inner_t{
+          delta_inner_t::op_t::INSERT,
+          k,
+          val
+        });
+    }
+    // records an UPDATE of the entry whose key string is val
+    void update(
+      const omap_inner_key_t &key,
+      const std::string &val) {
+      omap_inner_key_le_t k;
+      k = key;
+      buffer.push_back(
+        delta_inner_t{
+          delta_inner_t::op_t::UPDATE,
+          k,
+          val
+        });
+    }
+    // records a REMOVE of the entry whose key string is val
+    void remove(std::string val) {
+      buffer.push_back(
+        delta_inner_t{
+          delta_inner_t::op_t::REMOVE,
+          omap_inner_key_le_t(),
+          val
+        });
+    }
+
+    // applies every recorded delta to node, in recording order
+    void replay(StringKVInnerNodeLayout &node) {
+      for (auto &i: buffer) {
+        i.replay(node);
+      }
+    }
+    // sum over all deltas of the op, key slot and key string sizes
+    size_t get_bytes() const {
+      size_t size = 0;
+      for (auto &i: buffer) {
+        // sizeof(i.op): the member, not the nested type name op_t, which
+        // cannot be named through an object expression
+        size += sizeof(i.op) + sizeof(i.key) + i.val.size();
+      }
+      return size;
+    }
+    // copy out: appends the entry count followed by each delta to bl, then
+    // clears the buffer
+    void encode(ceph::bufferlist &bl) {
+      using ceph::encode;
+      uint32_t num = buffer.size();
+      encode(num, bl);
+      for (auto &&i: buffer) {
+        encode(i.op, bl);
+        bl.append((char*)&(i.key), sizeof(i.key));
+        encode(i.val, bl);
+      }
+      buffer.clear();
+    }
+    // copy in: appends the deltas decoded from bl to the buffer
+    void decode(const ceph::bufferlist &bl) {
+      using ceph::decode;
+      auto p = bl.cbegin();
+      uint32_t num;
+      decode (num, p);
+      while (num--) {
+        delta_inner_t delta;
+        decode(delta.op, p);
+        omap_inner_key_le_t key;
+        p.copy(sizeof(key), (char*)&(key));
+        delta.key = key;
+        decode(delta.val, p);
+        buffer.push_back(delta);
+      }
+    }
+
+    bool operator==(const delta_inner_buffer_t &rhs) const {
+      return buffer == rhs.buffer;
+    }
+  };
+
+ // Inserts (val -> laddr) at _iter and, when recorder is non-null, records
+ // the insert in it first.
+ void journal_inner_insert(
+ const_iterator _iter,
+ const laddr_t laddr,
+ const std::string val,
+ delta_inner_buffer_t *recorder) {
+ auto iter = iterator(this, _iter.index);
+ omap_inner_key_t node_key;
+ node_key.laddr = laddr;
+ // stored length is the string size plus one
+ node_key.key_len = val.size() + 1;
+ // key_off is cumulative: the previous entry's key_off plus this key_len
+ node_key.key_off = iter.get_index() == 0 ?
+ node_key.key_len :
+ (iter - 1).get_node_key().key_off + node_key.key_len;
+ if (recorder) {
+ recorder->insert(
+ node_key,
+ val);
+ }
+ inner_insert(iter, node_key, val);
+ }
+
+ // Points the entry at _iter to a new laddr, keeping its key string, and
+ // records the update when recorder is non-null.
+ void journal_inner_update(
+ const_iterator _iter,
+ const laddr_t laddr,
+ delta_inner_buffer_t *recorder) {
+ auto iter = iterator(this, _iter.index);
+ auto node_key = iter.get_node_key();
+ node_key.laddr = laddr;
+ if (recorder) {
+ recorder->update(node_key, iter->get_node_val());
+ }
+ inner_update(iter, node_key);
+ }
+
+ // Replaces the entry at _iter with (val -> laddr). When recorder is
+ // non-null the change is recorded as a remove of the old key string
+ // followed by an insert of the new entry.
+ void journal_inner_replace(
+ const_iterator _iter,
+ const laddr_t laddr,
+ const std::string val,
+ delta_inner_buffer_t *recorder) {
+ auto iter = iterator(this, _iter.index);
+ omap_inner_key_t node_key;
+ node_key.laddr = laddr;
+ // stored length is the string size plus one
+ node_key.key_len = val.size() + 1;
+ // key_off is cumulative: the previous entry's key_off plus this key_len
+ node_key.key_off = iter.get_index() == 0?
+ node_key.key_len :
+ (iter - 1).get_node_key().key_off + node_key.key_len;
+ if (recorder) {
+ recorder->remove(iter->get_node_val());
+ recorder->insert(node_key, val);
+ }
+ inner_replace(iter, node_key, val);
+ }
+
+ // Removes the entry at _iter and, when recorder is non-null, records the
+ // removal (keyed by the entry's key string) first.
+ void journal_inner_remove(
+ const_iterator _iter,
+ delta_inner_buffer_t *recorder) {
+ auto iter = iterator(this, _iter.index);
+ if (recorder) {
+ recorder->remove(iter->get_node_val());
+ }
+ inner_remove(iter);
+ }
+
+ // Wraps an existing buffer; the pointer is stored, not copied.
+ StringKVInnerNodeLayout(char *buf) :
+ buf(buf) {}
+
+ // Number of entries, read from the first field of the layout.
+ uint32_t get_size() const {
+ ceph_le32 &size = *layout.template Pointer<0>(buf);
+ return uint32_t(size);
+ }
+
+ /**
+ * set_size
+ *
+ * Set size representation to match size; the value is written to the
+ * first field of the layout as a ceph_le32.
+ */
+ void set_size(uint32_t size) {
+ ceph_le32 s;
+ s = size;
+ *layout.template Pointer<0>(buf) = s;
+ }
+
+ // const iterator at index 0
+ const_iterator iter_begin() const {
+ return const_iterator(
+ this,
+ 0);
+ }
+
+ // const iterator one past the last entry (index == get_size())
+ const_iterator iter_end() const {
+ return const_iterator(
+ this,
+ get_size());
+ }
+
+ // mutable iterator at index 0
+ iterator iter_begin() {
+ return iterator(
+ this,
+ 0);
+ }
+
+ // mutable iterator one past the last entry (index == get_size())
+ iterator iter_end() {
+ return iterator(
+ this,
+ get_size());
+ }
+
+ // const iterator at index off; off is not range-checked here
+ const_iterator iter_idx(uint16_t off) const {
+ return const_iterator(
+ this,
+ off);
+ }
+
+  // Binary search over the entries' key strings. Returns the entry whose
+  // key equals str when there is one, otherwise the position at which the
+  // search interval collapsed.
+  const_iterator string_lower_bound(std::string str) const {
+    uint16_t lo = 0;
+    uint16_t hi = get_size();
+    while (lo != hi) {
+      unsigned mid = (lo + hi) / 2;
+      const_iterator probe(this, mid);
+      const int order = probe.get_node_val().compare(str);
+      if (order == 0) {
+        return probe;
+      } else if (order < 0) {
+        lo = mid + 1;
+      } else {
+        hi = mid;
+      }
+    }
+    return const_iterator(this, lo);
+  }
+
+ // mutable overload: delegates to the const version and rewraps the index
+ iterator string_lower_bound(std::string str) {
+ const auto &tref = *this;
+ return iterator(this, tref.string_lower_bound(str).index);
+ }
+
+  // Linear scan: returns the first entry whose key string is greater than
+  // str, or iter_end() when there is none.
+  const_iterator string_upper_bound(std::string str) const {
+    auto cursor = iter_begin();
+    while (cursor != iter_end() && !(cursor->get_node_val() > str)) {
+      ++cursor;
+    }
+    return cursor;
+  }
+
+ // mutable overload: delegates to the const version and rewraps the index
+ iterator string_upper_bound(std::string str) {
+ const auto &tref = *this;
+ return iterator(this, tref.string_upper_bound(str).index);
+ }
+
+  // Linear scan: returns the first entry whose key string equals str, or
+  // iter_end() when there is none.
+  const_iterator find_string_key(const std::string &str) const {
+    auto cursor = iter_begin();
+    while (cursor != iter_end() && !(cursor->get_node_val() == str)) {
+      ++cursor;
+    }
+    return cursor;
+  }
+ // Mutable overload: reuses the const scan and rebuilds a mutable
+ // iterator at the index it found.
+ iterator find_string_key(const std::string &str) {
+   const StringKVInnerNodeLayout &self = *this;
+   auto found = self.find_string_key(str);
+   return iterator(this, found.index);
+ }
+
+ // Pick the entry at which a split should happen: the first entry at which
+ // the running sum of key_len reaches half of the last entry's key_off.
+ // Returns iter_end() when the node is empty or no entry reaches that sum.
+ const_iterator get_split_pivot() const {
+   if (get_size() == 0) {
+     // get_size()-1 below would wrap and index outside the key array
+     return iter_end();
+   }
+   uint32_t total_size = omap_inner_key_t(get_node_key_ptr()[get_size()-1]).key_off;
+   uint32_t pivot_size = total_size / 2;
+   uint32_t size = 0;
+   for (auto ite = iter_begin(); ite < iter_end(); ite++) {
+     auto node_key = ite->get_node_key();
+     size += node_key.key_len;
+     if (size >= pivot_size){
+       return ite;
+     }
+   }
+   return iter_end();
+ }
+
+
+ /**
+  * get_meta/set_meta
+  *
+  * Read or write the Meta value kept in the second field of the layout
+  * (stored as MetaInt). It is not represented in delta_t, so it must not
+  * be modified after the initial write.
+  */
+ Meta get_meta() const {
+   return Meta(*layout.template Pointer<1>(buf));
+ }
+ void set_meta(const Meta &meta) {
+   auto *slot = layout.template Pointer<1>(buf);
+   *slot = MetaInt(meta);
+ }
+
+ // Bytes in use: the last entry's key_off (end of the string area) plus
+ // one key record per entry. An empty node uses 0 bytes.
+ uint32_t used_space() const {
+   const uint32_t count = get_size();
+   if (count == 0) {
+     return 0;
+   }
+   omap_inner_key_t last_key = omap_inner_key_t(get_node_key_ptr()[count-1]);
+   return last_key.key_off + count * sizeof(omap_inner_key_le_t);
+ }
+
+ // Bytes still available: capacity() minus used_space().
+ uint32_t free_space() const {
+   const auto total = capacity();
+   return total - used_space();
+ }
+
+ // Bytes available for key records and strings: BlockSize minus the
+ // distance from the start of the layout to the start of the key array.
+ // NOTE(review): the result is narrowed to uint16_t while the leaf layout
+ // in this file returns uint32_t; confirm BlockSize always fits.
+ uint16_t capacity() const {
+   char *keys_start = reinterpret_cast<char*>(layout.template Pointer<2>(buf));
+   char *block_start = reinterpret_cast<char*>(layout.template Pointer<0>(buf));
+   return BlockSize - (keys_start - block_start);
+ }
+
+ // Address located off bytes before the end of the block.
+ char* from_end(int off) {
+   const auto distance = BlockSize - off;
+   return buf + distance;
+ }
+
+ // True when one more key record plus ksize string bytes does not fit in
+ // the remaining free space.
+ bool is_overflow(size_t ksize) const {
+   const size_t needed = sizeof(omap_inner_key_le_t) + ksize;
+   return free_space() < needed;
+ }
+ // True when more than half of the capacity is free.
+ bool below_min() const {
+   return (capacity() / 2) < free_space();
+ }
+
+ // Two layouts are equal when they hold the same number of entries and
+ // every pair of entries matches in both key record and key string.
+ bool operator==(const StringKVInnerNodeLayout &rhs) const {
+   if (get_size() != rhs.get_size()) {
+     return false;
+   }
+
+   auto theirs = rhs.iter_begin();
+   for (auto mine = iter_begin(); mine != iter_end(); mine++, theirs++) {
+     const bool same_key = !(mine->get_node_key() != theirs->get_node_key());
+     if (!same_key || mine->get_node_val() != theirs->get_node_val()) {
+       return false;
+     }
+   }
+   return true;
+ }
+
+ /**
+  * split_into
+  *
+  * Distributes the entries of *this: those before the split pivot are
+  * copied to left, the pivot and everything after it to right. The meta
+  * value is split alongside. Returns the pivot entry's key string.
+  */
+ std::string split_into(
+   StringKVInnerNodeLayout &left,
+   StringKVInnerNodeLayout &right) const {
+   auto pivot = get_split_pivot();
+   assert(pivot != iter_end());
+
+   // entries [begin, pivot)
+   left.copy_from_foreign_head(left.iter_begin(), iter_begin(), pivot);
+   left.set_size(pivot - iter_begin());
+
+   // entries [pivot, end)
+   right.copy_from_foreign_back(right.iter_begin(), pivot, iter_end());
+   right.set_size(iter_end() - pivot);
+
+   auto [left_meta, right_meta] = get_meta().split_into();
+   left.set_meta(left_meta);
+   right.set_meta(right_meta);
+
+   return pivot->get_node_val();
+ }
+
+ /**
+  * merge_from
+  *
+  * Fills *this with the entries of left followed by the entries of right
+  * and stores the merged meta value.
+  *
+  * precondition: left.size() + right.size() < CAPACITY
+  */
+ void merge_from(
+   const StringKVInnerNodeLayout &left,
+   const StringKVInnerNodeLayout &right) {
+   // left's entries first ...
+   copy_from_foreign_head(iter_end(), left.iter_begin(), left.iter_end());
+   set_size(left.get_size());
+
+   // ... then right's entries appended behind them
+   append_copy_from_foreign_head(iter_end(), right.iter_begin(), right.iter_end());
+   set_size(left.get_size() + right.get_size());
+
+   auto merged_meta = Meta::merge_from(left.get_meta(), right.get_meta());
+   set_meta(merged_meta);
+ }
+
+ /**
+  * balance_into_new_nodes
+  *
+  * Redistributes the entries of left and right over replacement_left and
+  * replacement_right. The pivot is the first entry (counting through left,
+  * then right) at which the running sum of key_len reaches half of the
+  * combined string bytes; entries before it go to replacement_left, the
+  * pivot and the rest to replacement_right. Returns the pivot's key string.
+  */
+ static std::string balance_into_new_nodes(
+   const StringKVInnerNodeLayout &left,
+   const StringKVInnerNodeLayout &right,
+   StringKVInnerNodeLayout &replacement_left,
+   StringKVInnerNodeLayout &replacement_right)
+ {
+   uint32_t left_bytes = omap_inner_key_t(left.get_node_key_ptr()[left.get_size()-1]).key_off;
+   uint32_t right_bytes = omap_inner_key_t(right.get_node_key_ptr()[right.get_size()-1]).key_off;
+   uint32_t half_bytes = (left_bytes + right_bytes) / 2;
+   const bool pivot_in_left = half_bytes < left_bytes;
+
+   // Scan whichever source node contains the pivot.
+   const StringKVInnerNodeLayout &scanned = pivot_in_left ? left : right;
+   const uint32_t threshold = pivot_in_left ? half_bytes : half_bytes - left_bytes;
+   const uint32_t index_base = pivot_in_left ? 0 : left.get_size();
+   uint32_t pivot_idx = 0;
+   uint32_t running = 0;
+   for (auto it = scanned.iter_begin(); it < scanned.iter_end(); it++) {
+     running += it->get_node_key().key_len;
+     if (running >= threshold) {
+       pivot_idx = it.get_index() + index_base;
+       break;
+     }
+   }
+
+   std::string replacement_pivot = pivot_idx >= left.get_size()
+     ? right.iter_idx(pivot_idx - left.get_size())->get_node_val()
+     : left.iter_idx(pivot_idx)->get_node_val();
+
+   if (pivot_in_left) {
+     // left[0, pivot) -> replacement_left
+     replacement_left.copy_from_foreign_head(
+       replacement_left.iter_end(), left.iter_begin(), left.iter_idx(pivot_idx));
+     replacement_left.set_size(pivot_idx);
+
+     // left[pivot, end) followed by all of right -> replacement_right
+     replacement_right.copy_from_foreign_back(
+       replacement_right.iter_end(), left.iter_idx(pivot_idx), left.iter_end());
+     replacement_right.set_size(left.get_size() - pivot_idx);
+
+     replacement_right.append_copy_from_foreign_head(
+       replacement_right.iter_end(), right.iter_begin(), right.iter_end());
+     replacement_right.set_size(right.get_size() + left.get_size()- pivot_idx);
+   } else {
+     // all of left followed by right[0, pivot) -> replacement_left
+     replacement_left.copy_from_foreign_head(
+       replacement_left.iter_end(), left.iter_begin(), left.iter_end());
+     replacement_left.set_size(left.get_size());
+
+     replacement_left.append_copy_from_foreign_head(
+       replacement_left.iter_end(),
+       right.iter_begin(),
+       right.iter_idx(pivot_idx - left.get_size()));
+     replacement_left.set_size(pivot_idx);
+
+     // right[pivot, end) -> replacement_right
+     replacement_right.copy_from_foreign_back(
+       replacement_right.iter_end(),
+       right.iter_idx(pivot_idx - left.get_size()),
+       right.iter_end());
+     replacement_right.set_size(right.get_size() + left.get_size() - pivot_idx);
+   }
+
+   auto [left_meta, right_meta] = Meta::rebalance(
+     left.get_meta(), right.get_meta());
+   replacement_left.set_meta(left_meta);
+   replacement_right.set_meta(right_meta);
+   return replacement_pivot;
+ }
+
+private:
+ // Insert an entry with key record key and key string val at iter.
+ // Entries from iter onwards are first shifted one slot back; the key
+ // record is written before the string because the string's location is
+ // derived from it.
+ void inner_insert(
+   iterator iter,
+   const omap_inner_key_t key,
+   const std::string &val) {
+   if (VALIDATE_INVARIANTS) {
+     if (iter != iter_begin()) {
+       assert((iter - 1)->get_node_val() < val);
+     }
+     if (iter != iter_end()) {
+       assert(iter->get_node_val() > val);
+     }
+     assert(is_overflow(val.size() + 1) == false);
+   }
+   const bool has_following = get_size() != 0 && iter != iter_end();
+   if (has_following) {
+     local_move_back(key, iter + 1, iter, iter_end());
+   }
+
+   iter->set_node_key(key);
+   set_size(get_size() + 1);
+   iter->set_node_val(val);
+ }
+
+ // Overwrite the key record of the existing entry at iter; the key string
+ // is left untouched.
+ void inner_update(
+   iterator iter,
+   omap_inner_key_t key ) {
+   const bool valid_entry = iter != iter_end();
+   assert(valid_entry);
+   iter->set_node_key(key);
+ }
+
+ // Replace the entry at iter with (key, val): implemented as a removal
+ // followed by an insertion at the same position.
+ void inner_replace(
+   iterator iter,
+   const omap_inner_key_t &key,
+   const std::string &val) {
+   assert(iter != iter_end());
+   if (VALIDATE_INVARIANTS) {
+     // the new string must still sort between both neighbours
+     if (iter != iter_begin()) {
+       auto before = iter - 1;
+       assert(before->get_node_val() < val);
+     }
+     auto after = iter + 1;
+     if (after != iter_end()) {
+       assert(after->get_node_val() > val);
+     }
+     assert(is_overflow(val.size() + 1) == false);
+   }
+   inner_remove(iter);
+   inner_insert(iter, key, val);
+ }
+
+ // Drop the entry at iter. Entries behind it, if any, are moved ahead to
+ // close the gap before the entry count is decremented.
+ void inner_remove(iterator iter) {
+   assert(iter != iter_end());
+   auto following = iter + 1;
+   if (following != iter_end()) {
+     local_move_ahead(iter, following, iter_end());
+   }
+   set_size(get_size() - 1);
+ }
+
+ /**
+  * get_node_key_ptr
+  *
+  * Pointer to the first element of the key record array (third field of
+  * the layout), in mutable and read-only flavours.
+  */
+ omap_inner_key_le_t *get_node_key_ptr() {
+   auto sized = L::Partial(1, 1, get_size());
+   return sized.template Pointer<2>(buf);
+ }
+ const omap_inner_key_le_t *get_node_key_ptr() const {
+   auto sized = L::Partial(1, 1, get_size());
+   return sized.template Pointer<2>(buf);
+ }
+
+ /**
+  * copy_from_foreign_head
+  *
+  * Copy the leading entries of another node into this node.
+  * [from_src, to_src) is the entry range in the other node; an empty
+  * range is a no-op.
+  * tgt is the entry of this node that receives the key records.
+  * tgt and from_src must be from different nodes.
+  * from_src and to_src must be in the same node.
+  *
+  * The string bytes copied are everything from the source's string area
+  * up to and including entry to_src-1, placed at the same distance from
+  * the block end in the target; offsets are not rebased.
+  * NOTE(review): this presumes from_src is the source's first entry and
+  * that the target holds no entries yet -- confirm against callers.
+  */
+ static void copy_from_foreign_head(
+   iterator tgt,
+   const_iterator from_src,
+   const_iterator to_src) {
+   assert(tgt->node != from_src->node);
+   assert(to_src->node == from_src->node);
+   if (from_src == to_src) {
+     // nothing to copy; (to_src - 1) below would wrap at index 0
+     return;
+   }
+   void* des = tgt.node->from_end((to_src -1)->get_node_key().key_off);
+   void* src = (to_src - 1)->get_node_val_ptr();
+   size_t len = (to_src -1)->get_node_key().key_off;
+   memcpy(des, src, len);
+   memcpy(
+     tgt->get_node_key_ptr(), from_src->get_node_key_ptr(),
+     to_src->get_node_key_ptr() - from_src->get_node_key_ptr());
+ }
+
+ /**
+  * copy_from_foreign_back
+  *
+  * Copy the trailing entries of another node into this node.
+  * [from_src, to_src) is the entry range in the other node; an empty
+  * range is a no-op.
+  * tgt is the entry of this node that receives the key records.
+  * tgt and from_src must be from different nodes.
+  * from_src and to_src must be in the same node.
+  *
+  * The copied strings are placed directly at the block end of the target,
+  * and the copied key records have their key_off reduced by the key_off of
+  * the source entry preceding from_src.
+  */
+ void copy_from_foreign_back(
+   iterator tgt,
+   const_iterator from_src,
+   const_iterator to_src) {
+   assert(tgt->node != from_src->node);
+   assert(to_src->node == from_src->node);
+   if (from_src == to_src) {
+     // nothing to copy; (to_src - 1) below would wrap at index 0
+     return;
+   }
+   // string bytes of the source entries that are NOT copied
+   auto offset = from_src.get_index() == 0? 0: (from_src-1)->get_node_key().key_off;
+   void* des = tgt.node->from_end((to_src -1)->get_node_key().key_off - offset);
+   void* src = (to_src - 1)->get_node_val_ptr();
+   size_t len = from_src.get_index() == 0? (to_src -1)->get_node_key().key_off:
+     (from_src-1)->get_node_val_ptr() - (to_src -1)->get_node_val_ptr();
+   memcpy(des, src, len);
+   memcpy(
+     tgt->get_node_key_ptr(), from_src->get_node_key_ptr(),
+     to_src->get_node_key_ptr() - from_src->get_node_key_ptr());
+   if ( from_src.get_index() == 0)
+     return;  // offsets already relative to the block end
+
+   // rebase the copied offsets
+   omap_inner_key_t key = (from_src - 1)->get_node_key();
+   auto end_idx = tgt.get_index() + to_src.get_index() - from_src.get_index();
+   for (auto ite = tgt; ite.get_index() != end_idx; ite++) {
+     omap_inner_key_t node_key = ite->get_node_key();
+     node_key.key_off -= key.key_off;
+     ite->set_node_key(node_key);
+   }
+ }
+
+ /**
+  * append_copy_from_foreign_head
+  *
+  * Append the leading entries of another node behind this node's entries.
+  * [from_src, to_src) is the entry range in the other node; an empty
+  * range is a no-op.
+  * tgt is the entry of this node that receives the key records; the
+  * strings are placed behind those of the entry preceding tgt. When tgt is
+  * the first entry there is no predecessor and the strings start at the
+  * block end.
+  * tgt and from_src must be from different nodes.
+  * from_src and to_src must be in the same node.
+  */
+ void append_copy_from_foreign_head(
+   iterator tgt,
+   const_iterator from_src,
+   const_iterator to_src) {
+   assert(tgt->node != from_src->node);
+   assert(to_src->node == from_src->node);
+   if (from_src == to_src)
+     return;
+
+   // String bytes already present in the target. Reading (tgt - 1) when
+   // tgt is at index 0 would wrap the index, so that case is 0.
+   const auto base_off = tgt.get_index() == 0 ? 0 : (tgt - 1)->get_node_key().key_off;
+   void* des = tgt.node->from_end((to_src -1)->get_node_key().key_off + base_off);
+   void* src = (to_src - 1)->get_node_val_ptr();
+   size_t len = (to_src -1)->get_node_key().key_off;
+   memcpy(des, src, len);
+   memcpy(
+     tgt->get_node_key_ptr(), from_src->get_node_key_ptr(),
+     to_src->get_node_key_ptr() - from_src->get_node_key_ptr());
+   // rebase the copied offsets behind the existing strings
+   auto end_idx = tgt.get_index() + to_src.get_index() - from_src.get_index();
+   for (auto ite = tgt; ite.get_index() != end_idx; ite++) {
+     omap_inner_key_t node_key = ite->get_node_key();
+     node_key.key_off += base_off;
+     ite->set_node_key(node_key);
+   }
+ }
+
+ /**
+  * local_move_back
+  *
+  * Make room for a new entry described by key: the strings of
+  * [from_src, to_src) are shifted key.key_len bytes away from the block
+  * end, their key_off is increased by the same amount, and their key
+  * records are moved to start at tgt.
+  *
+  * tgt, from_src, and to_src must be from the same node.
+  */
+ static void local_move_back(
+   omap_inner_key_t key,
+   iterator tgt,
+   iterator from_src,
+   iterator to_src) {
+   assert(tgt->node == from_src->node);
+   assert(to_src->node == from_src->node);
+
+   // [strings_lo, strings_hi) is the string area of the moved entries
+   char *strings_lo = (to_src-1)->get_node_val_ptr();
+   char *strings_hi = from_src.get_index() == 0
+     ? from_src->node->buf + BlockSize
+     : (from_src-1)->get_node_val_ptr();
+   size_t moved_bytes = strings_hi - strings_lo;
+   memmove(strings_lo - key.key_len, strings_lo, moved_bytes);
+
+   for (auto entry = from_src; entry < to_src; entry++) {
+     omap_inner_key_t shifted = entry->get_node_key();
+     shifted.key_off += key.key_len;
+     entry->set_node_key(shifted);
+   }
+   memmove(
+     tgt->get_node_key_ptr(), from_src->get_node_key_ptr(),
+     to_src->get_node_key_ptr() - from_src->get_node_key_ptr());
+ }
+
+ /**
+  * local_move_ahead
+  *
+  * Close the gap left by the entry at tgt: the strings of
+  * [from_src, to_src) are shifted towards the block end by tgt's key_len,
+  * their key_off is reduced by the same amount, and their key records are
+  * moved to start at tgt.
+  *
+  * tgt, from_src, and to_src must be from the same node, and from_src must
+  * not be the first entry.
+  */
+ static void local_move_ahead(
+   iterator tgt,
+   iterator from_src,
+   iterator to_src) {
+   assert(tgt->node == from_src->node);
+   assert(to_src->node == from_src->node);
+   assert(from_src.get_index() != 0);
+
+   // record being overwritten; read before anything is moved
+   omap_inner_key_t removed = tgt->get_node_key();
+
+   char *strings_lo = (to_src-1)->get_node_val_ptr();
+   char *strings_hi = (from_src-1)->get_node_val_ptr();
+   size_t moved_bytes = strings_hi - strings_lo;
+   memmove(strings_lo + removed.key_len, strings_lo, moved_bytes);
+
+   for (auto entry = from_src; entry < to_src; entry++) {
+     omap_inner_key_t shifted = entry->get_node_key();
+     shifted.key_off -= removed.key_len;
+     entry->set_node_key(shifted);
+   }
+   memmove(
+     tgt->get_node_key_ptr(), from_src->get_node_key_ptr(),
+     to_src->get_node_key_ptr() - from_src->get_node_key_ptr());
+ }
+
+};
+
+template <
+ typename Meta,
+ typename MetaInt,
+ bool VALIDATE_INVARIANTS>
+class StringKVLeafNodeLayout {
+ char *buf = nullptr;
+
+ using L = absl::container_internal::Layout<ceph_le32, MetaInt, omap_leaf_key_le_t>;
+ static constexpr L layout{1, 1, 1}; // = L::Partial(1, 1, 1);
+
+public:
+ /**
+  * iter_t
+  *
+  * Index-based cursor over the entries of a leaf node. An entry consists of
+  * a key record (omap_leaf_key_t) in the key array plus a key string and a
+  * value string that are addressed backwards from the end of the block via
+  * the record's key_off / val_off.
+  */
+ template <bool is_const>
+ struct iter_t {
+   friend class StringKVLeafNodeLayout;
+   using parent_t = typename crimson::common::maybe_const_t<StringKVLeafNodeLayout, is_const>::type;
+
+   parent_t node;
+   uint16_t index;
+
+   iter_t(
+     parent_t parent,
+     uint16_t index) : node(parent), index(index) {}
+
+   iter_t(const iter_t &) = default;
+   iter_t(iter_t &&) = default;
+   iter_t &operator=(const iter_t &) = default;
+   iter_t &operator=(iter_t &&) = default;
+
+   // Conversion to the iterator of opposite constness; only permitted
+   // from the mutable iterator (enforced by the static_assert).
+   operator iter_t<!is_const>() const {
+     static_assert(!is_const);
+     return iter_t<!is_const>(node, index);
+   }
+
+   // Work nicely with for loops without requiring a nested type.
+   iter_t &operator*() { return *this; }
+   iter_t *operator->() { return this; }
+
+   iter_t operator++(int) {
+     auto ret = *this;
+     ++index;
+     return ret;
+   }
+
+   iter_t &operator++() {
+     ++index;
+     return *this;
+   }
+
+   // Distance in entries between two iterators of the same node.
+   uint16_t operator-(const iter_t &rhs) const {
+     assert(rhs.node == node);
+     return index - rhs.index;
+   }
+
+   iter_t operator+(uint16_t off) const {
+     return iter_t(
+       node,
+       index + off);
+   }
+   iter_t operator-(uint16_t off) const {
+     return iter_t(
+       node,
+       index - off);
+   }
+
+   // NOTE(review): yields 0/1 as uint16_t rather than bool; kept as is so
+   // the declared return type does not change.
+   uint16_t operator<(const iter_t &rhs) const {
+     assert(rhs.node == node);
+     return index < rhs.index;
+   }
+
+   bool operator==(const iter_t &rhs) const {
+     assert(node == rhs.node);
+     return rhs.index == index;
+   }
+
+   bool operator!=(const iter_t &rhs) const {
+     assert(node == rhs.node);
+     return index != rhs.index;
+   }
+
+   // Decoded copy of this entry's key record.
+   omap_leaf_key_t get_node_key() const {
+     omap_leaf_key_le_t kint = node->get_node_key_ptr()[index];
+     return omap_leaf_key_t(kint);
+   }
+
+   // Start of this entry's key string: key_off bytes before the end of the
+   // block. For the end iterator the end of the block itself is returned.
+   char *get_node_val_ptr() {
+     auto tail = node->buf + BlockSize;
+     if ( *this == node->iter_end())
+       return tail;
+     else
+       return tail - static_cast<int>(get_node_key().key_off);
+   }
+
+   const char *get_node_val_ptr() const {
+     auto tail = node->buf + BlockSize;
+     if ( *this == node->iter_end())
+       return tail;
+     else
+       return tail - static_cast<int>(get_node_key().key_off);
+   }
+
+   // Start of this entry's value string: val_off bytes before the end of
+   // the block.
+   char *get_string_val_ptr() {
+     auto tail = node->buf + BlockSize;
+     return tail - static_cast<int>(get_node_key().val_off);
+   }
+
+   const char *get_string_val_ptr() const {
+     auto tail = node->buf + BlockSize;
+     return tail - static_cast<int>(get_node_key().val_off);
+   }
+
+   // Write the key string, including its terminating '\0'. The key record
+   // must already be set because it determines the destination.
+   void set_node_val(std::string val) const {
+     static_assert(!is_const);
+     std::strcpy((char*)get_node_val_ptr(), val.c_str()); //copy char* to char* include "\0"
+   }
+
+   // Key string, read as a NUL-terminated C string.
+   std::string get_node_val() {
+     std::string s(get_node_val_ptr());
+     return s;
+   }
+   std::string get_node_val() const{
+     std::string s(get_node_val_ptr());
+     return s;
+   }
+
+   // Write the value followed by a terminating '\0'. All val.size() bytes
+   // are copied, so a value containing '\0' bytes is stored intact; the
+   // key record (already set) reserves val.size() + 1 bytes for it.
+   void set_string_val(std::string val) {
+     static_assert(!is_const);
+     char *dst = get_string_val_ptr();
+     std::memcpy(dst, val.data(), val.size());
+     dst[val.size()] = '\0';
+   }
+
+   // Value string. Its length comes from the key record (val_len counts
+   // the terminator) rather than from scanning for '\0', so embedded '\0'
+   // bytes are preserved.
+   std::string get_string_val() const {
+     const auto stored = get_node_key().val_len;
+     return std::string(get_string_val_ptr(), stored ? stored - 1 : 0);
+   }
+
+   // True when key falls into the range covered by this entry: at or after
+   // this entry's key string and before the next entry's. The first entry
+   // has no lower bound and the last entry has no upper bound.
+   bool contains(const std::string &key) const {
+     auto next = *this + 1;
+     const bool has_next = !(next == node->iter_end());
+     if (*this == node->iter_begin()){
+       // Only look at the successor if there is one; reading the end
+       // slot would go past the stored entries.
+       return !has_next || next->get_node_val() > key;
+     }
+     if (!has_next)
+       return get_node_val() <= key;
+
+     return (get_node_val() <= key) && (next->get_node_val() > key);
+   }
+
+   uint16_t get_index() const {
+     return index;
+   }
+
+ private:
+   // Store _lb, converted to its on-disk form, as this entry's key record.
+   void set_node_key(omap_leaf_key_t _lb) const {
+     static_assert(!is_const);
+     omap_leaf_key_le_t lb;
+     lb = _lb;
+     node->get_node_key_ptr()[index] = lb;
+   }
+
+   // Byte-wise address of this entry's key record within the key array.
+   typename crimson::common::maybe_const_t<char, is_const>::type get_node_key_ptr() const {
+     return reinterpret_cast<
+       typename crimson::common::maybe_const_t<char, is_const>::type>(
+       node->get_node_key_ptr() + index);
+   }
+ };
+ using const_iterator = iter_t<true>;
+ using iterator = iter_t<false>;
+
+ // One journaled mutation of a leaf node: the operation plus the key and
+ // value strings it applies to.
+ struct delta_leaf_t {
+   enum class op_t : uint8_t {
+     INSERT,
+     UPDATE,
+     REMOVE,
+   } op;
+   std::string key;
+   std::string val;
+
+   // Apply this mutation to l. UPDATE and REMOVE require the key to be
+   // present in l.
+   void replay(StringKVLeafNodeLayout &l) {
+     if (op == op_t::INSERT) {
+       l.leaf_insert(l.string_lower_bound(key), key, val);
+     } else if (op == op_t::UPDATE || op == op_t::REMOVE) {
+       auto target = l.find_string_key(key);
+       assert(target != l.iter_end());
+       if (op == op_t::UPDATE) {
+         l.leaf_update(target, key, val);
+       } else {
+         l.leaf_remove(target);
+       }
+     } else {
+       assert(0 == "Impossible");
+     }
+   }
+
+   bool operator==(const delta_leaf_t &rhs) const {
+     if (op != rhs.op) {
+       return false;
+     }
+     return key == rhs.key && val == rhs.val;
+   }
+ };
+
+public:
+ // Ordered list of leaf mutations that can be replayed onto a node and
+ // encoded to / decoded from a bufferlist.
+ class delta_leaf_buffer_t {
+   std::vector<delta_leaf_t> buffer;
+ public:
+   bool empty() const {
+     return buffer.empty();
+   }
+   // Record an INSERT of (key, val).
+   void insert(
+     const std::string &key,
+     const std::string &val) {
+     buffer.push_back(
+       delta_leaf_t{
+         delta_leaf_t::op_t::INSERT,
+         key,
+         val
+       });
+   }
+   // Record an UPDATE of key to val.
+   void update(
+     const std::string &key,
+     const std::string &val) {
+     buffer.push_back(
+       delta_leaf_t{
+         delta_leaf_t::op_t::UPDATE,
+         key,
+         val
+       });
+   }
+   // Record a REMOVE of key; the value field is left empty.
+   void remove(std::string key) {
+     buffer.push_back(
+       delta_leaf_t{
+         delta_leaf_t::op_t::REMOVE,
+         key,
+         ""
+       });
+   }
+
+   // Apply every recorded mutation to node, in recording order.
+   void replay(StringKVLeafNodeLayout &node) {
+     for (auto &i: buffer) {
+       i.replay(node);
+     }
+   }
+   // Sum over all records of the op field size plus the key and value
+   // string lengths.
+   size_t get_bytes() const {
+     size_t size = 0;
+     for (auto &i: buffer) {
+       // sizeof the op member; naming the nested type through the object
+       // (i.op_t) is not a valid expression
+       size += sizeof(i.op) + i.key.size() + i.val.size();
+     }
+     return size;
+   }
+   // Copy out: append the record count and every record to bl, then drop
+   // the recorded mutations.
+   void encode(ceph::bufferlist &bl) {
+     using ceph::encode;
+     uint32_t num = buffer.size();
+     encode(num, bl);
+     for (auto &&i: buffer) {
+       encode(i.op, bl);
+       encode(i.key, bl);
+       //bl.append((char*)&(i.key), sizeof(i.key));
+       encode(i.val, bl);
+     }
+     buffer.clear();
+   }
+   // Copy in: read the record count and that many records from bl and
+   // append them to the recorded mutations.
+   void decode(const ceph::bufferlist &bl) {
+     using ceph::decode;
+     auto p = bl.cbegin();
+     uint32_t num;
+     decode (num, p);
+     while (num--) {
+       delta_leaf_t delta;
+       decode(delta.op, p);
+       decode(delta.key, p);
+       decode(delta.val, p);
+       buffer.push_back(delta);
+     }
+   }
+
+   bool operator==(const delta_leaf_buffer_t &rhs) const {
+     return buffer == rhs.buffer;
+   }
+ };
+
+ // Insert (key, val) at _iter. When a recorder is supplied the insertion
+ // is logged before the node is modified.
+ void journal_leaf_insert(
+   const_iterator _iter,
+   const std::string &key,
+   const std::string &val,
+   delta_leaf_buffer_t *recorder) {
+   auto slot = iterator(this, _iter.index);
+   if (recorder != nullptr) {
+     recorder->insert(key, val);
+   }
+   leaf_insert(slot, key, val);
+ }
+
+ // Replace the entry at _iter with (key, val). When a recorder is supplied
+ // the change is logged as a removal of the current key followed by an
+ // insertion of the new pair.
+ void journal_leaf_update(
+   const_iterator _iter,
+   const std::string &key,
+   const std::string &val,
+   delta_leaf_buffer_t *recorder) {
+   auto slot = iterator(this, _iter.index);
+   if (recorder != nullptr) {
+     recorder->remove(slot->get_node_val());
+     recorder->insert(key, val);
+   }
+   leaf_update(slot, key, val);
+ }
+
+
+ // Remove the entry at _iter. When a recorder is supplied the removal is
+ // logged (by the entry's key string) before the node is modified.
+ void journal_leaf_remove(
+   const_iterator _iter,
+   delta_leaf_buffer_t *recorder) {
+   auto victim = iterator(this, _iter.index);
+   if (recorder != nullptr) {
+     recorder->remove(victim->get_node_val());
+   }
+   leaf_remove(victim);
+ }
+
+ // Bind the layout to the block buffer at buf; only the pointer is stored.
+ StringKVLeafNodeLayout(char *buf) : buf(buf) {}
+
+ // Read-only iterator at index 0.
+ const_iterator iter_begin() const {
+   const_iterator first(this, 0);
+   return first;
+ }
+
+ // Read-only iterator one past the last entry (index == get_size()).
+ const_iterator iter_end() const {
+   const_iterator past_last(this, get_size());
+   return past_last;
+ }
+
+ // Mutable iterator at index 0.
+ iterator iter_begin() {
+   iterator first(this, 0);
+   return first;
+ }
+
+ // Mutable iterator one past the last entry (index == get_size()).
+ iterator iter_end() {
+   iterator past_last(this, get_size());
+   return past_last;
+ }
+
+ // Read-only iterator at index off; off is not range-checked here.
+ const_iterator iter_idx(uint16_t off) const {
+   const_iterator at(this, off);
+   return at;
+ }
+
+ // Binary search over the key strings: returns the entry equal to str if
+ // one is hit, otherwise the position where the search interval collapses
+ // (iter_end() when every key is smaller than str).
+ const_iterator string_lower_bound(std::string str) const {
+   uint16_t lo = 0, hi = get_size();
+   while (lo != hi) {
+     unsigned mid = (lo + hi) / 2;
+     const_iterator probe(this, mid);
+     std::string current = probe->get_node_val();
+     if (current == str) {
+       return probe;
+     }
+     if (current < str) {
+       lo = mid + 1;
+     } else {
+       hi = mid;
+     }
+   }
+   return const_iterator(this, lo);
+ }
+
+ // Mutable overload: reuses the const search and rebuilds a mutable
+ // iterator at the index it found.
+ iterator string_lower_bound(std::string str) {
+   const StringKVLeafNodeLayout &self = *this;
+   auto found = self.string_lower_bound(str);
+   return iterator(this, found.index);
+ }
+
+ // Linear scan for the first entry whose key string is strictly greater
+ // than str; iter_end() if there is none.
+ const_iterator string_upper_bound(std::string str) const {
+   for (auto it = iter_begin(); it != iter_end(); ++it) {
+     if (it->get_node_val() > str) {
+       return it;
+     }
+   }
+   return iter_end();
+ }
+
+ // Mutable overload: reuses the const scan and rebuilds a mutable
+ // iterator at the index it found.
+ iterator string_upper_bound(std::string str) {
+   const StringKVLeafNodeLayout &self = *this;
+   auto found = self.string_upper_bound(str);
+   return iterator(this, found.index);
+ }
+
+ // Linear scan for the entry whose key string equals str; iter_end() when
+ // no entry matches.
+ const_iterator find_string_key(const std::string &str) const {
+   for (auto it = iter_begin(); it != iter_end(); ++it) {
+     if (it->get_node_val() == str) {
+       return it;
+     }
+   }
+   return iter_end();
+ }
+ // Mutable overload: reuses the const scan and rebuilds a mutable
+ // iterator at the index it found.
+ iterator find_string_key(const std::string &str) {
+   const StringKVLeafNodeLayout &self = *this;
+   auto found = self.find_string_key(str);
+   return iterator(this, found.index);
+ }
+
+ // Pick the entry at which a split should happen: the first entry at which
+ // the running sum of key_len + val_len reaches half of the last entry's
+ // key_off. Returns iter_end() when the node is empty or no entry reaches
+ // that sum.
+ const_iterator get_split_pivot() const {
+   if (get_size() == 0) {
+     // get_size()-1 below would wrap and index outside the key array
+     return iter_end();
+   }
+   uint32_t total_size = omap_leaf_key_t(get_node_key_ptr()[get_size()-1]).key_off;
+   uint32_t pivot_size = total_size / 2;
+   uint32_t size = 0;
+   for (auto ite = iter_begin(); ite < iter_end(); ite++) {
+     auto node_key = ite->get_node_key();
+     size += node_key.key_len + node_key.val_len;
+     if (size >= pivot_size){
+       return ite;
+     }
+   }
+   return iter_end();
+ }
+
+ // Number of entries, read from the first field of the layout.
+ uint32_t get_size() const {
+   return uint32_t(*layout.template Pointer<0>(buf));
+ }
+
+ /**
+  * set_size
+  *
+  * Store size as the entry count in the first field of the layout.
+  */
+ void set_size(uint32_t size) {
+   ceph_le32 encoded;
+   encoded = size;
+   auto *slot = layout.template Pointer<0>(buf);
+   *slot = encoded;
+ }
+
+ /**
+  * get_meta/set_meta
+  *
+  * Read or write the Meta value kept in the second field of the layout
+  * (stored as MetaInt). It is not represented in delta_t, so it must not
+  * be modified after the initial write.
+  */
+ Meta get_meta() const {
+   return Meta(*layout.template Pointer<1>(buf));
+ }
+ void set_meta(const Meta &meta) {
+   auto *slot = layout.template Pointer<1>(buf);
+   *slot = MetaInt(meta);
+ }
+
+ // Bytes in use: the last entry's key_off (end of the string area) plus
+ // one key record per entry. An empty node uses 0 bytes.
+ uint32_t used_space() const {
+   const uint32_t count = get_size();
+   if (count == 0) {
+     return 0;
+   }
+   omap_leaf_key_t last_key = omap_leaf_key_t(get_node_key_ptr()[count-1]);
+   return last_key.key_off + count * sizeof(omap_leaf_key_le_t);
+ }
+
+ // Bytes still available: capacity() minus used_space().
+ uint32_t free_space() const {
+   const auto total = capacity();
+   return total - used_space();
+ }
+
+ // Bytes available for key records and strings: BlockSize minus the
+ // distance from the start of the layout to the start of the key array.
+ uint32_t capacity() const {
+   char *keys_start = reinterpret_cast<char*>(layout.template Pointer<2>(buf));
+   char *block_start = reinterpret_cast<char*>(layout.template Pointer<0>(buf));
+   return BlockSize - (keys_start - block_start);
+ }
+ // Address located off bytes before the end of the block.
+ char* from_end(int off) {
+   const auto distance = BlockSize - off;
+   return buf + distance;
+ }
+
+ // True when one more key record plus ksize key bytes and vsize value
+ // bytes does not fit in the remaining free space.
+ bool is_overflow(size_t ksize, size_t vsize) const {
+   const size_t needed = sizeof(omap_leaf_key_le_t) + ksize + vsize;
+   return free_space() < needed;
+ }
+ // True when more than half of the capacity is free.
+ bool below_min() const {
+   return (capacity() / 2) < free_space();
+ }
+
+ // Two layouts are equal when they hold the same number of entries and
+ // every pair of entries matches in key record, key string and value
+ // string.
+ bool operator==(const StringKVLeafNodeLayout &rhs) const {
+   if (get_size() != rhs.get_size()) {
+     return false;
+   }
+
+   auto theirs = rhs.iter_begin();
+   for (auto mine = iter_begin(); mine != iter_end(); mine++, theirs++) {
+     if (mine->get_node_key() != theirs->get_node_key()) {
+       return false;
+     }
+     if (mine->get_node_val() != theirs->get_node_val()) {
+       return false;
+     }
+     if (mine->get_string_val() != theirs->get_string_val()) {
+       return false;
+     }
+   }
+   return true;
+ }
+
+ /**
+  * split_into
+  *
+  * Distributes the entries of *this: those before the split pivot are
+  * copied to left, the pivot and everything after it to right. The meta
+  * value is split alongside. Returns the pivot entry's key string.
+  */
+ std::string split_into(
+   StringKVLeafNodeLayout &left,
+   StringKVLeafNodeLayout &right) const {
+   auto pivot = get_split_pivot();
+   assert (pivot != iter_end());
+
+   // entries [begin, pivot)
+   left.copy_from_foreign_head(left.iter_begin(), iter_begin(), pivot);
+   left.set_size(pivot - iter_begin());
+
+   // entries [pivot, end)
+   right.copy_from_foreign_back(right.iter_begin(), pivot, iter_end());
+   right.set_size(iter_end() - pivot);
+
+   auto [left_meta, right_meta] = get_meta().split_into();
+   left.set_meta(left_meta);
+   right.set_meta(right_meta);
+
+   return pivot->get_node_val();
+ }
+
+ /**
+  * merge_from
+  *
+  * Fills *this with the entries of left followed by the entries of right
+  * and stores the merged meta value.
+  *
+  * precondition: left.size() + right.size() < CAPACITY
+  */
+ void merge_from(
+   const StringKVLeafNodeLayout &left,
+   const StringKVLeafNodeLayout &right)
+ {
+   // left's entries first ...
+   copy_from_foreign_head(iter_end(), left.iter_begin(), left.iter_end());
+   set_size(left.get_size());
+
+   // ... then right's entries appended behind them
+   append_copy_from_foreign_head(iter_end(), right.iter_begin(), right.iter_end());
+   set_size(left.get_size() + right.get_size());
+
+   auto merged_meta = Meta::merge_from(left.get_meta(), right.get_meta());
+   set_meta(merged_meta);
+ }
+
+ /**
+  * balance_into_new_nodes
+  *
+  * Redistributes the entries of left and right over replacement_left and
+  * replacement_right. The pivot is the first entry (counting through left,
+  * then right) at which the running sum of key_len + val_len reaches half
+  * of the combined string bytes; entries before it go to replacement_left,
+  * the pivot and the rest to replacement_right. Returns the pivot's key
+  * string.
+  */
+ static std::string balance_into_new_nodes(
+   const StringKVLeafNodeLayout &left,
+   const StringKVLeafNodeLayout &right,
+   StringKVLeafNodeLayout &replacement_left,
+   StringKVLeafNodeLayout &replacement_right)
+ {
+   uint32_t left_bytes = omap_leaf_key_t(left.get_node_key_ptr()[left.get_size()-1]).key_off;
+   uint32_t right_bytes = omap_leaf_key_t(right.get_node_key_ptr()[right.get_size()-1]).key_off;
+   uint32_t half_bytes = (left_bytes + right_bytes) / 2;
+   const bool pivot_in_left = half_bytes < left_bytes;
+
+   // Scan whichever source node contains the pivot.
+   const StringKVLeafNodeLayout &scanned = pivot_in_left ? left : right;
+   const uint32_t threshold = pivot_in_left ? half_bytes : half_bytes - left_bytes;
+   const uint32_t index_base = pivot_in_left ? 0 : left.get_size();
+   uint32_t pivot_idx = 0;
+   uint32_t running = 0;
+   for (auto it = scanned.iter_begin(); it < scanned.iter_end(); it++) {
+     auto record = it->get_node_key();
+     running += record.key_len + record.val_len;
+     if (running >= threshold) {
+       pivot_idx = it.get_index() + index_base;
+       break;
+     }
+   }
+
+   std::string replacement_pivot = pivot_idx >= left.get_size()
+     ? right.iter_idx(pivot_idx - left.get_size())->get_node_val()
+     : left.iter_idx(pivot_idx)->get_node_val();
+
+   if (pivot_in_left) {
+     // left[0, pivot) -> replacement_left
+     replacement_left.copy_from_foreign_head(
+       replacement_left.iter_end(), left.iter_begin(), left.iter_idx(pivot_idx));
+     replacement_left.set_size(pivot_idx);
+
+     // left[pivot, end) followed by all of right -> replacement_right
+     replacement_right.copy_from_foreign_back(
+       replacement_right.iter_end(), left.iter_idx(pivot_idx), left.iter_end());
+     replacement_right.set_size(left.get_size() - pivot_idx);
+
+     replacement_right.append_copy_from_foreign_head(
+       replacement_right.iter_end(), right.iter_begin(), right.iter_end());
+     replacement_right.set_size(right.get_size() + left.get_size() - pivot_idx);
+   } else {
+     // all of left followed by right[0, pivot) -> replacement_left
+     replacement_left.copy_from_foreign_head(
+       replacement_left.iter_end(), left.iter_begin(), left.iter_end());
+     replacement_left.set_size(left.get_size());
+
+     replacement_left.append_copy_from_foreign_head(
+       replacement_left.iter_end(),
+       right.iter_begin(),
+       right.iter_idx(pivot_idx - left.get_size()));
+     replacement_left.set_size(pivot_idx);
+
+     // right[pivot, end) -> replacement_right
+     replacement_right.copy_from_foreign_back(
+       replacement_right.iter_end(),
+       right.iter_idx(pivot_idx - left.get_size()),
+       right.iter_end());
+     replacement_right.set_size(right.get_size() + left.get_size() - pivot_idx);
+   }
+
+   auto [left_meta, right_meta] = Meta::rebalance(
+     left.get_meta(), right.get_meta());
+   replacement_left.set_meta(left_meta);
+   replacement_right.set_meta(right_meta);
+   return replacement_pivot;
+ }
+
+private:
+ // Insert (key, val) at iter. The new entry's strings are placed directly
+ // behind those of the preceding entry: key_off marks the start of the key
+ // string, val_off the start of the value string, both measured back from
+ // the end of the block and both lengths including the terminating '\0'.
+ void leaf_insert(
+   iterator iter,
+   const std::string &key,
+   const std::string &val) {
+   if (VALIDATE_INVARIANTS) {
+     if (iter != iter_begin()) {
+       assert((iter - 1)->get_node_val() < key);
+     }
+     if (iter != iter_end()) {
+       assert(iter->get_node_val() > key);
+     }
+     assert(is_overflow(key.size() + 1, val.size() + 1) == false);
+   }
+
+   // string bytes occupied by the entries in front of iter (0 for slot 0)
+   const auto preceding = (iter == iter_begin())
+     ? 0
+     : (iter - 1)->get_node_key().key_off;
+   const size_t key_bytes = key.size() + 1;
+   const size_t val_bytes = val.size() + 1;
+
+   omap_leaf_key_t node_key;
+   node_key.key_off = preceding + (key_bytes + val_bytes);
+   node_key.key_len = key_bytes;
+   node_key.val_off = preceding + val_bytes;
+   node_key.val_len = val_bytes;
+
+   const bool has_following = get_size() != 0 && iter != iter_end();
+   if (has_following) {
+     local_move_back(node_key, iter + 1, iter, iter_end());
+   }
+
+   // the record goes in first: it determines where the strings are written
+   iter->set_node_key(node_key);
+   set_size(get_size() + 1);
+   iter->set_node_val(key);
+   iter->set_string_val(val);
+ }
+
+ // Replace the entry at iter with (key, val): implemented as a removal
+ // followed by an insertion at the same position.
+ void leaf_update(
+   iterator iter,
+   const std::string &key,
+   const std::string &val) {
+   const bool valid_entry = iter != iter_end();
+   assert(valid_entry);
+   if (VALIDATE_INVARIANTS) {
+     assert(is_overflow(0, val.size() + 1) == false);
+   }
+   leaf_remove(iter);
+   leaf_insert(iter, key, val);
+ }
+
+ // Drop the entry at iter. Entries behind it, if any, are moved ahead to
+ // close the gap before the entry count is decremented.
+ void leaf_remove(iterator iter) {
+   assert(iter != iter_end());
+   auto following = iter + 1;
+   if (following != iter_end()) {
+     local_move_ahead(iter, following, iter_end());
+   }
+   set_size(get_size() - 1);
+ }
+
+ /**
+  * get_node_key_ptr
+  *
+  * Pointer to the first element of the key record array (third field of
+  * the layout), in mutable and read-only flavours.
+  */
+ omap_leaf_key_le_t *get_node_key_ptr() {
+   auto sized = L::Partial(1, 1, get_size());
+   return sized.template Pointer<2>(buf);
+ }
+ const omap_leaf_key_le_t *get_node_key_ptr() const {
+   auto sized = L::Partial(1, 1, get_size());
+   return sized.template Pointer<2>(buf);
+ }
+
+ /**
+  * copy_from_foreign_head
+  *
+  * Copy the leading entries of another node into this node.
+  * [from_src, to_src) is the entry range in the other node; an empty
+  * range is a no-op.
+  * tgt is the entry of this node that receives the key records.
+  * tgt and from_src must be from different nodes.
+  * from_src and to_src must be in the same node.
+  *
+  * The string bytes copied are everything from the source's string area
+  * up to and including entry to_src-1, placed at the same distance from
+  * the block end in the target; offsets are not rebased.
+  * NOTE(review): this presumes from_src is the source's first entry and
+  * that the target holds no entries yet -- confirm against callers.
+  */
+ static void copy_from_foreign_head(
+   iterator tgt,
+   const_iterator from_src,
+   const_iterator to_src) {
+   assert(tgt->node != from_src->node);
+   assert(to_src->node == from_src->node);
+   if (from_src == to_src) {
+     // nothing to copy; (to_src - 1) below would wrap at index 0
+     return;
+   }
+   void* des = tgt.node->from_end((to_src -1)->get_node_key().key_off);
+   void* src = (to_src - 1)->get_node_val_ptr();
+   size_t len = (to_src -1)->get_node_key().key_off;
+   memcpy(des, src, len);
+   memcpy(
+     tgt->get_node_key_ptr(), from_src->get_node_key_ptr(),
+     to_src->get_node_key_ptr() - from_src->get_node_key_ptr());
+ }
+
+ /**
+  * copy_from_foreign_back
+  *
+  * Copy the trailing entries of another node into this node.
+  * [from_src, to_src) is the entry range in the other node; an empty
+  * range is a no-op.
+  * tgt is the entry of this node that receives the key records.
+  * tgt and from_src must be from different nodes.
+  * from_src and to_src must be in the same node.
+  *
+  * The copied strings are placed directly at the block end of the target,
+  * and the copied key records have key_off and val_off reduced by the
+  * key_off of the source entry preceding from_src.
+  */
+ void copy_from_foreign_back(
+   iterator tgt,
+   const_iterator from_src,
+   const_iterator to_src) {
+   assert(tgt->node != from_src->node);
+   assert(to_src->node == from_src->node);
+   if (from_src == to_src) {
+     // nothing to copy; (to_src - 1) below would wrap at index 0
+     return;
+   }
+   // string bytes of the source entries that are NOT copied
+   auto offset = from_src.get_index() == 0? 0: (from_src-1)->get_node_key().key_off;
+
+   void* des = tgt.node->from_end((to_src -1)->get_node_key().key_off - offset);
+   void* src = (to_src - 1)->get_node_val_ptr();
+   size_t len = from_src.get_index() == 0? (to_src -1)->get_node_key().key_off:
+     (from_src-1)->get_node_val_ptr() - (to_src -1)->get_node_val_ptr();
+   memcpy(des, src, len);
+   memcpy(
+     tgt->get_node_key_ptr(), from_src->get_node_key_ptr(),
+     to_src->get_node_key_ptr() - from_src->get_node_key_ptr());
+   if ( from_src.get_index() == 0)
+     return;  // offsets already relative to the block end
+
+   // rebase the copied offsets
+   omap_leaf_key_t key = (from_src - 1)->get_node_key();
+   for (auto ite = tgt; ite.get_index() < (tgt.get_index() + to_src.get_index() - from_src.get_index()); ite++) {
+     omap_leaf_key_t node_key = ite->get_node_key();
+     node_key.key_off -= key.key_off;
+     node_key.val_off -= key.key_off;
+     ite->set_node_key(node_key);
+   }
+ }
+
+ /**
+ * append_copy_from_foreign_head
+ *
+ * Append the head entries of another node after the existing entries of
+ * this node.
+ * [from_src, to_src) is the entry range in the other node.
+ * tgt is the entry in this node to copy to.
+ * tgt and from_src must be from different nodes.
+ * from_src and to_src must be in the same node.
+ * An empty range (from_src == to_src) is a no-op.
+ * NOTE(review): (tgt - 1) is dereferenced unconditionally, so tgt appears
+ * to be required to have a preceding entry -- confirm against callers.
+ */
+ void append_copy_from_foreign_head(
+ iterator tgt,
+ const_iterator from_src,
+ const_iterator to_src) {
+ assert(tgt->node != from_src->node);
+ assert(to_src->node == from_src->node);
+ if (from_src == to_src)
+ return;
+
+ // Payload copy: the destination is from_end() of the sum of the last source
+ // entry's key_off and the key_off of the entry preceding tgt; the length is
+ // the last source entry's key_off.
+ void* des = tgt.node->from_end((to_src -1)->get_node_key().key_off + (tgt - 1)->get_node_key().key_off);
+ void* src = (to_src - 1)->get_node_val_ptr();
+ size_t len = (to_src -1)->get_node_key().key_off;
+ memcpy(des, src, len);
+ // Header copy: key headers of the range are copied verbatim to tgt.
+ memcpy(
+ tgt->get_node_key_ptr(), from_src->get_node_key_ptr(),
+ to_src->get_node_key_ptr() - from_src->get_node_key_ptr());
+ // Rebase every copied header: add the key_off of the entry preceding tgt to
+ // both key_off and val_off.
+ omap_leaf_key_t key = (tgt - 1)->get_node_key();
+ auto end_idx = tgt.get_index() + to_src.get_index() - from_src.get_index();
+ for (auto ite = tgt; ite.get_index() != end_idx; ite++) {
+ omap_leaf_key_t node_key = ite->get_node_key();
+ node_key.key_off += key.key_off;
+ node_key.val_off += key.key_off;
+ ite->set_node_key(node_key);
+ }
+ }
+
+ /**
+ * local_move_back
+ *
+ * Move this node's entry range [from_src, to_src) back to the tgt position.
+ * The payload of the range is shifted by key.key_len + key.val_len bytes
+ * and the offsets stored in the moved headers grow by the same amount.
+ * (presumably key describes the entry being inserted -- confirm with callers)
+ *
+ * tgt, from_src, and to_src must be from the same node.
+ * NOTE(review): (to_src - 1) is dereferenced unconditionally, so the range
+ * appears to be required to be non-empty -- confirm against callers.
+ */
+ static void local_move_back(
+ omap_leaf_key_t key,
+ iterator tgt,
+ iterator from_src,
+ iterator to_src) {
+ assert(tgt->node == from_src->node);
+ assert(to_src->node == from_src->node);
+ // Payload move: the destination is (key.key_len + key.val_len) bytes below
+ // the value pointer of the last entry in the range.
+ void* des = (to_src-1)->get_node_val_ptr() - (key.key_len + key.val_len);
+ void* src = (to_src-1)->get_node_val_ptr();
+ // Length: from the last entry's value pointer up to buf + BlockSize when the
+ // range starts at index 0, otherwise up to the value pointer of
+ // (from_src - 1).
+ size_t len = from_src.get_index() == 0?
+ from_src->node->buf + BlockSize - (to_src-1)->get_node_val_ptr():
+ (from_src-1)->get_node_val_ptr() - (to_src-1)->get_node_val_ptr();
+ // memmove because source and destination are in the same node buffer.
+ memmove(des, src, len);
+ // Offsets are updated in place before the headers themselves are moved.
+ for ( auto ite = from_src; ite < to_src; ite++) {
+ omap_leaf_key_t node_key = ite->get_node_key();
+ node_key.key_off += (key.key_len + key.val_len);
+ node_key.val_off += (key.key_len + key.val_len);
+ ite->set_node_key(node_key);
+ }
+ // Header move: relocate the (already updated) key headers to tgt.
+ memmove(
+ tgt->get_node_key_ptr(), from_src->get_node_key_ptr(),
+ to_src->get_node_key_ptr() - from_src->get_node_key_ptr());
+ }
+
+ /**
+ * local_move_ahead
+ *
+ * Move this node's entry range [from_src, to_src) ahead to the tgt position.
+ * The payload of the range is shifted by the key_len + val_len recorded in
+ * the header at tgt, and the offsets stored in the moved headers shrink by
+ * the same amount.
+ *
+ * tgt, from_src, and to_src must be from the same node.
+ * from_src must not be the first entry (asserted).
+ */
+ static void local_move_ahead(
+ iterator tgt,
+ iterator from_src,
+ iterator to_src) {
+ assert(tgt->node == from_src->node);
+ assert(to_src->node == from_src->node);
+ assert(from_src.get_index() != 0);
+ // key is read from tgt before the header move below overwrites that slot.
+ omap_leaf_key_t key = tgt->get_node_key();
+ // Payload move: the destination is (key.key_len + key.val_len) bytes above
+ // the value pointer of the last entry in the range; the length is the
+ // distance between the value pointers of (from_src - 1) and (to_src - 1).
+ void* des = (to_src - 1)->get_node_val_ptr() + key.key_len + key.val_len;
+ void* src = (to_src - 1)->get_node_val_ptr();
+ size_t len = (from_src - 1)->get_node_val_ptr() - (to_src - 1)->get_node_val_ptr();
+ // memmove because source and destination are in the same node buffer.
+ memmove(des, src, len);
+ // Offsets are updated in place before the headers themselves are moved.
+ for ( auto ite = from_src; ite < to_src; ite++) {
+ omap_leaf_key_t node_key = ite->get_node_key();
+ node_key.key_off -= (key.key_len + key.val_len);
+ node_key.val_off -= (key.key_len + key.val_len);
+ ite->set_node_key(node_key);
+ }
+ // Header move: relocate the (already updated) key headers to tgt.
+ memmove(
+ tgt->get_node_key_ptr(), from_src->get_node_key_ptr(),
+ to_src->get_node_key_ptr() - from_src->get_node_key_ptr());
+ }
+
+};
+
+}
return out << "EXTMAP_LEAF";
case extent_types_t::ONODE_BLOCK_STAGED:
return out << "ONODE_BLOCK_STAGED";
+ case extent_types_t::OMAP_INNER:
+ return out << "OMAP_INNER";
+ case extent_types_t::OMAP_LEAF:
+ return out << "OMAP_LEAF";
case extent_types_t::TEST_BLOCK:
return out << "TEST_BLOCK";
case extent_types_t::TEST_BLOCK_PHYSICAL:
ONODE_BLOCK = 3,
EXTMAP_INNER = 4,
EXTMAP_LEAF = 5,
- ONODE_BLOCK_STAGED = 6,
+ OMAP_INNER = 6,
+ OMAP_LEAF = 7,
+ ONODE_BLOCK_STAGED = 8,
// Test Block Types
TEST_BLOCK = 0xF0,
});
}
+// Calls the single-address dec_ref for every entry of offsets, one after the
+// other, and resolves to the list of values those calls produced (one entry
+// per address, in iteration order).
+TransactionManager::refs_ret TransactionManager::dec_ref(
+  Transaction &t,
+  std::list<laddr_t> offsets)
+{
+  // do_with keeps both the input list and the result list alive until the
+  // returned future resolves.
+  return seastar::do_with(
+    std::move(offsets),
+    std::list<unsigned>(),
+    [this, &t] (auto &&pending, auto &counts) {
+      auto drop_one = [this, &t, &counts] (auto &addr) {
+        return dec_ref(t, addr).safe_then([&counts] (auto remaining) {
+          counts.push_back(remaining);
+        });
+      };
+      return crimson::do_for_each(
+        pending.begin(), pending.end(), std::move(drop_one)
+      ).safe_then([&counts] {
+        return ref_ertr::make_ready_future<std::list<unsigned>>(
+          std::move(counts));
+      });
+    });
+}
+
TransactionManager::submit_transaction_ertr::future<>
TransactionManager::submit_transaction(
TransactionRef t)
#include <functional>
#include <boost/intrusive_ptr.hpp>
+#include <boost/iterator/counting_iterator.hpp>
#include <boost/smart_ptr/intrusive_ref_counter.hpp>
#include <seastar/core/future.hh>
Transaction &t,
laddr_t offset);
+ /**
+ * dec_ref (list overload)
+ *
+ * Calls the single-address dec_ref on every address in offsets.
+ * Resolves to a list holding the value returned for each address.
+ */
+ using refs_ret = ref_ertr::future<std::list<unsigned>>;
+ refs_ret dec_ref(
+ Transaction &t,
+ std::list<laddr_t> offsets);
+
/**
* alloc_extent
*
});
}
+ /**
+  * alloc_extents
+  *
+  * Allocates num new blocks of type T by calling alloc_extent<T>(t, hint, len)
+  * num times, one after the other.
+  *
+  * @param t    transaction the allocations belong to
+  * @param hint laddr hint forwarded to every alloc_extent call
+  * @param len  length forwarded to every alloc_extent call
+  * @param num  number of extents to allocate
+  * @return future resolving to the allocated extents in allocation order
+  */
+ using alloc_extents_ertr = alloc_extent_ertr;
+ template<class T>
+ alloc_extents_ertr::future<std::vector<TCachedExtentRef<T>>>
+ alloc_extents(
+   Transaction &t,
+   laddr_t hint,
+   extent_len_t len,
+   int num) {
+   return seastar::do_with(std::vector<TCachedExtentRef<T>>(),
+     [this, &t, hint, len, num] (auto &extents) {
+       // The final size is known up front; avoid repeated reallocation.
+       if (num > 0) {
+         extents.reserve(num);
+       }
+       return crimson::do_for_each(
+         boost::make_counting_iterator(0),
+         boost::make_counting_iterator(num),
+         [this, &t, len, hint, &extents] (auto /* index, unused */) {
+           return alloc_extent<T>(t, hint, len).safe_then(
+             [&extents](auto &&node) {
+               // Forward instead of copying: the ref is handed to us by the
+               // continuation, so a copy would only add a refcount round-trip.
+               extents.push_back(std::forward<decltype(node)>(node));
+             });
+         }).safe_then([&extents] {
+           return alloc_extents_ertr::make_ready_future
+             <std::vector<TCachedExtentRef<T>>>(std::move(extents));
+         });
+     });
+ }
+
/**
* submit_transaction
*
${CMAKE_DL_LIBS}
crimson-seastore)
+# unittest_omap_manager: built from test_omap_manager.cc plus the shared
+# gtest_seastar.cc driver, registered via add_ceph_unittest and linked
+# against crimson-seastore.
+add_executable(unittest_omap_manager
+ test_omap_manager.cc
+ ../gtest_seastar.cc)
+add_ceph_unittest(unittest_omap_manager)
+target_link_libraries(
+ unittest_omap_manager
+ ${CMAKE_DL_LIBS}
+ crimson-seastore)
+
add_subdirectory(onode_tree)
--- /dev/null
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "test/crimson/gtest_seastar.h"
+
+#include "test/crimson/seastore/transaction_manager_test_state.h"
+
+#include "crimson/os/seastore/cache.h"
+#include "crimson/os/seastore/transaction_manager.h"
+#include "crimson/os/seastore/segment_manager.h"
+#include "crimson/os/seastore/omap_manager.h"
+
+#include "test/crimson/seastore/test_block.h"
+
+using namespace crimson;
+using namespace crimson::os;
+using namespace crimson::os::seastore;
+using namespace std;
+
+namespace {
+ // File-local accessor for the logger of the ceph_subsys_test subsystem.
+ // [[maybe_unused]] silences the warning if no test in this file logs.
+ [[maybe_unused]] seastar::logger& logger() {
+ return crimson::get_logger(ceph_subsys_test);
+ }
+}
+
+/**
+ * omap_manager_test_t
+ *
+ * Test fixture for the omap manager. Every mutating helper mirrors its
+ * effect into test_omap_mappings, an in-memory std::map that the checking
+ * helpers compare the omap manager's answers against.
+ */
+struct omap_manager_test_t :
+  public seastar_test_suite_t,
+  TMTestState {
+
+  /// Manager under test; created in set_up_fut() and re-created in replay().
+  OMapManagerRef omap_manager;
+
+  omap_manager_test_t() {}
+
+  /// Sets up the transaction manager state, then creates the omap manager.
+  seastar::future<> set_up_fut() final {
+    return tm_setup().then([this] {
+      omap_manager = omap_manager::create_omap_manager(*tm);
+      return seastar::now();
+    });
+  }
+
+  /// Tears down the transaction manager state, then drops the omap manager.
+  seastar::future<> tear_down_fut() final {
+    return tm_teardown().then([this] {
+      omap_manager.reset();
+      return seastar::now();
+    });
+  }
+
+  using test_omap_t = std::map<std::string, std::string>;
+  /// Expected key -> value contents, maintained by set_key() and rm_key().
+  test_omap_t test_omap_mappings;
+
+  /// Sets key to val through the omap manager, expects success and records
+  /// the pair in test_omap_mappings.
+  bool set_key(
+    omap_root_t &omap_root,
+    Transaction &t,
+    string &key,
+    string &val) {
+    auto ret = omap_manager->omap_set_key(omap_root, t, key, val).unsafe_get0();
+    EXPECT_EQ(ret, true);
+    test_omap_mappings[key] = val;
+    return ret;
+  }
+
+  /// Looks key up through the omap manager and expects the returned pair to
+  /// carry the same key.
+  std::pair<string, string> get_value(
+    omap_root_t &omap_root,
+    Transaction &t,
+    const string &key) {
+    auto ret = omap_manager->omap_get_value(omap_root, t, key).unsafe_get0();
+    EXPECT_EQ(key, ret.first);
+    return ret;
+  }
+
+  /// Removes key through the omap manager, expects success and drops the key
+  /// from test_omap_mappings.
+  bool rm_key(
+    omap_root_t &omap_root,
+    Transaction &t,
+    const string &key) {
+    auto ret = omap_manager->omap_rm_key(omap_root, t, key).unsafe_get0();
+    EXPECT_EQ(ret, true);
+    // erase(end()) is undefined behavior, so only erase a key that is
+    // actually tracked.
+    auto tracked = test_omap_mappings.find(key);
+    if (tracked != test_omap_mappings.end()) {
+      test_omap_mappings.erase(tracked);
+    }
+    return ret;
+  }
+
+  /// Lists keys starting at start (at most max) and checks the result
+  /// against test_omap_mappings.
+  ///
+  /// With start == "" and max == MAX_SIZE the result must have as many keys
+  /// as the shadow map and every key must be present in it. Otherwise the
+  /// result is compared element by element against the shadow map starting
+  /// at find(start), and ret.next is checked against the first key that was
+  /// not consumed ("" when the shadow map was exhausted).
+  list_keys_result_t list_keys(
+    omap_root_t &omap_root,
+    Transaction &t,
+    std::string &start,
+    size_t max = MAX_SIZE) {
+    auto ret = omap_manager->omap_list_keys(omap_root, t, start, max).unsafe_get0();
+    if (start == "" && max == MAX_SIZE) {
+      EXPECT_EQ(test_omap_mappings.size(), ret.keys.size());
+      for (auto &i : ret.keys) {
+        auto it = test_omap_mappings.find(i);
+        EXPECT_NE(it, test_omap_mappings.end());
+        // EXPECT_* is non-fatal: never dereference end().
+        if (it == test_omap_mappings.end()) {
+          continue;
+        }
+        EXPECT_EQ(i, it->first);
+      }
+    } else {
+      size_t i = 0;
+      auto it = test_omap_mappings.find(start);
+      for (; it != test_omap_mappings.end() && i < max; it++) {
+        // Report a short result as a failure instead of reading past the end.
+        EXPECT_LT(i, ret.keys.size());
+        if (i < ret.keys.size()) {
+          EXPECT_EQ(ret.keys[i], it->first);
+        }
+        i++;
+      }
+      if (it == test_omap_mappings.end()) {
+        EXPECT_EQ(ret.next, "");
+      } else {
+        EXPECT_EQ(ret.keys.size(), max);
+        EXPECT_EQ(ret.next, it->first);
+      }
+    }
+    return ret;
+  }
+
+  /// Lists key/value pairs starting at start (at most max) and checks the
+  /// result against test_omap_mappings, using the same two modes as
+  /// list_keys(). In the full-listing mode the values are compared; in the
+  /// ranged mode only the keys are compared.
+  list_kvs_result_t list(
+    omap_root_t &omap_root,
+    Transaction &t,
+    std::string &start,
+    size_t max = MAX_SIZE) {
+    auto ret = omap_manager->omap_list(omap_root, t, start, max).unsafe_get0();
+    if (start == "" && max == MAX_SIZE) {
+      EXPECT_EQ(test_omap_mappings.size(), ret.kvs.size());
+      for (auto &i : ret.kvs) {
+        auto it = test_omap_mappings.find(i.first);
+        EXPECT_NE(it, test_omap_mappings.end());
+        // EXPECT_* is non-fatal: never dereference end().
+        if (it == test_omap_mappings.end()) {
+          continue;
+        }
+        EXPECT_EQ(i.second, it->second);
+      }
+    } else {
+      size_t i = 0;
+      auto it = test_omap_mappings.find(start);
+      for (; it != test_omap_mappings.end() && i < max; it++) {
+        // Report a short result as a failure instead of reading past the end.
+        EXPECT_LT(i, ret.kvs.size());
+        if (i < ret.kvs.size()) {
+          EXPECT_EQ(ret.kvs[i].first, it->first);
+        }
+        i++;
+      }
+      if (it == test_omap_mappings.end()) {
+        EXPECT_EQ(ret.next, "");
+      } else {
+        EXPECT_EQ(ret.kvs.size(), max);
+        EXPECT_EQ(ret.next, it->first);
+      }
+    }
+
+    return ret;
+  }
+
+  /// Clears the omap and expects the root laddr to become L_ADDR_NULL.
+  /// test_omap_mappings is left untouched.
+  void clear(
+    omap_root_t &omap_root,
+    Transaction &t) {
+    omap_manager->omap_clear(omap_root, t).unsafe_get0();
+    EXPECT_EQ(omap_root.omap_root_laddr, L_ADDR_NULL);
+  }
+
+  /// Reads every key of test_omap_mappings back within transaction t and
+  /// expects the stored key and value.
+  void check_mappings(omap_root_t &omap_root, Transaction &t) {
+    for (const auto &i: test_omap_mappings){
+      auto ret = get_value(omap_root, t, i.first);
+      EXPECT_EQ(i.first, ret.first);
+      EXPECT_EQ(i.second, ret.second);
+    }
+  }
+
+  /// Same check, run in a freshly created transaction.
+  void check_mappings(omap_root_t &omap_root) {
+    auto t = tm->create_transaction();
+    check_mappings(omap_root, *t);
+  }
+
+  /// Closes the transaction manager, remounts the ephemeral segment manager,
+  /// mounts again and re-creates the omap manager on top of the new tm.
+  void replay() {
+    logger().debug("{}: begin", __func__);
+    tm->close().unsafe_get();
+    destroy();
+    static_cast<segment_manager::EphemeralSegmentManager*>(&*segment_manager)->remount();
+    init();
+    tm->mount().unsafe_get();
+    omap_manager = omap_manager::create_omap_manager(*tm);
+    logger().debug("{}: end", __func__);
+  }
+};
+
+// Fills str[0 .. len) with pseudo-random alphanumeric characters drawn via
+// rand(), writes a terminating '\0' at str[len] and returns str.
+// The buffer must therefore hold at least len + 1 characters.
+char* rand_string(char* str, const int len)
+{
+  for (int pos = 0; pos < len; ++pos) {
+    // The first draw picks the character class, the second the character.
+    const int kind = rand() % 3;
+    char picked;
+    if (kind == 0) {
+      picked = '0' + rand() % 10;
+    } else if (kind == 1) {
+      picked = 'A' + rand() % 26;
+    } else {
+      picked = 'a' +rand() % 26;
+    }
+    str[pos] = picked;
+  }
+  str[len] = '\0';
+  return str;
+}
+
+// Sets, reads back and removes a single key, each step in its own
+// transaction, and expects an empty value once the key is gone.
+TEST_F(omap_manager_test_t, basic)
+{
+  run_async([this] {
+    omap_root_t omap_root(0, L_ADDR_NULL);
+    {
+      auto init_txn = tm->create_transaction();
+      omap_root = omap_manager->initialize_omap(*init_txn).unsafe_get0();
+      tm->submit_transaction(std::move(init_txn)).unsafe_get();
+    }
+
+    string key("owner");
+    string val("test");
+    {
+      // Insert the key and read it back inside the same transaction.
+      auto txn = tm->create_transaction();
+      logger().debug("first transaction");
+      [[maybe_unused]] auto set_res = set_key(omap_root, *txn, key, val);
+      [[maybe_unused]] auto get_res = get_value(omap_root, *txn, key);
+      tm->submit_transaction(std::move(txn)).unsafe_get();
+    }
+    {
+      // Read the committed key, remove it, and observe the empty value.
+      auto txn = tm->create_transaction();
+      logger().debug("second transaction");
+      [[maybe_unused]] auto before = get_value(omap_root, *txn, key);
+      [[maybe_unused]] auto rm_res = rm_key(omap_root, *txn, key);
+      [[maybe_unused]] auto after = get_value(omap_root, *txn, key);
+      EXPECT_EQ(after.second, "");
+      tm->submit_transaction(std::move(txn)).unsafe_get();
+    }
+    {
+      // The removal must still be visible in a later transaction.
+      auto txn = tm->create_transaction();
+      logger().debug("third transaction");
+      [[maybe_unused]] auto gone = get_value(omap_root, *txn, key);
+      EXPECT_EQ(gone.second, "");
+      tm->submit_transaction(std::move(txn)).unsafe_get();
+    }
+  });
+}
+
+// Inserts 40 x 10 random short key/value pairs, verifying the mappings after
+// every committed transaction (and occasionally mid-transaction).
+TEST_F(omap_manager_test_t, force_leafnode_split)
+{
+  run_async([this] {
+    omap_root_t omap_root(0, L_ADDR_NULL);
+    {
+      auto init_txn = tm->create_transaction();
+      omap_root = omap_manager->initialize_omap(*init_txn).unsafe_get0();
+      tm->submit_transaction(std::move(init_txn)).unsafe_get();
+    }
+    const int STR_LEN = 50;
+    char str[STR_LEN + 1];
+    for (unsigned round = 0; round < 40; ++round) {
+      auto txn = tm->create_transaction();
+      logger().debug("opened transaction");
+      for (unsigned n = 0; n < 10; ++n) {
+        // Key first, then value: both share the scratch buffer.
+        const int key_len = rand() % STR_LEN;
+        string key(rand_string(str, key_len));
+        const int val_len = rand() % STR_LEN;
+        string val(rand_string(str, val_len));
+        [[maybe_unused]] auto set_res = set_key(omap_root, *txn, key, val);
+        if (round % 20 == 0 && n == 5) {
+          check_mappings(omap_root, *txn);
+        }
+      }
+      logger().debug("force split submit transaction i = {}", round);
+      tm->submit_transaction(std::move(txn)).unsafe_get();
+      check_mappings(omap_root);
+    }
+  });
+}
+
+// Inserts 80 x 5 random short key/value pairs, then removes two out of every
+// three keys again, committing every tenth step and checking the mappings
+// every hundredth step.
+TEST_F(omap_manager_test_t, force_leafnode_split_merge)
+{
+  run_async([this] {
+    omap_root_t omap_root(0, L_ADDR_NULL);
+    {
+      auto t = tm->create_transaction();
+      omap_root = omap_manager->initialize_omap(*t).unsafe_get0();
+      tm->submit_transaction(std::move(t)).unsafe_get();
+    }
+    const int STR_LEN = 50;
+    char str[STR_LEN + 1];
+
+    for (unsigned i = 0; i < 80; i++) {
+      auto t = tm->create_transaction();
+      logger().debug("opened split_merge transaction");
+      for (unsigned j = 0; j < 5; ++j) {
+        string key(rand_string(str, rand() % STR_LEN));
+        string val(rand_string(str, rand() % STR_LEN));
+        [[maybe_unused]] auto addref = set_key(omap_root, *t, key, val);
+        if ((i % 10 == 0) && (j == 3)) {
+          check_mappings(omap_root, *t);
+        }
+      }
+      logger().debug("submitting transaction");
+      tm->submit_transaction(std::move(t)).unsafe_get();
+      if (i % 50 == 0) {
+        check_mappings(omap_root);
+      }
+    }
+    auto t = tm->create_transaction();
+    int i = 0;
+    // rm_key() erases the entry from test_omap_mappings. Erasing the element a
+    // range-for is currently positioned on invalidates its iterator, so step
+    // the iterator past the element and copy the key before removing it.
+    for (auto it = test_omap_mappings.begin();
+         it != test_omap_mappings.end(); ) {
+      const std::string key = (it++)->first;
+      if (i % 3 != 0) {
+        [[maybe_unused]] auto rmref = rm_key(omap_root, *t, key);
+      }
+
+      if (i % 10 == 0) {
+        logger().debug("submitting transaction i= {}", i);
+        tm->submit_transaction(std::move(t)).unsafe_get();
+        t = tm->create_transaction();
+      }
+      if (i % 100 == 0) {
+        logger().debug("check_mappings i= {}", i);
+        check_mappings(omap_root, *t);
+        check_mappings(omap_root);
+      }
+      i++;
+    }
+    logger().debug("finally submitting transaction ");
+    tm->submit_transaction(std::move(t)).unsafe_get();
+  });
+}
+
+// Inserts 50 x 5 random short key/value pairs, then walks the first 100
+// tracked keys and removes those at positions 31..99, committing every tenth
+// step and checking the mappings every fiftieth step.
+TEST_F(omap_manager_test_t, force_leafnode_split_merge_fullandbalanced)
+{
+  run_async([this] {
+    omap_root_t omap_root(0, L_ADDR_NULL);
+    {
+      auto t = tm->create_transaction();
+      omap_root = omap_manager->initialize_omap(*t).unsafe_get0();
+      tm->submit_transaction(std::move(t)).unsafe_get();
+    }
+    const int STR_LEN = 50;
+    char str[STR_LEN + 1];
+
+    for (unsigned i = 0; i < 50; i++) {
+      auto t = tm->create_transaction();
+      logger().debug("opened split_merge transaction");
+      for (unsigned j = 0; j < 5; ++j) {
+        string key(rand_string(str, rand() % STR_LEN));
+        string val(rand_string(str, rand() % STR_LEN));
+        [[maybe_unused]] auto addref = set_key(omap_root, *t, key, val);
+        if ((i % 10 == 0) && (j == 3)) {
+          check_mappings(omap_root, *t);
+        }
+      }
+      logger().debug("submitting transaction");
+      tm->submit_transaction(std::move(t)).unsafe_get();
+      if (i % 50 == 0) {
+        check_mappings(omap_root);
+      }
+    }
+    auto t = tm->create_transaction();
+    int i = 0;
+    // rm_key() erases the entry from test_omap_mappings. Erasing the element a
+    // range-for is currently positioned on invalidates its iterator, so step
+    // the iterator past the element and copy the key before removing it.
+    for (auto it = test_omap_mappings.begin();
+         it != test_omap_mappings.end(); ) {
+      const std::string key = (it++)->first;
+      if (30 < i && i < 100) {
+        [[maybe_unused]] auto rmref = rm_key(omap_root, *t, key);
+      }
+
+      if (i % 10 == 0) {
+        logger().debug("submitting transaction i= {}", i);
+        tm->submit_transaction(std::move(t)).unsafe_get();
+        t = tm->create_transaction();
+      }
+      if (i % 50 == 0) {
+        logger().debug("check_mappings i= {}", i);
+        check_mappings(omap_root, *t);
+        check_mappings(omap_root);
+      }
+      i++;
+      if (i == 100)
+        break;
+    }
+    logger().debug("finally submitting transaction ");
+    tm->submit_transaction(std::move(t)).unsafe_get();
+    check_mappings(omap_root);
+  });
+}
+
+
+// Inserts 40 x 10 random key/value pairs (up to 299 characters each), then
+// exercises list_keys() and list() both over the whole omap and from a
+// remembered key with a limit of 100, and finally clears the omap.
+TEST_F(omap_manager_test_t, force_split_listkeys_list_clear)
+{
+  run_async([this] {
+    omap_root_t omap_root(0, L_ADDR_NULL);
+    {
+      auto init_txn = tm->create_transaction();
+      omap_root = omap_manager->initialize_omap(*init_txn).unsafe_get0();
+      tm->submit_transaction(std::move(init_txn)).unsafe_get();
+    }
+    const int STR_LEN = 300;
+    char str[STR_LEN + 1];
+    // Ends up holding the last key inserted during round 10.
+    string temp;
+    for (unsigned round = 0; round < 40; ++round) {
+      auto txn = tm->create_transaction();
+      logger().debug("opened transaction");
+      for (unsigned n = 0; n < 10; ++n) {
+        const int key_len = rand() % STR_LEN;
+        string key(rand_string(str, key_len));
+        const int val_len = rand() % STR_LEN;
+        string val(rand_string(str, val_len));
+        [[maybe_unused]] auto set_res = set_key(omap_root, *txn, key, val);
+        if (round == 10) {
+          temp = key;
+        }
+        if (round % 20 == 0 && n == 5) {
+          check_mappings(omap_root, *txn);
+        }
+      }
+      logger().debug("force split submit transaction i = {}", round);
+      tm->submit_transaction(std::move(txn)).unsafe_get();
+      check_mappings(omap_root);
+    }
+    std::string empty = "";
+
+    // Full key listing.
+    auto txn = tm->create_transaction();
+    [[maybe_unused]] auto keys = list_keys(omap_root, *txn, empty);
+    tm->submit_transaction(std::move(txn)).unsafe_get();
+
+    // Key listing from the remembered key, limited to 100 entries.
+    txn = tm->create_transaction();
+    keys = list_keys(omap_root, *txn, temp, 100);
+    tm->submit_transaction(std::move(txn)).unsafe_get();
+
+    // Full key/value listing.
+    txn = tm->create_transaction();
+    [[maybe_unused]] auto ls = list(omap_root, *txn, empty);
+    tm->submit_transaction(std::move(txn)).unsafe_get();
+
+    // Key/value listing from the remembered key, limited to 100 entries.
+    txn = tm->create_transaction();
+    ls = list(omap_root, *txn, temp, 100);
+    tm->submit_transaction(std::move(txn)).unsafe_get();
+
+    // Drop the whole omap.
+    txn = tm->create_transaction();
+    clear(omap_root, *txn);
+    tm->submit_transaction(std::move(txn)).unsafe_get();
+
+  });
+}
+
+// Inserts 10 x 80 random key/value pairs (up to 299 characters each) and
+// verifies all mappings once every transaction has been committed.
+TEST_F(omap_manager_test_t, internal_force_split)
+{
+  run_async([this] {
+    omap_root_t omap_root(0, L_ADDR_NULL);
+    {
+      auto init_txn = tm->create_transaction();
+      omap_root = omap_manager->initialize_omap(*init_txn).unsafe_get0();
+      tm->submit_transaction(std::move(init_txn)).unsafe_get();
+    }
+    const int STR_LEN = 300;
+    char str[STR_LEN + 1];
+    for (unsigned round = 0; round < 10; ++round) {
+      logger().debug("opened split transaction");
+      auto txn = tm->create_transaction();
+
+      for (unsigned n = 0; n < 80; ++n) {
+        const int key_len = rand() % STR_LEN;
+        string key(rand_string(str, key_len));
+        const int val_len = rand() % STR_LEN;
+        string val(rand_string(str, val_len));
+        [[maybe_unused]] auto set_res = set_key(omap_root, *txn, key, val);
+        // Spot-check inside the open transaction on even rounds.
+        if (round % 2 == 0 && n % 50 == 0) {
+          check_mappings(omap_root, *txn);
+        }
+      }
+      logger().debug("submitting transaction i = {}", round);
+      tm->submit_transaction(std::move(txn)).unsafe_get();
+    }
+    check_mappings(omap_root);
+  });
+}
+
+// Inserts 8 x 80 random key/value pairs (up to 299 characters each), then
+// removes every tracked key again, committing every tenth step and checking
+// the mappings every fiftieth step.
+TEST_F(omap_manager_test_t, internal_force_merge_fullandbalanced)
+{
+  run_async([this] {
+    omap_root_t omap_root(0, L_ADDR_NULL);
+    {
+      auto t = tm->create_transaction();
+      omap_root = omap_manager->initialize_omap(*t).unsafe_get0();
+      tm->submit_transaction(std::move(t)).unsafe_get();
+    }
+    const int STR_LEN = 300;
+    char str[STR_LEN + 1];
+
+    for (unsigned i = 0; i < 8; i++) {
+      logger().debug("opened split transaction");
+      auto t = tm->create_transaction();
+
+      for (unsigned j = 0; j < 80; ++j) {
+        string key(rand_string(str, rand() % STR_LEN));
+        string val(rand_string(str, rand() % STR_LEN));
+        [[maybe_unused]] auto addref = set_key(omap_root, *t, key, val);
+        if ((i % 2 == 0) && (j % 50 == 0)) {
+          check_mappings(omap_root, *t);
+        }
+      }
+      logger().debug("submitting transaction");
+      tm->submit_transaction(std::move(t)).unsafe_get();
+    }
+    auto t = tm->create_transaction();
+    int i = 0;
+    // rm_key() erases the entry from test_omap_mappings. Erasing the element a
+    // range-for is currently positioned on invalidates its iterator, so step
+    // the iterator past the element and copy the key before removing it.
+    for (auto it = test_omap_mappings.begin();
+         it != test_omap_mappings.end(); ) {
+      const std::string key = (it++)->first;
+      [[maybe_unused]] auto rmref = rm_key(omap_root, *t, key);
+
+      if (i % 10 == 0) {
+        logger().debug("submitting transaction i= {}", i);
+        tm->submit_transaction(std::move(t)).unsafe_get();
+        t = tm->create_transaction();
+      }
+      if (i % 50 == 0) {
+        logger().debug("check_mappings i= {}", i);
+        check_mappings(omap_root, *t);
+        check_mappings(omap_root);
+      }
+      i++;
+    }
+    logger().debug("finally submitting transaction ");
+    tm->submit_transaction(std::move(t)).unsafe_get();
+    check_mappings(omap_root);
+  });
+}
+
+// Same insert-then-remove workload as internal_force_merge_fullandbalanced,
+// but with replay() (close, remount, mount) interleaved: after the initial
+// setup, after all inserts, after every commit in the removal phase and after
+// the final commit.
+TEST_F(omap_manager_test_t, replay)
+{
+  run_async([this] {
+    omap_root_t omap_root(0, L_ADDR_NULL);
+    {
+      auto t = tm->create_transaction();
+      omap_root = omap_manager->initialize_omap(*t).unsafe_get0();
+      tm->submit_transaction(std::move(t)).unsafe_get();
+      replay();
+    }
+    const int STR_LEN = 300;
+    char str[STR_LEN + 1];
+
+    for (unsigned i = 0; i < 8; i++) {
+      logger().debug("opened split transaction");
+      auto t = tm->create_transaction();
+
+      for (unsigned j = 0; j < 80; ++j) {
+        string key(rand_string(str, rand() % STR_LEN));
+        string val(rand_string(str, rand() % STR_LEN));
+        [[maybe_unused]] auto addref = set_key(omap_root, *t, key, val);
+        if ((i % 2 == 0) && (j % 50 == 0)) {
+          check_mappings(omap_root, *t);
+        }
+      }
+      logger().debug("submitting transaction i = {}", i);
+      tm->submit_transaction(std::move(t)).unsafe_get();
+    }
+    replay();
+    check_mappings(omap_root);
+
+    auto t = tm->create_transaction();
+    int i = 0;
+    // rm_key() erases the entry from test_omap_mappings. Erasing the element a
+    // range-for is currently positioned on invalidates its iterator, so step
+    // the iterator past the element and copy the key before removing it.
+    for (auto it = test_omap_mappings.begin();
+         it != test_omap_mappings.end(); ) {
+      const std::string key = (it++)->first;
+      [[maybe_unused]] auto rmref = rm_key(omap_root, *t, key);
+
+      if (i % 10 == 0) {
+        logger().debug("submitting transaction i= {}", i);
+        tm->submit_transaction(std::move(t)).unsafe_get();
+        replay();
+        t = tm->create_transaction();
+      }
+      if (i % 50 == 0) {
+        logger().debug("check_mappings i= {}", i);
+        check_mappings(omap_root, *t);
+        check_mappings(omap_root);
+      }
+      i++;
+    }
+    logger().debug("finally submitting transaction ");
+    tm->submit_transaction(std::move(t)).unsafe_get();
+    replay();
+    check_mappings(omap_root);
+  });
+}
+
+
+// Inserts 53 x 8 pairs of 300-character keys/values followed by 100 x 8 pairs
+// of 100-character keys/values, then verifies every mapping.
+TEST_F(omap_manager_test_t, internal_force_split_to_root)
+{
+  run_async([this] {
+    omap_root_t omap_root(0, L_ADDR_NULL);
+    {
+      auto init_txn = tm->create_transaction();
+      omap_root = omap_manager->initialize_omap(*init_txn).unsafe_get0();
+      tm->submit_transaction(std::move(init_txn)).unsafe_get();
+    }
+    const int STR_LEN = 300;
+    char str[STR_LEN + 1];
+
+    logger().debug("set big keys");
+    for (unsigned round = 0; round < 53; ++round) {
+      auto txn = tm->create_transaction();
+
+      for (unsigned n = 0; n < 8; ++n) {
+        // Fixed-length strings: key is generated before the value.
+        string key(rand_string(str, STR_LEN));
+        string val(rand_string(str, STR_LEN));
+        [[maybe_unused]] auto set_res = set_key(omap_root, *txn, key, val);
+      }
+      logger().debug("submitting transaction i = {}", round);
+      tm->submit_transaction(std::move(txn)).unsafe_get();
+    }
+
+    logger().debug("set small keys");
+    const int STR_LEN_2 = 100;
+    char str_2[STR_LEN_2 + 1];
+    for (unsigned round = 0; round < 100; ++round) {
+      auto txn = tm->create_transaction();
+
+      for (unsigned n = 0; n < 8; ++n) {
+        string key(rand_string(str_2, STR_LEN_2));
+        string val(rand_string(str_2, STR_LEN_2));
+        [[maybe_unused]] auto set_res = set_key(omap_root, *txn, key, val);
+      }
+      logger().debug("submitting transaction last");
+      tm->submit_transaction(std::move(txn)).unsafe_get();
+    }
+    check_mappings(omap_root);
+  });
+}