From: chunmei-liu Date: Thu, 17 Dec 2020 01:20:44 +0000 (-0800) Subject: crimson/seastore: add omap tree implementation X-Git-Tag: v17.0.0~31^2~2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=b414d7b0277b39678d99e5a3a0beb886a91c4228;p=ceph.git crimson/seastore: add omap tree implementation Signed-off-by: chunmei-liu --- diff --git a/src/crimson/os/seastore/CMakeLists.txt b/src/crimson/os/seastore/CMakeLists.txt index 77f8465cf9a6..fd8ff393fef5 100644 --- a/src/crimson/os/seastore/CMakeLists.txt +++ b/src/crimson/os/seastore/CMakeLists.txt @@ -11,6 +11,9 @@ add_library(crimson-seastore STATIC lba_manager/btree/btree_lba_manager.cc lba_manager/btree/lba_btree_node_impl.cc lba_manager/btree/btree_range_pin.cc + omap_manager.cc + omap_manager/btree/btree_omap_manager.cc + omap_manager/btree/omap_btree_node_impl.cc onode.cc onode_manager/simple-fltree/onode_block.cc onode_manager/simple-fltree/onode_delta.cc diff --git a/src/crimson/os/seastore/cache.cc b/src/crimson/os/seastore/cache.cc index 6a406c1b85a0..76f5008485ef 100644 --- a/src/crimson/os/seastore/cache.cc +++ b/src/crimson/os/seastore/cache.cc @@ -7,6 +7,7 @@ // included for get_extent_by_type #include "crimson/os/seastore/extentmap_manager/btree/extentmap_btree_node_impl.h" #include "crimson/os/seastore/lba_manager/btree/lba_btree_node_impl.h" +#include "crimson/os/seastore/omap_manager/btree/omap_btree_node_impl.h" #include "crimson/os/seastore/onode_manager/simple-fltree/onode_block.h" #include "crimson/os/seastore/onode_manager/staged-fltree/node_extent_manager/seastore.h" #include "test/crimson/seastore/test_block.h" @@ -136,6 +137,10 @@ CachedExtentRef Cache::alloc_new_extent_by_type( return alloc_new_extent(t, length); case extent_types_t::EXTMAP_LEAF: return alloc_new_extent(t, length); + case extent_types_t::OMAP_INNER: + return alloc_new_extent(t, length); + case extent_types_t::OMAP_LEAF: + return alloc_new_extent(t, length); case extent_types_t::TEST_BLOCK: 
return alloc_new_extent(t, length); case extent_types_t::TEST_BLOCK_PHYSICAL: @@ -501,6 +506,16 @@ Cache::get_extent_ertr::future Cache::get_extent_by_type( ).safe_then([](auto extent) { return CachedExtentRef(extent.detach(), false /* add_ref */); }); + case extent_types_t::OMAP_INNER: + return get_extent(offset, length + ).safe_then([](auto extent) { + return CachedExtentRef(extent.detach(), false /* add_ref */); + }); + case extent_types_t::OMAP_LEAF: + return get_extent(offset, length + ).safe_then([](auto extent) { + return CachedExtentRef(extent.detach(), false /* add_ref */); + }); case extent_types_t::ONODE_BLOCK: return get_extent(offset, length ).safe_then([](auto extent) { diff --git a/src/crimson/os/seastore/omap_manager.cc b/src/crimson/os/seastore/omap_manager.cc new file mode 100644 index 000000000000..f4c3ff032a72 --- /dev/null +++ b/src/crimson/os/seastore/omap_manager.cc @@ -0,0 +1,43 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +#include +#include + +#include "crimson/os/seastore/transaction_manager.h" +#include "crimson/os/seastore/omap_manager.h" +#include "crimson/os/seastore/omap_manager/btree/btree_omap_manager.h" + +namespace crimson::os::seastore::omap_manager { + +OMapManagerRef create_omap_manager( + TransactionManager &trans_manager) { + return OMapManagerRef(new BtreeOMapManager(trans_manager)); +} + +} + +namespace std { +std::ostream &operator<<(std::ostream &out, const std::pair &rhs) +{ + return out << "key_value_map (" << rhs.first<< "->" << rhs.second << ")"; +} +} + +namespace crimson::os::seastore { + +std::ostream &operator<<(std::ostream &out, const std::list &rhs) +{ + out << '['; + std::copy(std::begin(rhs), std::end(rhs), std::experimental::make_ostream_joiner(out, ", ")); + return out << ']'; +} + +std::ostream &operator<<(std::ostream &out, const std::vector> &rhs) +{ + out << '['; + std::ostream_iterator> out_it(out, ", "); + std::copy(rhs.begin(), rhs.end(), 
out_it); + return out << ']'; +} + +} diff --git a/src/crimson/os/seastore/omap_manager.h b/src/crimson/os/seastore/omap_manager.h new file mode 100644 index 000000000000..6725bc031ed3 --- /dev/null +++ b/src/crimson/os/seastore/omap_manager.h @@ -0,0 +1,152 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include + +#include +#include + +#include + +#include "crimson/osd/exceptions.h" +#include "crimson/os/seastore/seastore_types.h" +#include "crimson/os/seastore/transaction_manager.h" + +#define OMAP_BLOCK_SIZE 4096 + +namespace crimson::os::seastore { + +enum class omap_root_state_t : uint8_t { + INITIAL = 0, + MUTATED = 1, + NONE = 0xFF +}; + +struct omap_root_t { + depth_t depth = 0; + omap_root_state_t state; + laddr_t omap_root_laddr; + omap_root_t(depth_t dep, laddr_t laddr) + : depth(dep), + omap_root_laddr(laddr) { state = omap_root_state_t::INITIAL; } +}; + +struct list_keys_result_t { + std::vector keys; + std::string next; +}; + +struct list_kvs_result_t { + std::vector> kvs; + std::string next; +}; +constexpr size_t MAX_SIZE = std::numeric_limits::max(); +std::ostream &operator<<(std::ostream &out, const std::list &rhs); +std::ostream &operator<<(std::ostream &out, const std::map &rhs); + +class OMapManager { + /* all OMapManager API use reference to transfer input string parameters, + * the upper caller should guarantee the referenced string values alive (not freed) + * until these functions future resolved. + */ +public: + /* allocate omap tree root node + * + * input: Transaction &t, current transaction + * return: return the omap_root_t structure. 
+ */ + using initialize_omap_ertr = TransactionManager::alloc_extent_ertr; + using initialize_omap_ret = initialize_omap_ertr::future; + virtual initialize_omap_ret initialize_omap(Transaction &t) = 0; + + /*get value(string) by key(string) + * + * input: omap_root_t omap_root, omap btree root information + * input: Transaction &t, current transaction + * input: string &key, omap string key + * return: string key->string value mapping pair. + */ + using omap_get_value_ertr = TransactionManager::read_extent_ertr; + using omap_get_value_ret = omap_get_value_ertr::future>; + virtual omap_get_value_ret omap_get_value(omap_root_t &omap_root, Transaction &t, + const std::string &key) = 0; + + /* set key value mapping in omap + * + * input: omap_root_t &omap_root, omap btree root information + * input: Transaction &t, current transaction + * input: string &key, omap string key + * input: string &value, mapped value corresponding key + * return: mutation_result_t, status should be success. + */ + using omap_set_key_ertr = TransactionManager::read_extent_ertr; + using omap_set_key_ret = omap_set_key_ertr::future; + virtual omap_set_key_ret omap_set_key(omap_root_t &omap_root, Transaction &t, + const std::string &key, const std::string &value) = 0; + + /* remove key value mapping in omap tree + * + * input: omap_root_t &omap_root, omap btree root information + * input: Transaction &t, current transaction + * input: string &key, omap string key + * return: remove success return true, else return false. 
+ */ + using omap_rm_key_ertr = TransactionManager::read_extent_ertr; + using omap_rm_key_ret = omap_rm_key_ertr::future; + virtual omap_rm_key_ret omap_rm_key(omap_root_t &omap_root, Transaction &t, + const std::string &key) = 0; + + /* get all keys or partial keys in omap tree + * + * input: omap_root_t &omap_root, omap btree root information + * input: Transaction &t, current transaction + * input: string &start, the list keys range begin from start, + * if start is "", list from the first omap key + * input: max_result_size, the number of list keys, + * it it is not set, list all keys after string start + * return: list_keys_result_t, listed keys and next key + */ + using omap_list_keys_ertr = TransactionManager::read_extent_ertr; + using omap_list_keys_ret = omap_list_keys_ertr::future; + virtual omap_list_keys_ret omap_list_keys(omap_root_t &omap_root, Transaction &t, + std::string &start, + size_t max_result_size = MAX_SIZE) = 0; + + /* Get all or partial key-> value mapping in omap tree + * + * input: omap_root_t &omap_root, omap btree root information + * input: Transaction &t, current transaction + * input: string &start, the list keys range begin from start, + * if start is "" , list from the first omap key + * input: max_result_size, the number of list keys, + * it it is not set, list all keys after string start. + * return: list_kvs_result_t, listed key->value mapping and next key. 
+ */ + using omap_list_ertr = TransactionManager::read_extent_ertr; + using omap_list_ret = omap_list_ertr::future; + virtual omap_list_ret omap_list(omap_root_t &omap_root, Transaction &t, + std::string &start, + size_t max_result_size = MAX_SIZE) = 0; + + /* clear all omap tree key->value mapping + * + * input: omap_root_t &omap_root, omap btree root information + * input: Transaction &t, current transaction + */ + using omap_clear_ertr = TransactionManager::read_extent_ertr; + using omap_clear_ret = omap_clear_ertr::future<>; + virtual omap_clear_ret omap_clear(omap_root_t &omap_root, Transaction &t) = 0; + + virtual ~OMapManager() {} +}; +using OMapManagerRef = std::unique_ptr; + +namespace omap_manager { + +OMapManagerRef create_omap_manager ( + TransactionManager &trans_manager); +} + +} diff --git a/src/crimson/os/seastore/omap_manager/btree/btree_omap_manager.cc b/src/crimson/os/seastore/omap_manager/btree/btree_omap_manager.cc new file mode 100644 index 000000000000..877d192089f8 --- /dev/null +++ b/src/crimson/os/seastore/omap_manager/btree/btree_omap_manager.cc @@ -0,0 +1,188 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include + +#include "crimson/common/log.h" + +#include "crimson/os/seastore/seastore_types.h" +#include "crimson/os/seastore/omap_manager/btree/btree_omap_manager.h" +#include "crimson/os/seastore/omap_manager/btree/omap_btree_node_impl.h" + +namespace { + seastar::logger& logger() { + return crimson::get_logger(ceph_subsys_filestore); + } +} + +namespace crimson::os::seastore::omap_manager { + +BtreeOMapManager::BtreeOMapManager( + TransactionManager &tm) + : tm(tm) {} + +BtreeOMapManager::initialize_omap_ret +BtreeOMapManager::initialize_omap(Transaction &t) +{ + + logger().debug("{}", __func__); + return tm.alloc_extent(t, L_ADDR_MIN, OMAP_BLOCK_SIZE) + .safe_then([this](auto&& root_extent) { + root_extent->set_size(0); + omap_node_meta_t meta{1}; + 
root_extent->set_meta(meta); + omap_root_t omap_root = omap_root_t(1, root_extent->get_laddr()); + return initialize_omap_ertr::make_ready_future(omap_root); + }); +} + +BtreeOMapManager::get_root_ret +BtreeOMapManager::get_omap_root(omap_root_t &omap_root, Transaction &t) +{ + assert(omap_root.omap_root_laddr != L_ADDR_NULL); + laddr_t laddr = omap_root.omap_root_laddr; + return omap_load_extent(get_omap_context(omap_root, t), laddr, omap_root.depth); +} + +BtreeOMapManager::handle_root_split_ret +BtreeOMapManager::handle_root_split(omap_context_t oc, OMapNode::mutation_result_t mresult) +{ + return oc.tm.alloc_extent(oc.t, L_ADDR_MIN, OMAP_BLOCK_SIZE) + .safe_then([oc, mresult](auto&& nroot) { + auto [left, right, pivot] = *(mresult.split_tuple); + omap_node_meta_t meta{oc.omap_root.depth + 1}; + nroot->set_meta(meta); + nroot->journal_inner_insert(nroot->iter_begin(), left->get_laddr(), + "", nroot->maybe_get_delta_buffer()); + nroot->journal_inner_insert(nroot->iter_begin() + 1, right->get_laddr(), + pivot, nroot->maybe_get_delta_buffer()); + oc.omap_root.omap_root_laddr = nroot->get_laddr(); + oc.omap_root.depth += 1; + oc.omap_root.state = omap_root_state_t::MUTATED; + return handle_root_split_ertr::make_ready_future(true); + }); +} + +BtreeOMapManager::handle_root_merge_ret +BtreeOMapManager::handle_root_merge(omap_context_t oc, OMapNode::mutation_result_t mresult) +{ + auto root = *(mresult.need_merge); + auto iter = root->cast()->iter_begin(); + oc.omap_root.omap_root_laddr = iter->get_node_key().laddr; + oc.omap_root.depth -= 1; + oc.omap_root.state = omap_root_state_t::MUTATED; + return oc.tm.dec_ref(oc.t, root->get_laddr()).safe_then([] (auto &&ret) { + return handle_root_merge_ertr::make_ready_future(true); + }); +} + + +BtreeOMapManager::omap_get_value_ret +BtreeOMapManager::omap_get_value(omap_root_t &omap_root, Transaction &t, + const std::string &key) +{ + logger().debug("{}: {}", __func__, key); + return get_omap_root(omap_root, 
t).safe_then([this, &omap_root, &t, &key](auto&& extent) { + return extent->get_value(get_omap_context(omap_root, t), key); + }).safe_then([](auto &&e) { + logger().debug("{}: {} -> {}", __func__, e.first, e.second); + return omap_get_value_ret( + omap_get_value_ertr::ready_future_marker{}, + std::move(e)); + }); + +} + +BtreeOMapManager::omap_set_key_ret +BtreeOMapManager::omap_set_key(omap_root_t &omap_root, Transaction &t, + const std::string &key, const std::string &value) +{ + logger().debug("{}: {} -> {}", __func__, key, value); + return get_omap_root(omap_root, t).safe_then([this, &omap_root, &t, &key, &value](auto root) { + return root->insert(get_omap_context(omap_root, t), key, value); + }).safe_then([this, &omap_root, &t](auto mresult) { + if (mresult.status == mutation_status_t::SUCCESS) + return omap_set_key_ertr::make_ready_future(true); + else if (mresult.status == mutation_status_t::SPLITTED) + return handle_root_split(get_omap_context(omap_root, t), mresult); + else + return omap_set_key_ertr::make_ready_future(false); + + }); + +} + +BtreeOMapManager::omap_rm_key_ret +BtreeOMapManager::omap_rm_key(omap_root_t &omap_root, Transaction &t, const std::string &key) +{ + logger().debug("{}: {}", __func__, key); + return get_omap_root(omap_root, t).safe_then([this, &omap_root, &t, &key](auto root) { + return root->rm_key(get_omap_context(omap_root, t), key); + }).safe_then([this, &omap_root, &t](auto mresult) { + if (mresult.status == mutation_status_t::SUCCESS) + return omap_rm_key_ertr::make_ready_future(true); + else if (mresult.status == mutation_status_t::SPLITTED) + return handle_root_split(get_omap_context(omap_root, t), mresult); + else if (mresult.status == mutation_status_t::NEED_MERGE) { + auto root = *(mresult.need_merge); + if (root->get_node_size() == 1 && omap_root.depth != 1) + return handle_root_merge(get_omap_context(omap_root, t), mresult); + else + return omap_rm_key_ertr::make_ready_future(true); + } + else + return 
omap_rm_key_ertr::make_ready_future(false); + }); + +} + +BtreeOMapManager::omap_list_keys_ret +BtreeOMapManager::omap_list_keys(omap_root_t &omap_root, Transaction &t, + std::string &start, size_t max_result_size) +{ + logger().debug("{}", __func__); + return get_omap_root(omap_root, t).safe_then([this, &omap_root, &t, &start, + max_result_size] (auto extent) { + return extent->list_keys(get_omap_context(omap_root, t), start, max_result_size) + .safe_then([](auto &&result) { + return omap_list_keys_ret( + omap_list_keys_ertr::ready_future_marker{}, + std::move(result)); + }); + }); + +} + +BtreeOMapManager::omap_list_ret +BtreeOMapManager::omap_list(omap_root_t &omap_root, Transaction &t, + std::string &start, size_t max_result_size) +{ + logger().debug("{}", __func__); + return get_omap_root(omap_root, t).safe_then([this, &omap_root, &t, &start, max_result_size] + (auto extent) { + return extent->list(get_omap_context(omap_root, t), start, max_result_size) + .safe_then([](auto &&result) { + return omap_list_ret( + omap_list_ertr::ready_future_marker{}, + std::move(result)); + }); + }); +} + +BtreeOMapManager::omap_clear_ret +BtreeOMapManager::omap_clear(omap_root_t &omap_root, Transaction &t) +{ + logger().debug("{}", __func__); + return get_omap_root(omap_root, t).safe_then([this, &omap_root, &t](auto extent) { + return extent->clear(get_omap_context(omap_root, t)); + }).safe_then([this, &omap_root, &t] { + return tm.dec_ref(t, omap_root.omap_root_laddr).safe_then([&omap_root] (auto ret) { + omap_root.state = omap_root_state_t::MUTATED; + omap_root.depth = 0; + omap_root.omap_root_laddr = L_ADDR_NULL; + return omap_clear_ertr::now(); + }); + }); +} + +} diff --git a/src/crimson/os/seastore/omap_manager/btree/btree_omap_manager.h b/src/crimson/os/seastore/omap_manager/btree/btree_omap_manager.h new file mode 100644 index 000000000000..d1601bad1169 --- /dev/null +++ b/src/crimson/os/seastore/omap_manager/btree/btree_omap_manager.h @@ -0,0 +1,85 @@ +// -*- 
mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once +#include +#include +#include + +#include "include/ceph_assert.h" +#include "crimson/osd/exceptions.h" + +#include "crimson/os/seastore/omap_manager.h" +#include "crimson/os/seastore/omap_manager/btree/omap_btree_node.h" +#include "crimson/os/seastore/seastore_types.h" +#include "crimson/os/seastore/transaction_manager.h" + +namespace crimson::os::seastore::omap_manager { +/** + * BtreeOMapManager + * + * Uses a btree to track : + * string -> string mapping for each onode omap + */ + +class BtreeOMapManager : public OMapManager { + TransactionManager &tm; + + omap_context_t get_omap_context(omap_root_t &omap_root, Transaction &t) { + return omap_context_t{omap_root, tm, t}; + } + + /* get_omap_root + * + * load omap tree root node + */ + using get_root_ertr = TransactionManager::read_extent_ertr; + using get_root_ret = get_root_ertr::future; + get_root_ret get_omap_root(omap_root_t &omap_root, Transaction &t); + + /* handle_root_split + * + * root has been splitted and need update omap_root_t + */ + using handle_root_split_ertr = TransactionManager::read_extent_ertr; + using handle_root_split_ret = handle_root_split_ertr::future; + handle_root_split_ret handle_root_split(omap_context_t oc, + OMapNode:: mutation_result_t mresult); + + /* handle_root_merge + * + * root node has only one item and it is not leaf node, need remove a layer + */ + using handle_root_merge_ertr = TransactionManager::read_extent_ertr; + using handle_root_merge_ret = handle_root_merge_ertr::future; + handle_root_merge_ret handle_root_merge(omap_context_t oc, + OMapNode:: mutation_result_t mresult); + +public: + explicit BtreeOMapManager(TransactionManager &tm); + + initialize_omap_ret initialize_omap(Transaction &t) final; + + omap_get_value_ret omap_get_value(omap_root_t &omap_root, Transaction &t, + const std::string &key) final; + + omap_set_key_ret omap_set_key(omap_root_t 
&omap_root, Transaction &t, + const std::string &key, const std::string &value) final; + + omap_rm_key_ret omap_rm_key(omap_root_t &omap_root, Transaction &t, + const std::string &key) final; + + omap_list_keys_ret omap_list_keys(omap_root_t &omap_root, Transaction &t, + std::string &start, + size_t max_result_size = MAX_SIZE) final; + + omap_list_ret omap_list(omap_root_t &omap_root, Transaction &t, + std::string &start, + size_t max_result_size = MAX_SIZE) final; + + omap_clear_ret omap_clear(omap_root_t &omap_root, Transaction &t) final; + +}; +using BtreeOMapManagerRef = std::unique_ptr; + +} diff --git a/src/crimson/os/seastore/omap_manager/btree/omap_btree_node.h b/src/crimson/os/seastore/omap_manager/btree/omap_btree_node.h new file mode 100644 index 000000000000..7a447bb058e5 --- /dev/null +++ b/src/crimson/os/seastore/omap_manager/btree/omap_btree_node.h @@ -0,0 +1,100 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +#pragma once + +#include +#include + +//#include + +#include "crimson/common/log.h" +#include "crimson/os/seastore/seastore_types.h" +#include "crimson/os/seastore/transaction_manager.h" +#include "crimson/os/seastore/omap_manager.h" +#include "crimson/os/seastore/omap_manager/btree/omap_types.h" + +namespace crimson::os::seastore::omap_manager{ + +struct omap_context_t { + omap_root_t &omap_root; + TransactionManager &tm; + Transaction &t; +}; + +enum class mutation_status_t : uint8_t { + SUCCESS = 0, + SPLITTED = 1, + NEED_MERGE = 2, + FAIL = 3 +}; + +struct OMapNode : LogicalCachedExtent { + using OMapNodeRef = TCachedExtentRef; + + struct mutation_result_t { + mutation_status_t status; + /// Only populated if SPLITTED, indicates the newly created left and right nodes + /// from splitting the target entry during insertion. + std::optional> split_tuple; + /// only sopulated if need merged, indicate which entry need be doing merge in upper layer. 
+ std::optional need_merge; + + mutation_result_t(mutation_status_t s, std::optional> tuple, std::optional n_merge) + : status(s), + split_tuple(tuple), + need_merge(n_merge) {} + }; + + OMapNode(ceph::bufferptr &&ptr) : LogicalCachedExtent(std::move(ptr)) {} + OMapNode(const OMapNode &other) + : LogicalCachedExtent(other) {} + + using get_value_ertr = OMapManager::omap_get_value_ertr; + using get_value_ret = OMapManager::omap_get_value_ret; + virtual get_value_ret get_value(omap_context_t oc, const std::string &key) = 0; + + using insert_ertr = TransactionManager::alloc_extent_ertr; + using insert_ret = insert_ertr::future; + virtual insert_ret insert(omap_context_t oc, const std::string &key, const std::string &value) = 0; + + using rm_key_ertr = TransactionManager::alloc_extent_ertr; + using rm_key_ret = rm_key_ertr::future; + virtual rm_key_ret rm_key(omap_context_t oc, const std::string &key) = 0; + + using list_keys_ertr = OMapManager::omap_list_keys_ertr; + using list_keys_ret = OMapManager::omap_list_keys_ret; + virtual list_keys_ret list_keys(omap_context_t oc, std::string &start, + size_t max_result_size) = 0; + + using list_ertr = OMapManager::omap_list_ertr; + using list_ret = OMapManager::omap_list_ret; + virtual list_ret list(omap_context_t oc, std::string &start, size_t max_result_size) = 0; + + using clear_ertr = OMapManager::omap_clear_ertr; + using clear_ret = clear_ertr::future<>; + virtual clear_ret clear(omap_context_t oc) = 0; + + using full_merge_ertr = TransactionManager::alloc_extent_ertr; + using full_merge_ret = full_merge_ertr::future; + virtual full_merge_ret make_full_merge(omap_context_t oc, OMapNodeRef right) = 0; + + using make_balanced_ertr = TransactionManager::alloc_extent_ertr; + using make_balanced_ret = make_balanced_ertr::future + >; + virtual make_balanced_ret make_balanced(omap_context_t oc, OMapNodeRef _right) = 0; + + virtual omap_node_meta_t get_node_meta() const = 0; + virtual bool extent_will_overflow(size_t ksize, 
std::optional vsize) const = 0; + virtual bool extent_is_below_min() const = 0; + virtual uint32_t get_node_size() = 0; + + virtual ~OMapNode() = default; +}; + +using OMapNodeRef = OMapNode::OMapNodeRef; + +TransactionManager::read_extent_ertr::future +omap_load_extent(omap_context_t oc, laddr_t laddr, depth_t depth); + +} diff --git a/src/crimson/os/seastore/omap_manager/btree/omap_btree_node_impl.cc b/src/crimson/os/seastore/omap_manager/btree/omap_btree_node_impl.cc new file mode 100644 index 000000000000..b57f66a4e3f0 --- /dev/null +++ b/src/crimson/os/seastore/omap_manager/btree/omap_btree_node_impl.cc @@ -0,0 +1,615 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include + +#include "include/buffer.h" +#include "include/byteorder.h" +#include "crimson/os/seastore/transaction_manager.h" +#include "crimson/os/seastore/omap_manager/btree/omap_btree_node.h" +#include "crimson/os/seastore/omap_manager/btree/omap_btree_node_impl.h" +#include "seastar/core/thread.hh" + +namespace { + seastar::logger& logger() { + return crimson::get_logger(ceph_subsys_filestore); + } +} + +namespace crimson::os::seastore::omap_manager { + +std::ostream &operator<<(std::ostream &out, const omap_inner_key_t &rhs) +{ + return out << "omap_inner_key (" << rhs.key_off<< " - " << rhs.key_len + << " - " << rhs.laddr << ")"; +} + +std::ostream &operator<<(std::ostream &out, const omap_leaf_key_t &rhs) +{ + return out << "omap_leaf_key_t (" << rhs.key_off<< " - " << rhs.key_len + << " "<< rhs.val_off<<" - " << rhs.val_len << ")"; +} + +std::ostream &OMapInnerNode::print_detail_l(std::ostream &out) const +{ + return out << ", size=" << get_size() + << ", depth=" << get_meta().depth; +} + +/** + * make_split_insert + * + * insert an entry at iter, with the address of key. 
+ * will result in a split outcome encoded in the returned mutation_result_t + */ +OMapInnerNode::make_split_insert_ret +OMapInnerNode::make_split_insert(omap_context_t oc, internal_iterator_t iter, + std::string key, laddr_t laddr) +{ + return make_split_children(oc).safe_then([=] (auto tuple) { + auto [left, right, pivot] = tuple; + if (pivot > key) { + auto liter = left->iter_idx(iter.get_index()); + left->journal_inner_insert(liter, laddr, key, + left->maybe_get_delta_buffer()); + } else { //right + auto riter = right->iter_idx(iter.get_index() - left->get_node_size()); + right->journal_inner_insert(riter, laddr, key, + right->maybe_get_delta_buffer()); + } + return make_split_insert_ret( + make_split_insert_ertr::ready_future_marker{}, + mutation_result_t(mutation_status_t::SPLITTED, tuple, std::nullopt)); + }); + +} + + +OMapInnerNode::handle_split_ret +OMapInnerNode::handle_split(omap_context_t oc, internal_iterator_t iter, + mutation_result_t mresult) +{ + logger().debug("{}: {}","OMapInnerNode", __func__); + if (!is_pending()) { + auto mut = oc.tm.get_mutable_extent(oc.t, this)->cast(); + auto mut_iter = mut->iter_idx(iter.get_index()); + return mut->handle_split(oc, mut_iter, mresult); + } + auto [left, right, pivot] = *(mresult.split_tuple); + //update will not cause overflow do it first. 
+ journal_inner_update(iter, left->get_laddr(), maybe_get_delta_buffer()); + if (!extent_will_overflow(pivot.size() + 1, std::nullopt)) { + journal_inner_insert(iter + 1, right->get_laddr(), pivot, + maybe_get_delta_buffer()); + return insert_ret( + insert_ertr::ready_future_marker{}, + mutation_result_t(mutation_status_t::SUCCESS, std::nullopt, std::nullopt)); + } else { + return make_split_insert(oc, iter + 1, pivot, right->get_laddr()) + .safe_then([this, oc] (auto m_result) { + return oc.tm.dec_ref(oc.t, get_laddr()) + .safe_then([m_result = std::move(m_result)] (auto ret) { + return insert_ret( + insert_ertr::ready_future_marker{}, + m_result); + }); + }); + } +} + +OMapInnerNode::get_value_ret +OMapInnerNode::get_value(omap_context_t oc, const std::string &key) +{ + logger().debug("{}: {} key = {}", "OMapInnerNode", __func__, key); + auto child_pt = get_containing_child(key); + auto laddr = child_pt->get_node_key().laddr; + return omap_load_extent(oc, laddr, get_meta().depth - 1).safe_then( + [oc, &key] (auto extent) { + return extent->get_value(oc, key); + }).finally([ref = OMapNodeRef(this)] {}); +} + +OMapInnerNode::insert_ret +OMapInnerNode::insert(omap_context_t oc, const std::string &key, const std::string &value) +{ + logger().debug("{}: {} {}->{}", "OMapInnerNode", __func__, key, value); + auto child_pt = get_containing_child(key); + assert(child_pt != iter_end()); + auto laddr = child_pt->get_node_key().laddr; + return omap_load_extent(oc, laddr, get_meta().depth - 1).safe_then( + [this, oc, child_pt, &key, &value] (auto extent) { + return extent->insert(oc, key, value); + }).safe_then([this, oc, child_pt] (auto mresult) { + if (mresult.status == mutation_status_t::SUCCESS) { + return insert_ertr::make_ready_future(mresult); + } else if (mresult.status == mutation_status_t::SPLITTED) { + return handle_split(oc, child_pt, mresult); + } else { + return insert_ret( + insert_ertr::ready_future_marker{}, + mutation_result_t(mutation_status_t::SUCCESS, 
std::nullopt, std::nullopt)); + } + }); +} + +OMapInnerNode::rm_key_ret +OMapInnerNode::rm_key(omap_context_t oc, const std::string &key) +{ + logger().debug("{}: {}","OMapInnerNode", __func__); + auto child_pt = get_containing_child(key); + auto laddr = child_pt->get_node_key().laddr; + return omap_load_extent(oc, laddr, get_meta().depth - 1).safe_then( + [this, oc, &key, child_pt] (auto extent) { + return extent->rm_key(oc, key) + .safe_then([this, oc, child_pt, extent = std::move(extent)] (auto mresult) { + if (mresult.status == mutation_status_t::SUCCESS || + mresult.status == mutation_status_t::FAIL) { + return rm_key_ertr::make_ready_future(mresult); + } else if (mresult.status == mutation_status_t::NEED_MERGE) { + if (get_node_size() >1) + return merge_entry(oc, child_pt, *(mresult.need_merge)); + else + return rm_key_ret( + rm_key_ertr::ready_future_marker{}, + mutation_result_t(mutation_status_t::SUCCESS, + std::nullopt, std::nullopt)); + } else if (mresult.status == mutation_status_t::SPLITTED) { + return handle_split(oc, child_pt, mresult); + } else { + return rm_key_ertr::make_ready_future(mresult); + } + }); + }); +} + +OMapInnerNode::list_keys_ret +OMapInnerNode::list_keys(omap_context_t oc, std::string &start, size_t max_result_size) +{ + logger().debug("{}: {}","OMapInnerNode", __func__); + auto child_iter = get_containing_child(start); + + return seastar::do_with(child_iter, iter_end(), list_keys_result_t(), [=, &start] + (auto &biter, auto &eiter, auto &result) { + result.next = start; + return crimson::do_until([=, &biter, &eiter, &result] () + -> list_keys_ertr::future { + if (biter == eiter || result.keys.size() == max_result_size) + return list_keys_ertr::make_ready_future(true); + + auto laddr = biter->get_node_key().laddr; + return omap_load_extent(oc, laddr, get_meta().depth - 1).safe_then( + [=, &biter, &eiter, &result] (auto &&extent) { + return extent->list_keys(oc, result.next, max_result_size - result.keys.size()) + .safe_then([&biter, 
&eiter, &result] (auto &&list) mutable { + if (!list.keys.empty()) + result.keys.insert(result.keys.end(), list.keys.begin(),list.keys.end()); + + biter++; + if (list.next != "") + result.next = list.next; + else if (biter != eiter) + result.next = biter->get_node_val(); + else + result.next = ""; + + return list_keys_ertr::make_ready_future(false); + }); + }); + }).safe_then([&result, ref = OMapNodeRef(this)] { + return list_keys_ertr::make_ready_future(std::move(result)); + }); + }); +} + +OMapInnerNode::list_ret +OMapInnerNode::list(omap_context_t oc, std::string &start, size_t max_result_size) +{ + logger().debug("{}: {}","OMapInnerNode", __func__); + auto child_iter = get_containing_child(start); + + return seastar::do_with(child_iter, iter_end(), list_kvs_result_t(), [=, &start] + (auto &biter, auto &eiter, auto &result) { + result.next = start; + return crimson::do_until([=, &biter, &eiter, &result] () + -> list_ertr::future { + if (biter == eiter || result.kvs.size() == max_result_size) + return list_ertr::make_ready_future(true); + + auto laddr = biter->get_node_key().laddr; + return omap_load_extent(oc, laddr, get_meta().depth - 1).safe_then( + [=, &biter, &eiter, &result] (auto &&extent) { + return extent->list(oc, result.next, max_result_size - result.kvs.size()) + .safe_then([&biter, &eiter, &result] (auto &&list) mutable { + if (!list.kvs.empty()) + result.kvs.insert(result.kvs.end(), list.kvs.begin(),list.kvs.end()); + + biter++; + if (list.next != "") + result.next = list.next; + else if (biter != eiter) + result.next = biter->get_node_val(); + else + result.next = ""; + + return list_ertr::make_ready_future(false); + }); + }); + }).safe_then([&result, ref = OMapNodeRef(this)] { + return list_ertr::make_ready_future(std::move(result)); + }); + }); +} + +OMapInnerNode::clear_ret +OMapInnerNode::clear(omap_context_t oc) +{ + logger().debug("{}: {}","OMapInnerNode", __func__); + return crimson::do_for_each(iter_begin(), iter_end(), [this, oc] (auto 
iter) { + auto laddr = iter->get_node_key().laddr; + return omap_load_extent(oc, laddr, get_meta().depth - 1).safe_then( + [oc] (auto &&extent) { + return extent->clear(oc); + }).safe_then([oc, laddr] { + return oc.tm.dec_ref(oc.t, laddr); + }).safe_then([ref = OMapNodeRef(this)] (auto ret){ + return clear_ertr::now(); + }); + }); +} + +OMapInnerNode::split_children_ret +OMapInnerNode:: make_split_children(omap_context_t oc) +{ + logger().debug("{}: {}","OMapInnerNode", __func__); + return oc.tm.alloc_extents(oc.t, L_ADDR_MIN, OMAP_BLOCK_SIZE, 2) + .safe_then([this] (auto &&ext_pair) { + auto left = ext_pair.front(); + auto right = ext_pair.back(); + return split_children_ret( + split_children_ertr::ready_future_marker{}, + std::make_tuple(left, right, split_into(*left, *right))); + }); +} + +OMapInnerNode::full_merge_ret +OMapInnerNode::make_full_merge(omap_context_t oc, OMapNodeRef right) +{ + logger().debug("{}: {}","OMapInnerNode", __func__); + return oc.tm.alloc_extent(oc.t, L_ADDR_MIN, OMAP_BLOCK_SIZE) + .safe_then([this, right] (auto &&replacement) { + replacement->merge_from(*this, *right->cast()); + return full_merge_ret( + full_merge_ertr::ready_future_marker{}, + std::move(replacement)); + }); +} + +OMapInnerNode::make_balanced_ret +OMapInnerNode::make_balanced(omap_context_t oc, OMapNodeRef _right) +{ + logger().debug("{}: {}","OMapInnerNode", __func__); + ceph_assert(_right->get_type() == type); + return oc.tm.alloc_extents(oc.t, L_ADDR_MIN, OMAP_BLOCK_SIZE, 2) + .safe_then([this, _right] (auto &&replacement_pair){ + auto replacement_left = replacement_pair.front(); + auto replacement_right = replacement_pair.back(); + auto &right = *_right->cast(); + return make_balanced_ret( + make_balanced_ertr::ready_future_marker{}, + std::make_tuple(replacement_left, replacement_right, + balance_into_new_nodes(*this, right, + *replacement_left, *replacement_right))); + }); +} + +OMapInnerNode::merge_entry_ret +OMapInnerNode::merge_entry(omap_context_t oc, 
internal_iterator_t iter, OMapNodeRef entry) +{ + logger().debug("{}: {}","OMapInnerNode", __func__); + if (!is_pending()) { + auto mut = oc.tm.get_mutable_extent(oc.t, this)->cast(); + auto mut_iter = mut->iter_idx(iter->get_index()); + return mut->merge_entry(oc, mut_iter, entry); + } + auto is_left = (iter + 1) == iter_end(); + auto donor_iter = is_left ? iter - 1 : iter + 1; + return omap_load_extent(oc, donor_iter->get_node_key().laddr, get_meta().depth - 1) + .safe_then([=] (auto &&donor) mutable { + auto [l, r] = is_left ? + std::make_pair(donor, entry) : std::make_pair(entry, donor); + auto [liter, riter] = is_left ? + std::make_pair(donor_iter, iter) : std::make_pair(iter, donor_iter); + if (donor->extent_is_below_min()) { + logger().debug("{}::merge_entry make_full_merge l {} r {}", __func__, *l, *r); + assert(entry->extent_is_below_min()); + return l->make_full_merge(oc, r).safe_then([=] (auto &&replacement){ + journal_inner_update(liter, replacement->get_laddr(), maybe_get_delta_buffer()); + journal_inner_remove(riter, maybe_get_delta_buffer()); + //retire extent + std::list dec_laddrs {l->get_laddr(), r->get_laddr()}; + return oc.tm.dec_ref(oc.t, dec_laddrs).safe_then([this, oc] (auto &&ret) { + if (extent_is_below_min()) { + return merge_entry_ret( + merge_entry_ertr::ready_future_marker{}, + mutation_result_t(mutation_status_t::NEED_MERGE, std::nullopt, + this->cast())); + } else { + return merge_entry_ret( + merge_entry_ertr::ready_future_marker{}, + mutation_result_t(mutation_status_t::SUCCESS, std::nullopt, std::nullopt)); + } + }); + }); + } else { + logger().debug("{}::merge_entry balanced l {} r {}", __func__, *l, *r); + return l->make_balanced(oc, r).safe_then([=] (auto tuple) { + auto [replacement_l, replacement_r, replacement_pivot] = tuple; + //update will not cuase overflow, do it first + journal_inner_update(liter, replacement_l->get_laddr(), maybe_get_delta_buffer()); + if (!extent_will_overflow(replacement_pivot.size() + 1, 
std::nullopt)) { + journal_inner_replace(riter, replacement_r->get_laddr(), + replacement_pivot, maybe_get_delta_buffer()); + std::list dec_laddrs{l->get_laddr(), r->get_laddr()}; + return oc.tm.dec_ref(oc.t, dec_laddrs).safe_then([] (auto &&ret) { + return merge_entry_ret( + merge_entry_ertr::ready_future_marker{}, + mutation_result_t(mutation_status_t::SUCCESS, std::nullopt, std::nullopt)); + }); + } else { + logger().debug("{}::merge_entry balanced and split {} r {}", __func__, *l, *r); + //use remove and insert to instead of replace, remove not cause split do it first + journal_inner_remove(riter, maybe_get_delta_buffer()); + return make_split_insert(oc, riter, replacement_pivot, replacement_r->get_laddr()) + .safe_then([this, oc, l = l, r = r] (auto mresult) { + std::list dec_laddrs{l->get_laddr(), r->get_laddr(), get_laddr()}; + return oc.tm.dec_ref(oc.t, dec_laddrs) + .safe_then([mresult = std::move(mresult)] (auto &&ret){ + return merge_entry_ret( + merge_entry_ertr::ready_future_marker{}, + mresult); + }); + }); + } + }); + } + }); + +} + +OMapInnerNode::internal_iterator_t +OMapInnerNode::get_containing_child(const std::string &key) +{ + for (auto i = iter_begin(); i != iter_end(); ++i) { + if (i.contains(key)) + return i; + } + ceph_assert( 0 == "invalid"); + return iter_end(); +} + +std::ostream &OMapLeafNode::print_detail_l(std::ostream &out) const +{ + return out << ", size=" << get_size() + << ", depth=" << get_meta().depth; +} + +OMapLeafNode::get_value_ret +OMapLeafNode::get_value(omap_context_t oc, const std::string &key) +{ + logger().debug("{}: {} key = {}","OMapLeafNode", __func__, key); + auto ite = find_string_key(key); + if (ite != iter_end()) { + auto value = ite->get_string_val(); + return get_value_ret( + get_value_ertr::ready_future_marker{}, + std::make_pair(key, value)); + } else { + return get_value_ret( + get_value_ertr::ready_future_marker{}, + std::make_pair(key, "")); + } +} + +OMapLeafNode::insert_ret 
+OMapLeafNode::insert(omap_context_t oc, const std::string &key, const std::string &value) +{ + logger().debug("{}: {}, {} -> {}","OMapLeafNode", __func__, key, value); + if (!extent_will_overflow(key.size() + 1, value.size() + 1)) { + if (!is_pending()) { + auto mut = oc.tm.get_mutable_extent(oc.t, this)->cast(); + return mut->insert(oc, key, value); + } + auto replace_pt = find_string_key(key); + if (replace_pt != iter_end()) { + journal_leaf_update(replace_pt, key, value, maybe_get_delta_buffer()); + } else { + auto insert_pt = string_lower_bound(key); + journal_leaf_insert(insert_pt, key, value, maybe_get_delta_buffer()); + + logger().debug( + "{}: {} inserted {}->{} {}"," OMapLeafNode", __func__, + insert_pt.get_node_key(), + insert_pt.get_node_val(), + insert_pt.get_string_val()); + } + return insert_ret( + insert_ertr::ready_future_marker{}, + mutation_result_t(mutation_status_t::SUCCESS, std::nullopt, std::nullopt)); + } else { + return make_split_children(oc).safe_then([this, oc, &key, &value] (auto tuple) { + auto [left, right, pivot] = tuple; + auto replace_pt = find_string_key(key); + if (replace_pt != iter_end()) { + if (key < pivot) { //left + auto mut_iter = left->iter_idx(replace_pt->get_index()); + left->journal_leaf_update(mut_iter, key, value, left->maybe_get_delta_buffer()); + } else if (key >= pivot) { //right + auto mut_iter = right->iter_idx(replace_pt->get_index() - left->get_node_size()); + right->journal_leaf_update(mut_iter, key, value, right->maybe_get_delta_buffer()); + } + } else { + auto insert_pt = string_lower_bound(key); + if (key < pivot) { //left + auto mut_iter = left->iter_idx(insert_pt->get_index()); + left->journal_leaf_insert(mut_iter, key, value, left->maybe_get_delta_buffer()); + } else { + auto mut_iter = right->iter_idx(insert_pt->get_index() - left->get_node_size()); + right->journal_leaf_insert(mut_iter, key, value, right->maybe_get_delta_buffer()); + } + } + return oc.tm.dec_ref(oc.t, get_laddr()) + .safe_then([tuple 
= std::move(tuple)] (auto ret) { + return insert_ret( + insert_ertr::ready_future_marker{}, + mutation_result_t(mutation_status_t::SPLITTED, tuple, std::nullopt)); + }); + }); + } +} + +OMapLeafNode::rm_key_ret +OMapLeafNode::rm_key(omap_context_t oc, const std::string &key) +{ + logger().debug("{}: {} : {}","OMapLeafNode", __func__, key); + if(!is_pending()) { + auto mut = oc.tm.get_mutable_extent(oc.t, this)->cast(); + return mut->rm_key(oc, key); + } + + auto rm_pt = find_string_key(key); + if (rm_pt != iter_end()) { + journal_leaf_remove(rm_pt, maybe_get_delta_buffer()); + logger().debug( + "{}: removed {}->{} {}", __func__, + rm_pt->get_node_key(), + rm_pt->get_node_val(), + rm_pt->get_string_val()); + if (extent_is_below_min()) { + return rm_key_ret( + rm_key_ertr::ready_future_marker{}, + mutation_result_t(mutation_status_t::NEED_MERGE, std::nullopt, + this->cast())); + } else { + return rm_key_ret( + rm_key_ertr::ready_future_marker{}, + mutation_result_t(mutation_status_t::SUCCESS, std::nullopt, std::nullopt)); + } + } else { + return rm_key_ret( + rm_key_ertr::ready_future_marker{}, + mutation_result_t(mutation_status_t::FAIL, std::nullopt, std::nullopt)); + } + +} + +OMapLeafNode::list_keys_ret +OMapLeafNode::list_keys(omap_context_t oc, std::string &start, size_t max_result_size) +{ + logger().debug("{}: {}","OMapLeafNode", __func__); + auto result = list_keys_result_t(); + iterator iter = start == "" ? 
iter_begin() : string_lower_bound(start); + for (; iter != iter_end() && result.keys.size() <= max_result_size; iter++) { + result.keys.push_back(iter->get_node_val()); + } + if (iter == iter_end()) + result.next = ""; + else + result.next = iter->get_node_val(); + + return list_keys_ertr::make_ready_future(std::move(result)); + +} + +OMapLeafNode::list_ret +OMapLeafNode::list(omap_context_t oc, std::string &start, size_t max_result_size) +{ + logger().debug("{}: {}", "OMapLeafNode", __func__); + auto result = list_kvs_result_t(); + iterator iter = start == "" ? iter_begin() : string_lower_bound(start); + for (; iter != iter_end() && result.kvs.size() <= max_result_size; iter++) { + result.kvs.push_back({iter->get_node_val(), iter->get_string_val()}); + } + if (iter == iter_end()) + result.next = ""; + else + result.next = iter->get_node_val(); + + return list_ertr::make_ready_future(std::move(result)); +} + +OMapLeafNode::clear_ret +OMapLeafNode::clear(omap_context_t oc) +{ + return clear_ertr::now(); +} + +OMapLeafNode::split_children_ret +OMapLeafNode::make_split_children(omap_context_t oc) +{ + logger().debug("{}: {}","OMapLeafNode", __func__); + return oc.tm.alloc_extents(oc.t, L_ADDR_MIN, OMAP_BLOCK_SIZE, 2) + .safe_then([this] (auto &&ext_pair) { + auto left = ext_pair.front(); + auto right = ext_pair.back(); + return split_children_ret( + split_children_ertr::ready_future_marker{}, + std::make_tuple(left, right, split_into(*left, *right))); + }); +} + +OMapLeafNode::full_merge_ret +OMapLeafNode::make_full_merge(omap_context_t oc, OMapNodeRef right) +{ + ceph_assert(right->get_type() == type); + logger().debug("{}: {}","OMapLeafNode", __func__); + return oc.tm.alloc_extent(oc.t, L_ADDR_MIN, OMAP_BLOCK_SIZE) + .safe_then([this, right] (auto &&replacement) { + replacement->merge_from(*this, *right->cast()); + return full_merge_ret( + full_merge_ertr::ready_future_marker{}, + std::move(replacement)); + }); +} + +OMapLeafNode::make_balanced_ret 
+OMapLeafNode::make_balanced(omap_context_t oc, OMapNodeRef _right) +{ + ceph_assert(_right->get_type() == type); + logger().debug("{}: {}", "OMapLeafNode", __func__); + return oc.tm.alloc_extents(oc.t, L_ADDR_MIN, OMAP_BLOCK_SIZE, 2) + .safe_then([this, _right] (auto &&replacement_pair) { + auto replacement_left = replacement_pair.front(); + auto replacement_right = replacement_pair.back(); + auto &right = *_right->cast(); + return make_balanced_ret( + make_balanced_ertr::ready_future_marker{}, + std::make_tuple( + replacement_left, replacement_right, + balance_into_new_nodes( + *this, right, + *replacement_left, *replacement_right))); + }); +} + + +TransactionManager::read_extent_ertr::future +omap_load_extent(omap_context_t oc, laddr_t laddr, depth_t depth) +{ + ceph_assert(depth > 0); + if (depth > 1) { + return oc.tm.read_extents(oc.t, laddr, OMAP_BLOCK_SIZE).safe_then( + [](auto&& extents) { + assert(extents.size() == 1); + [[maybe_unused]] auto [laddr, e] = extents.front(); + return TransactionManager::read_extent_ertr::make_ready_future(std::move(e)); + }); + } else { + return oc.tm.read_extents(oc.t, laddr, OMAP_BLOCK_SIZE).safe_then( + [](auto&& extents) { + assert(extents.size() == 1); + [[maybe_unused]] auto [laddr, e] = extents.front(); + return TransactionManager::read_extent_ertr::make_ready_future(std::move(e)); + }); + } +} +} diff --git a/src/crimson/os/seastore/omap_manager/btree/omap_btree_node_impl.h b/src/crimson/os/seastore/omap_manager/btree/omap_btree_node_impl.h new file mode 100644 index 000000000000..1e0f201f8503 --- /dev/null +++ b/src/crimson/os/seastore/omap_manager/btree/omap_btree_node_impl.h @@ -0,0 +1,210 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include + +#include "include/buffer.h" + +#include "crimson/common/errorator.h" +#include "crimson/os/seastore/omap_manager.h" +#include "crimson/os/seastore/seastore_types.h" +#include 
"crimson/os/seastore/omap_manager/btree/string_kv_node_layout.h" +#include "crimson/os/seastore/omap_manager/btree/omap_types.h" +#include "crimson/os/seastore/omap_manager/btree/omap_btree_node.h" + +namespace crimson::os::seastore::omap_manager { + +/** + * OMapInnerNode + * + * Abstracts operations on and layout of internal nodes for the + * omap Tree. + * + * Layout (4k): + * num_entries: meta : keys : values : + */ + +struct OMapInnerNode + : OMapNode, + StringKVInnerNodeLayout< + omap_node_meta_t, omap_node_meta_le_t> { + using OMapInnerNodeRef = TCachedExtentRef; + using internal_iterator_t = const_iterator; + template + OMapInnerNode(T&&... t) : + OMapNode(std::forward(t)...), + StringKVInnerNodeLayout(get_bptr().c_str()) {} + + static constexpr extent_types_t type = extent_types_t::OMAP_INNER; + + omap_node_meta_t get_node_meta() const final { return get_meta(); } + bool extent_will_overflow(size_t ksize, std::optional vsize) const { + return is_overflow(ksize); + } + bool extent_is_below_min() const { return below_min(); } + uint32_t get_node_size() { return get_size(); } + + CachedExtentRef duplicate_for_write() final { + assert(delta_buffer.empty()); + return CachedExtentRef(new OMapInnerNode(*this)); + } + + delta_inner_buffer_t delta_buffer; + delta_inner_buffer_t *maybe_get_delta_buffer() { + return is_mutation_pending() ? 
&delta_buffer : nullptr; + } + + get_value_ret get_value(omap_context_t oc, const std::string &key) final; + + insert_ret insert(omap_context_t oc, const std::string &key, const std::string &value) final; + + rm_key_ret rm_key(omap_context_t oc, const std::string &key) final; + + list_keys_ret list_keys(omap_context_t oc, std::string &start, size_t max_result_size) final; + + list_ret list(omap_context_t oc, std::string &start, size_t max_result_size) final; + + clear_ret clear(omap_context_t oc) final; + + using split_children_ertr = TransactionManager::alloc_extent_ertr; + using split_children_ret = split_children_ertr::future + >; + split_children_ret make_split_children(omap_context_t oc); + + full_merge_ret make_full_merge(omap_context_t oc, OMapNodeRef right) final; + + make_balanced_ret + make_balanced(omap_context_t oc, OMapNodeRef right) final; + + using make_split_insert_ertr = TransactionManager::alloc_extent_ertr; + using make_split_insert_ret = make_split_insert_ertr::future; + make_split_insert_ret make_split_insert(omap_context_t oc, internal_iterator_t iter, + std::string key, laddr_t laddr); + + using merge_entry_ertr = TransactionManager::read_extent_ertr; + using merge_entry_ret = merge_entry_ertr::future; + merge_entry_ret merge_entry(omap_context_t oc, + internal_iterator_t iter, OMapNodeRef entry); + + using handle_split_ertr = TransactionManager::read_extent_ertr; + using handle_split_ret = handle_split_ertr::future; + handle_split_ret handle_split(omap_context_t oc, internal_iterator_t iter, + mutation_result_t mresult); + + std::ostream &print_detail_l(std::ostream &out) const final; + + extent_types_t get_type() const final { + return type; + } + + ceph::bufferlist get_delta() final { + ceph::bufferlist bl; + delta_buffer.encode(bl); + return bl; + } + + void apply_delta(const ceph::bufferlist &bl) final { + assert(bl.length()); + delta_inner_buffer_t buffer; + buffer.decode(bl); + buffer.replay(*this); + } + + internal_iterator_t 
get_containing_child(const std::string &key); + +}; +using OMapInnerNodeRef = OMapInnerNode::OMapInnerNodeRef; +/** + * OMapLeafNode + * + * Abstracts operations on and layout of leaf nodes for the + * OMap Tree. + * + * Layout (4k): + * num_entries: meta : keys : values : + */ + +struct OMapLeafNode + : OMapNode, + StringKVLeafNodeLayout< + omap_node_meta_t, omap_node_meta_le_t> { + + using OMapLeafNodeRef = TCachedExtentRef; + using internal_iterator_t = const_iterator; + template + OMapLeafNode(T&&... t) : + OMapNode(std::forward(t)...), + StringKVLeafNodeLayout(get_bptr().c_str()) {} + + static constexpr extent_types_t type = extent_types_t::OMAP_LEAF; + + omap_node_meta_t get_node_meta() const final { return get_meta(); } + bool extent_will_overflow(size_t ksize, std::optional vsize) const { + return is_overflow(ksize, *vsize); + } + bool extent_is_below_min() const { return below_min(); } + uint32_t get_node_size() { return get_size(); } + + CachedExtentRef duplicate_for_write() final { + assert(delta_buffer.empty()); + return CachedExtentRef(new OMapLeafNode(*this)); + } + + delta_leaf_buffer_t delta_buffer; + delta_leaf_buffer_t *maybe_get_delta_buffer() { + return is_mutation_pending() ? 
&delta_buffer : nullptr; + } + + get_value_ret get_value(omap_context_t oc, const std::string &key) final; + + insert_ret insert(omap_context_t oc, const std::string &key, const std::string &value) final; + + rm_key_ret rm_key(omap_context_t oc, const std::string &key) final; + + list_keys_ret list_keys(omap_context_t oc, std::string &start, size_t max_result_size) final; + + list_ret list(omap_context_t oc, std::string &start, size_t max_result_size) final; + + clear_ret clear(omap_context_t oc) final; + + using split_children_ertr = TransactionManager::alloc_extent_ertr; + using split_children_ret = split_children_ertr::future + >; + split_children_ret make_split_children(omap_context_t oc); + + full_merge_ret make_full_merge(omap_context_t oc, OMapNodeRef right) final; + + make_balanced_ret make_balanced(omap_context_t oc, OMapNodeRef _right) final; + + extent_types_t get_type() const final { + return type; + } + + ceph::bufferlist get_delta() final { + ceph::bufferlist bl; + delta_buffer.encode(bl); + return bl; + } + + void apply_delta(const ceph::bufferlist &_bl) final { + assert(_bl.length()); + ceph::bufferlist bl = _bl; + bl.rebuild(); + delta_leaf_buffer_t buffer; + buffer.decode(bl); + buffer.replay(*this); + } + + std::ostream &print_detail_l(std::ostream &out) const final; + + std::pair + get_leaf_entries(std::string &key); + +}; +using OMapLeafNodeRef = OMapLeafNode::OMapLeafNodeRef; + +std::ostream &operator<<(std::ostream &out, const omap_inner_key_t &rhs); +std::ostream &operator<<(std::ostream &out, const omap_leaf_key_t &rhs); +} diff --git a/src/crimson/os/seastore/omap_manager/btree/omap_types.h b/src/crimson/os/seastore/omap_manager/btree/omap_types.h new file mode 100644 index 000000000000..d1bbb4c2ed17 --- /dev/null +++ b/src/crimson/os/seastore/omap_manager/btree/omap_types.h @@ -0,0 +1,128 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once +#include 
"crimson/os/seastore/seastore_types.h" + +namespace crimson::os::seastore::omap_manager { + +struct omap_node_meta_t { + depth_t depth = 0; + + std::pair split_into() const { + return std::make_pair( + omap_node_meta_t{depth}, + omap_node_meta_t{depth}); + } + + static omap_node_meta_t merge_from( + const omap_node_meta_t &lhs, const omap_node_meta_t &rhs) { + assert(lhs.depth == rhs.depth); + return omap_node_meta_t{lhs.depth}; + } + + static std::pair + rebalance(const omap_node_meta_t &lhs, const omap_node_meta_t &rhs) { + assert(lhs.depth == rhs.depth); + return std::make_pair( + omap_node_meta_t{lhs.depth}, + omap_node_meta_t{lhs.depth}); + } +}; + +struct omap_node_meta_le_t { + depth_le_t depth = init_les32(0); + + omap_node_meta_le_t() = default; + omap_node_meta_le_t(const omap_node_meta_le_t &) = default; + explicit omap_node_meta_le_t(const omap_node_meta_t &val) + : depth(init_les32(val.depth)) {} + + operator omap_node_meta_t() const { + return omap_node_meta_t{ depth }; + } +}; + +struct omap_inner_key_t { + uint16_t key_off = 0; + uint16_t key_len = 0; + laddr_t laddr = 0; + + omap_inner_key_t() = default; + omap_inner_key_t(uint16_t off, uint16_t len, laddr_t addr) + : key_off(off), key_len(len), laddr(addr) {} +}; + +struct omap_inner_key_le_t { + ceph_le16 key_off = init_le16(0); + ceph_le16 key_len = init_le16(0); + laddr_le_t laddr = laddr_le_t(0); + + omap_inner_key_le_t() = default; + omap_inner_key_le_t(const omap_inner_key_le_t &) = default; + explicit omap_inner_key_le_t(const omap_inner_key_t &key) + : key_off(init_le16(key.key_off)), + key_len(init_le16(key.key_len)), + laddr(laddr_le_t(key.laddr)) {} + + operator omap_inner_key_t() const { + return omap_inner_key_t{uint16_t(key_off), uint16_t(key_len), laddr_t(laddr)}; + } + + omap_inner_key_le_t& operator=(omap_inner_key_t key) { + key_off = init_le16(key.key_off); + key_len = init_le16(key.key_len); + laddr = laddr_le_t(key.laddr); + return *this; + } + + inline bool operator==(const 
omap_inner_key_le_t b) const { + return key_off == b.key_off && key_len == b.key_len && laddr == b.laddr; + } +}; + +struct omap_leaf_key_t { + uint16_t key_off = 0; + uint16_t key_len = 0; + uint16_t val_off = 0; + uint16_t val_len = 0; + + omap_leaf_key_t() = default; + omap_leaf_key_t(uint16_t k_off, uint16_t k_len, uint16_t v_off, uint16_t v_len) + : key_off(k_off), key_len(k_len), val_off(v_off), val_len(v_len) {} +}; + +struct omap_leaf_key_le_t { + ceph_le16 key_off = init_le16(0); + ceph_le16 key_len = init_le16(0); + ceph_le16 val_off = init_le16(0); + ceph_le16 val_len = init_le16(0); + + omap_leaf_key_le_t() = default; + omap_leaf_key_le_t(const omap_leaf_key_le_t &) = default; + explicit omap_leaf_key_le_t(const omap_leaf_key_t &key) + : key_off(init_le16(key.key_off)), + key_len(init_le16(key.key_len)), + val_off(init_le16(key.val_off)), + val_len(init_le16(key.val_len)) {} + + operator omap_leaf_key_t() const { + return omap_leaf_key_t{uint16_t(key_off), uint16_t(key_len), + uint16_t(val_off), uint16_t(val_len)}; + } + + omap_leaf_key_le_t& operator=(omap_leaf_key_t key) { + key_off = init_le16(key.key_off); + key_len = init_le16(key.key_len); + val_off = init_le16(key.val_off); + val_len = init_le16(key.val_len); + return *this; + } + + inline bool operator==(const omap_leaf_key_le_t b) const { + return key_off == b.key_off && key_len == b.key_len && + val_off == b.val_off && val_len == b.val_len; + } +}; + +} diff --git a/src/crimson/os/seastore/omap_manager/btree/string_kv_node_layout.h b/src/crimson/os/seastore/omap_manager/btree/string_kv_node_layout.h new file mode 100644 index 000000000000..bf5bac5352a1 --- /dev/null +++ b/src/crimson/os/seastore/omap_manager/btree/string_kv_node_layout.h @@ -0,0 +1,1777 @@ +// -*- mode:C++; tab-width:8; c-basic-index:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include +#include + +#include "include/byteorder.h" + +#include "crimson/common/layout.h" +#include 
"crimson/common/fixed_kv_node_layout.h" +#include "crimson/os/seastore/omap_manager/btree/omap_types.h" + +#define BlockSize 4096 +namespace crimson::os::seastore::omap_manager { + +template < + typename Meta, + typename MetaInt, + bool VALIDATE_INVARIANTS=true> class StringKVInnerNodeLayout; + +template < + typename Meta, + typename MetaInt, + bool VALIDATE_INVARIANTS=true> class StringKVLeafNodeLayout; + + +/** + * StringKVInnerNodeLayout + * + * Reusable implementation of a fixed size key mapping + * omap_inner_key_t(fixed) -> V(string) with internal representations omap_inner_key_le_t. + * + * Uses absl::container_internal::Layout for the actual key memory layout. + * + * The primary interface exposed is centered on the iterator + * and related methods. + * + * Also included are helpers for doing splits and merges as for a btree. + */ +template < + typename Meta, + typename MetaInt, + bool VALIDATE_INVARIANTS> +class StringKVInnerNodeLayout { + char *buf = nullptr; + + using L = absl::container_internal::Layout; + static constexpr L layout{1, 1, 1}; // = L::Partial(1, 1, 1); + +public: + template + struct iter_t { + friend class StringKVInnerNodeLayout; + using parent_t = typename crimson::common::maybe_const_t::type; + + parent_t node; + uint16_t index; + + iter_t( + parent_t parent, + uint16_t index) : node(parent), index(index) {} + + iter_t(const iter_t &) = default; + iter_t(iter_t &&) = default; + iter_t &operator=(const iter_t &) = default; + iter_t &operator=(iter_t &&) = default; + + operator iter_t() const { + static_assert(!is_const); + return iter_t(node, index); + } + + // Work nicely with for loops without requiring a nested type. 
+ iter_t &operator*() { return *this; } + iter_t *operator->() { return this; } + + iter_t operator++(int) { + auto ret = *this; + ++index; + return ret; + } + + iter_t &operator++() { + ++index; + return *this; + } + + uint16_t operator-(const iter_t &rhs) const { + assert(rhs.node == node); + return index - rhs.index; + } + + iter_t operator+(uint16_t off) const { + return iter_t( + node, + index + off); + } + iter_t operator-(uint16_t off) const { + return iter_t( + node, + index - off); + } + + uint16_t operator<(const iter_t &rhs) const { + assert(rhs.node == node); + return index < rhs.index; + } + + bool operator==(const iter_t &rhs) const { + assert(node == rhs.node); + return rhs.index == index; + } + + bool operator!=(const iter_t &rhs) const { + return !(*this == rhs); + } + + omap_inner_key_t get_node_key() const { + omap_inner_key_le_t kint = node->get_node_key_ptr()[index]; + return omap_inner_key_t(kint); + } + + char *get_node_val_ptr() { + auto tail = node->buf + BlockSize; + if (*this == node->iter_end()) + return tail; + else { + return tail - static_cast(get_node_key().key_off); + } + } + + const char *get_node_val_ptr() const { + auto tail = node->buf + BlockSize; + if ( *this == node->iter_end()) + return tail; + else { + return tail - static_cast(get_node_key().key_off); + } + } + + void set_node_val(const std::string &val) { + static_assert(!is_const); + std::strcpy((char*)get_node_val_ptr(), val.c_str()); //copy char* to char* include "\0" + } + + std::string get_node_val(){ + std::string s(get_node_val_ptr()); + return s; + } + std::string get_node_val() const{ + std::string s(get_node_val_ptr()); + return s; + } + + bool contains(const std::string &key) const { + auto next = *this + 1; + if (next == node->iter_end()) + return get_node_val() <= key; + + return (get_node_val() <= key) && (next->get_node_val() > key); + } + + uint16_t get_index() const { + return index; + } + + private: + void set_node_key(omap_inner_key_t _lb) const { + 
static_assert(!is_const); + omap_inner_key_le_t lb; + lb = _lb; + node->get_node_key_ptr()[index] = lb; + } + + typename crimson::common::maybe_const_t::type get_node_key_ptr() const { + return reinterpret_cast< + typename crimson::common::maybe_const_t::type>( + node->get_node_key_ptr() + index); + } + + }; + using const_iterator = iter_t; + using iterator = iter_t; + + struct delta_inner_t { + enum class op_t : uint8_t { + INSERT, + UPDATE, + REMOVE, + } op; + omap_inner_key_le_t key; + std::string val; + + void replay(StringKVInnerNodeLayout &l) { + switch (op) { + case op_t::INSERT: { + l.inner_insert(l.string_lower_bound(val), key, val); + break; + } + case op_t::UPDATE: { + auto iter = l.find_string_key(val); + assert(iter != l.iter_end()); + l.inner_update(iter, key); + break; + } + case op_t::REMOVE: { + auto iter = l.find_string_key(val); + assert(iter != l.iter_end()); + l.inner_remove(iter); + break; + } + default: + assert(0 == "Impossible"); + } + } + + bool operator==(const delta_inner_t &rhs) const { + return op == rhs.op && + key == rhs.key && + val == rhs.val; + } + }; + +public: + class delta_inner_buffer_t { + std::vector buffer; + public: + bool empty() const { + return buffer.empty(); + } + void insert( + const omap_inner_key_t &key, + const std::string val) { + omap_inner_key_le_t k; + k = key; + buffer.push_back( + delta_inner_t{ + delta_inner_t::op_t::INSERT, + k, + val + }); + } + void update( + const omap_inner_key_t &key, + const std::string &val) { + omap_inner_key_le_t k; + k = key; + buffer.push_back( + delta_inner_t{ + delta_inner_t::op_t::UPDATE, + k, + val + }); + } + void remove(std::string val) { + buffer.push_back( + delta_inner_t{ + delta_inner_t::op_t::REMOVE, + omap_inner_key_le_t(), + val + }); + } + + void replay(StringKVInnerNodeLayout &node) { + for (auto &i: buffer) { + i.replay(node); + } + } + size_t get_bytes() const { + size_t size = 0; + for (auto &i: buffer) { + size += sizeof(i.op_t) + sizeof(i.key) + i.val.size(); 
+ } + return size; + } + //copy out + void encode(ceph::bufferlist &bl) { + using ceph::encode; + uint32_t num = buffer.size(); + encode(num, bl); + for (auto &&i: buffer) { + encode(i.op, bl); + bl.append((char*)&(i.key), sizeof(i.key)); + encode(i.val, bl); + } + buffer.clear(); + } + //copy in + void decode(const ceph::bufferlist &bl) { + using ceph::decode; + auto p = bl.cbegin(); + uint32_t num; + decode (num, p); + while (num--) { + delta_inner_t delta; + decode(delta.op, p); + omap_inner_key_le_t key; + p.copy(sizeof(key), (char*)&(key)); + delta.key = key; + decode(delta.val, p); + buffer.push_back(delta); + } + } + + bool operator==(const delta_inner_buffer_t &rhs) const { + return buffer == rhs.buffer; + } + }; + + void journal_inner_insert( + const_iterator _iter, + const laddr_t laddr, + const std::string val, + delta_inner_buffer_t *recorder) { + auto iter = iterator(this, _iter.index); + omap_inner_key_t node_key; + node_key.laddr = laddr; + node_key.key_len = val.size() + 1; + node_key.key_off = iter.get_index() == 0 ? + node_key.key_len : + (iter - 1).get_node_key().key_off + node_key.key_len; + if (recorder) { + recorder->insert( + node_key, + val); + } + inner_insert(iter, node_key, val); + } + + void journal_inner_update( + const_iterator _iter, + const laddr_t laddr, + delta_inner_buffer_t *recorder) { + auto iter = iterator(this, _iter.index); + auto node_key = iter.get_node_key(); + node_key.laddr = laddr; + if (recorder) { + recorder->update(node_key, iter->get_node_val()); + } + inner_update(iter, node_key); + } + + void journal_inner_replace( + const_iterator _iter, + const laddr_t laddr, + const std::string val, + delta_inner_buffer_t *recorder) { + auto iter = iterator(this, _iter.index); + omap_inner_key_t node_key; + node_key.laddr = laddr; + node_key.key_len = val.size() + 1; + node_key.key_off = iter.get_index() == 0? 
+ node_key.key_len : + (iter - 1).get_node_key().key_off + node_key.key_len; + if (recorder) { + recorder->remove(iter->get_node_val()); + recorder->insert(node_key, val); + } + inner_replace(iter, node_key, val); + } + + void journal_inner_remove( + const_iterator _iter, + delta_inner_buffer_t *recorder) { + auto iter = iterator(this, _iter.index); + if (recorder) { + recorder->remove(iter->get_node_val()); + } + inner_remove(iter); + } + + StringKVInnerNodeLayout(char *buf) : + buf(buf) {} + + uint32_t get_size() const { + ceph_le32 &size = *layout.template Pointer<0>(buf); + return uint32_t(size); + } + + /** + * set_size + * + * Set size representation to match size + */ + void set_size(uint32_t size) { + ceph_le32 s; + s = size; + *layout.template Pointer<0>(buf) = s; + } + + const_iterator iter_begin() const { + return const_iterator( + this, + 0); + } + + const_iterator iter_end() const { + return const_iterator( + this, + get_size()); + } + + iterator iter_begin() { + return iterator( + this, + 0); + } + + iterator iter_end() { + return iterator( + this, + get_size()); + } + + const_iterator iter_idx(uint16_t off) const { + return const_iterator( + this, + off); + } + + const_iterator string_lower_bound(std::string str) const { + uint16_t start = 0, end = get_size(); + while (start != end) { + unsigned mid = (start + end) / 2; + const_iterator iter(this, mid); + std::string s = iter->get_node_val(); + if (s < str) + start = ++mid; + if ( s > str) + end = mid; + if (s == str) + return iter; + } + return const_iterator(this, start); + } + + iterator string_lower_bound(std::string str) { + const auto &tref = *this; + return iterator(this, tref.string_lower_bound(str).index); + } + + const_iterator string_upper_bound(std::string str) const { + auto ret = iter_begin(); + for (; ret != iter_end(); ++ret) { + std::string s = ret->get_node_val(); + if (s > str) + break; + } + return ret; + } + + iterator string_upper_bound(std::string str) { + const auto &tref = 
*this; + return iterator(this, tref.string_upper_bound(str).index); + } + + const_iterator find_string_key(const std::string &str) const { + auto ret = iter_begin(); + for (; ret != iter_end(); ++ret) { + std::string s = ret->get_node_val(); + if (s == str) + break; + } + return ret; + } + iterator find_string_key(const std::string &str) { + const auto &tref = *this; + return iterator(this, tref.find_string_key(str).index); + } + + const_iterator get_split_pivot() const { + uint32_t total_size = omap_inner_key_t(get_node_key_ptr()[get_size()-1]).key_off; + uint32_t pivot_size = total_size / 2; + uint32_t size = 0; + for (auto ite = iter_begin(); ite < iter_end(); ite++) { + auto node_key = ite->get_node_key(); + size += node_key.key_len; + if (size >= pivot_size){ + return ite; + } + } + return iter_end(); + } + + + /** + * get_meta/set_meta + * + * Enables stashing a templated type within the layout. + * Cannot be modified after initial write as it is not represented + * in delta_t + */ + Meta get_meta() const { + MetaInt &metaint = *layout.template Pointer<1>(buf); + return Meta(metaint); + } + void set_meta(const Meta &meta) { + *layout.template Pointer<1>(buf) = MetaInt(meta); + } + + uint32_t used_space() const { + uint32_t count = get_size(); + if (count) { + omap_inner_key_t last_key = omap_inner_key_t(get_node_key_ptr()[count-1]); + return last_key.key_off + count * sizeof(omap_inner_key_le_t); + } else { + return 0; + } + } + + uint32_t free_space() const { + return capacity() - used_space(); + } + + uint16_t capacity() const { + return BlockSize - (reinterpret_cast(layout.template Pointer<2>(buf))- + reinterpret_cast(layout.template Pointer<0>(buf))); + } + + char* from_end(int off) { + return buf + (BlockSize - off); + } + + bool is_overflow(size_t ksize) const { + return free_space() < (sizeof(omap_inner_key_le_t) + ksize); + } + bool below_min() const { + return free_space() > (capacity() / 2); + } + + bool operator==(const StringKVInnerNodeLayout 
&rhs) const { + if (get_size() != rhs.get_size()) { + return false; + } + + auto iter = iter_begin(); + auto iter2 = rhs.iter_begin(); + while (iter != iter_end()) { + if (iter->get_node_key() != iter2->get_node_key() || + iter->get_node_val() != iter2->get_node_val()) { + return false; + } + iter++; + iter2++; + } + return true; + } + + /** + * split_into + * + * Takes *this and splits its contents into left and right. + */ + std::string split_into( + StringKVInnerNodeLayout &left, + StringKVInnerNodeLayout &right) const { + auto piviter = get_split_pivot(); + assert(piviter != iter_end()); + + left.copy_from_foreign_head(left.iter_begin(), iter_begin(), piviter); + left.set_size(piviter - iter_begin()); + + right.copy_from_foreign_back(right.iter_begin(), piviter, iter_end()); + right.set_size(iter_end() - piviter); + + auto [lmeta, rmeta] = get_meta().split_into(); + left.set_meta(lmeta); + right.set_meta(rmeta); + + return piviter->get_node_val(); + } + + /** + * merge_from + * + * Takes two nodes and copies their contents into *this. 
+ * + * precondition: left.size() + right.size() < CAPACITY + */ + void merge_from( + const StringKVInnerNodeLayout &left, + const StringKVInnerNodeLayout &right) { + copy_from_foreign_head( + iter_end(), + left.iter_begin(), + left.iter_end()); + set_size(left.get_size()); + + append_copy_from_foreign_head( + iter_end(), + right.iter_begin(), + right.iter_end()); + set_size(left.get_size() + right.get_size()); + set_meta(Meta::merge_from(left.get_meta(), right.get_meta())); + } + + /** + * balance_into_new_nodes + * + * Takes the contents of left and right and copies them into + * replacement_left and replacement_right such that + * the size of replacement_left just >= 1/2 of (left + right) + */ + static std::string balance_into_new_nodes( + const StringKVInnerNodeLayout &left, + const StringKVInnerNodeLayout &right, + StringKVInnerNodeLayout &replacement_left, + StringKVInnerNodeLayout &replacement_right) + { + uint32_t left_size = omap_inner_key_t(left.get_node_key_ptr()[left.get_size()-1]).key_off; + uint32_t right_size = omap_inner_key_t(right.get_node_key_ptr()[right.get_size()-1]).key_off; + uint32_t total = left_size + right_size; + uint32_t pivot_size = total / 2; + uint32_t pivot_idx = 0; + if (pivot_size < left_size) { + uint32_t size = 0; + for (auto ite = left.iter_begin(); ite < left.iter_end(); ite++) { + auto node_key = ite->get_node_key(); + size += node_key.key_len; + if (size >= pivot_size){ + pivot_idx = ite.get_index(); + break; + } + } + } else { + uint32_t more_size = pivot_size - left_size; + uint32_t size = 0; + for (auto ite = right.iter_begin(); ite < right.iter_end(); ite++) { + auto node_key = ite->get_node_key(); + size += node_key.key_len; + if (size >= more_size){ + pivot_idx = ite.get_index() + left.get_size(); + break; + } + } + } + + auto replacement_pivot = pivot_idx >= left.get_size() ? 
+ right.iter_idx(pivot_idx - left.get_size())->get_node_val() : + left.iter_idx(pivot_idx)->get_node_val(); + + if (pivot_size < left_size) { + replacement_left.copy_from_foreign_head( + replacement_left.iter_end(), + left.iter_begin(), + left.iter_idx(pivot_idx)); + replacement_left.set_size(pivot_idx); + + replacement_right.copy_from_foreign_back( + replacement_right.iter_end(), + left.iter_idx(pivot_idx), + left.iter_end()); + replacement_right.set_size(left.get_size() - pivot_idx); + + replacement_right.append_copy_from_foreign_head( + replacement_right.iter_end(), + right.iter_begin(), + right.iter_end()); + replacement_right.set_size(right.get_size() + left.get_size()- pivot_idx); + } else { + replacement_left.copy_from_foreign_head( + replacement_left.iter_end(), + left.iter_begin(), + left.iter_end()); + replacement_left.set_size(left.get_size()); + + replacement_left.append_copy_from_foreign_head( + replacement_left.iter_end(), + right.iter_begin(), + right.iter_idx(pivot_idx - left.get_size())); + replacement_left.set_size(pivot_idx); + + replacement_right.copy_from_foreign_back( + replacement_right.iter_end(), + right.iter_idx(pivot_idx - left.get_size()), + right.iter_end()); + replacement_right.set_size(right.get_size() + left.get_size() - pivot_idx); + } + + auto [lmeta, rmeta] = Meta::rebalance( + left.get_meta(), right.get_meta()); + replacement_left.set_meta(lmeta); + replacement_right.set_meta(rmeta); + return replacement_pivot; + } + +private: + void inner_insert( + iterator iter, + const omap_inner_key_t key, + const std::string &val) { + if (VALIDATE_INVARIANTS) { + if (iter != iter_begin()) { + assert((iter - 1)->get_node_val() < val); + } + if (iter != iter_end()) { + assert(iter->get_node_val() > val); + } + assert(is_overflow(val.size() + 1) == false); + } + if (get_size() != 0 && iter != iter_end()) + local_move_back(key, iter + 1, iter, iter_end()); + + iter->set_node_key(key); + set_size(get_size() + 1); + iter->set_node_val(val); + } + 
+  // Overwrite the key descriptor of the entry at iter (iter must not be end).
+  void inner_update(
+    iterator iter,
+    omap_inner_key_t key ) {
+    assert(iter != iter_end());
+    iter->set_node_key(key);
+  }
+
+  // Replace the entry at iter with (key, val): the old entry is removed and
+  // the new one is inserted at the same position.
+  void inner_replace(
+    iterator iter,
+    const omap_inner_key_t &key,
+    const std::string &val) {
+    assert(iter != iter_end());
+    if (VALIDATE_INVARIANTS) {
+      if (iter != iter_begin()) {
+        assert((iter - 1)->get_node_val() < val);
+      }
+      if ((iter + 1) != iter_end()) {
+        assert((iter + 1)->get_node_val() > val);
+      }
+      assert(is_overflow(val.size() + 1) == false);
+    }
+    inner_remove(iter);
+    inner_insert(iter, key, val);
+  }
+
+  // Remove the entry at iter. Entries after it (if any) are moved ahead over
+  // it; removing the last entry only shrinks the size.
+  void inner_remove(iterator iter) {
+    assert(iter != iter_end());
+    if ((iter + 1) != iter_end())
+      local_move_ahead(iter, iter + 1, iter_end());
+    set_size(get_size() - 1);
+  }
+
+  /**
+   * get_node_key_ptr
+   *
+   * Get pointer to start of key array
+   */
+  omap_inner_key_le_t *get_node_key_ptr() {
+    return L::Partial(1, 1, get_size()).template Pointer<2>(buf);
+  }
+  const omap_inner_key_le_t *get_node_key_ptr() const {
+    return L::Partial(1, 1, get_size()).template Pointer<2>(buf);
+  }
+
+  /**
+   * copy_from_foreign_head
+   *
+   * Copy from another node begin entries to this node.
+   * [from_src, to_src) is another node entry range.
+   * tgt is this node entry to copy to.
+   * tgt and from_src must be from different nodes.
+   * from_src and to_src must be in the same node.
+   *
+   * An empty range ([from_src, to_src) with from_src == to_src) is a no-op.
+   *
+   * NOTE(review): the number of value bytes copied is the key_off of the
+   * last copied entry, independent of from_src, so this looks like it
+   * expects from_src to be the source node's iter_begin(); every caller
+   * visible in this class passes iter_begin() -- confirm before reusing.
+   */
+  static void copy_from_foreign_head(
+    iterator tgt,
+    const_iterator from_src,
+    const_iterator to_src) {
+    assert(tgt->node != from_src->node);
+    assert(to_src->node == from_src->node);
+    // Nothing to copy. Without this guard (to_src - 1) below would step
+    // before the first entry when the range is empty, e.g. when the split
+    // pivot is the first entry of the source node.
+    if (from_src == to_src)
+      return;
+
+    void* des = tgt.node->from_end((to_src -1)->get_node_key().key_off);
+    void* src = (to_src - 1)->get_node_val_ptr();
+    size_t len = (to_src -1)->get_node_key().key_off;
+    memcpy(des, src, len);
+    memcpy(
+      tgt->get_node_key_ptr(), from_src->get_node_key_ptr(),
+      to_src->get_node_key_ptr() - from_src->get_node_key_ptr());
+  }
+
+  /**
+   * copy_from_foreign_back
+   *
+   * Copy from another node back entries to this node.
+   * [from_src, to_src) is another node entry range.
+ * tgt is this node entry to copy to. + * tgt and from_src must be from different nodes. + * from_src and to_src must be in the same node. + */ + void copy_from_foreign_back( + iterator tgt, + const_iterator from_src, + const_iterator to_src) { + assert(tgt->node != from_src->node); + assert(to_src->node == from_src->node); + auto offset = from_src.get_index() == 0? 0: (from_src-1)->get_node_key().key_off; + void* des = tgt.node->from_end((to_src -1)->get_node_key().key_off - offset); + void* src = (to_src - 1)->get_node_val_ptr(); + size_t len = from_src.get_index() == 0? (to_src -1)->get_node_key().key_off: + (from_src-1)->get_node_val_ptr() - (to_src -1)->get_node_val_ptr(); + memcpy(des, src, len); + memcpy( + tgt->get_node_key_ptr(), from_src->get_node_key_ptr(), + to_src->get_node_key_ptr() - from_src->get_node_key_ptr()); + if ( from_src.get_index() == 0) + return; + + omap_inner_key_t key = (from_src - 1)->get_node_key(); + auto end_idx = tgt.get_index() + to_src.get_index() - from_src.get_index(); + for (auto ite = tgt; ite.get_index() != end_idx; ite++) { + omap_inner_key_t node_key = ite->get_node_key(); + node_key.key_off -= key.key_off; + ite->set_node_key(node_key); + } + } + + /** + * append copy_from_foreign_ahead + * + * append another node head entries to this node back. + * [from_src, to_src) is another node entry range. + * tgt is this node entry to copy to. + * tgt and from_src must be from different nodes. + * from_src and to_src must be in the same node. 
+ */ + void append_copy_from_foreign_head( + iterator tgt, + const_iterator from_src, + const_iterator to_src) { + assert(tgt->node != from_src->node); + assert(to_src->node == from_src->node); + if (from_src == to_src) + return; + + void* des = tgt.node->from_end((to_src -1)->get_node_key().key_off + (tgt - 1)->get_node_key().key_off); + void* src = (to_src - 1)->get_node_val_ptr(); + size_t len = (to_src -1)->get_node_key().key_off; + memcpy(des, src, len); + memcpy( + tgt->get_node_key_ptr(), from_src->get_node_key_ptr(), + to_src->get_node_key_ptr() - from_src->get_node_key_ptr()); + omap_inner_key_t key = (tgt - 1)->get_node_key(); + auto end_idx = tgt.get_index() + to_src.get_index() - from_src.get_index(); + for (auto ite = tgt; ite.get_index() != end_idx; ite++) { + omap_inner_key_t node_key = ite->get_node_key(); + node_key.key_off += key.key_off; + ite->set_node_key(node_key); + } + } + + /** + * local_move_back + * + * move this node entries range [from_src, to_src) back to tgt position. + * + * tgt, from_src, and to_src must be from the same node. + */ + static void local_move_back( + omap_inner_key_t key, + iterator tgt, + iterator from_src, + iterator to_src) { + assert(tgt->node == from_src->node); + assert(to_src->node == from_src->node); + void* des = (to_src-1)->get_node_val_ptr() - key.key_len; + void* src = (to_src-1)->get_node_val_ptr(); + size_t len = from_src.get_index() == 0? 
+ from_src->node->buf + BlockSize - (to_src-1)->get_node_val_ptr(): + (from_src-1)->get_node_val_ptr() - (to_src-1)->get_node_val_ptr(); + + memmove(des, src, len); + for ( auto ite = from_src; ite < to_src; ite++) { + omap_inner_key_t node_key = ite->get_node_key(); + node_key.key_off += key.key_len; + ite->set_node_key(node_key); + } + memmove( + tgt->get_node_key_ptr(), from_src->get_node_key_ptr(), + to_src->get_node_key_ptr() - from_src->get_node_key_ptr()); + } + + /** + * local_move_ahead + * + * move this node entries range [from_src, to_src) ahead to tgt position. + * + * tgt, from_src, and to_src must be from the same node. + */ + static void local_move_ahead( + iterator tgt, + iterator from_src, + iterator to_src) { + assert(tgt->node == from_src->node); + assert(to_src->node == from_src->node); + assert(from_src.get_index() != 0); + omap_inner_key_t key = tgt->get_node_key(); + void* des = (to_src-1)->get_node_val_ptr() + key.key_len; + void* src = (to_src-1)->get_node_val_ptr(); + size_t len = (from_src-1)->get_node_val_ptr() - (to_src-1)->get_node_val_ptr(); + memmove(des, src, len); + for ( auto ite = from_src; ite < to_src; ite++) { + omap_inner_key_t node_key = ite->get_node_key(); + node_key.key_off -= key.key_len; + ite->set_node_key(node_key); + } + memmove( + tgt->get_node_key_ptr(), from_src->get_node_key_ptr(), + to_src->get_node_key_ptr() - from_src->get_node_key_ptr()); + } + +}; + +template < + typename Meta, + typename MetaInt, + bool VALIDATE_INVARIANTS> +class StringKVLeafNodeLayout { + char *buf = nullptr; + + using L = absl::container_internal::Layout; + static constexpr L layout{1, 1, 1}; // = L::Partial(1, 1, 1); + +public: + template + struct iter_t { + friend class StringKVLeafNodeLayout; + using parent_t = typename crimson::common::maybe_const_t::type; + + parent_t node; + uint16_t index; + + iter_t( + parent_t parent, + uint16_t index) : node(parent), index(index) {} + + iter_t(const iter_t &) = default; + iter_t(iter_t &&) = 
default; + iter_t &operator=(const iter_t &) = default; + iter_t &operator=(iter_t &&) = default; + + operator iter_t() const { + static_assert(!is_const); + return iter_t(node, index); + } + + // Work nicely with for loops without requiring a nested type. + iter_t &operator*() { return *this; } + iter_t *operator->() { return this; } + + iter_t operator++(int) { + auto ret = *this; + ++index; + return ret; + } + + iter_t &operator++() { + ++index; + return *this; + } + + uint16_t operator-(const iter_t &rhs) const { + assert(rhs.node == node); + return index - rhs.index; + } + + iter_t operator+(uint16_t off) const { + return iter_t( + node, + index + off); + } + iter_t operator-(uint16_t off) const { + return iter_t( + node, + index - off); + } + + uint16_t operator<(const iter_t &rhs) const { + assert(rhs.node == node); + return index < rhs.index; + } + + bool operator==(const iter_t &rhs) const { + assert(node == rhs.node); + return rhs.index == index; + } + + bool operator!=(const iter_t &rhs) const { + assert(node == rhs.node); + return index != rhs.index; + } + + omap_leaf_key_t get_node_key() const { + omap_leaf_key_le_t kint = node->get_node_key_ptr()[index]; + return omap_leaf_key_t(kint); + } + + char *get_node_val_ptr() { + auto tail = node->buf + BlockSize; + if ( *this == node->iter_end()) + return tail; + else + return tail - static_cast(get_node_key().key_off); + } + + const char *get_node_val_ptr() const { + auto tail = node->buf + BlockSize; + if ( *this == node->iter_end()) + return tail; + else + return tail - static_cast(get_node_key().key_off); + } + + char *get_string_val_ptr() { + auto tail = node->buf + BlockSize; + return tail - static_cast(get_node_key().val_off); + } + + const char *get_string_val_ptr() const { + auto tail = node->buf + BlockSize; + return tail - static_cast(get_node_key().val_off); + } + + void set_node_val(std::string val) const { + static_assert(!is_const); + std::strcpy((char*)get_node_val_ptr(), val.c_str()); //copy 
char* to char* include "\0"
+    }
+
+    // Key string of this entry, read as a NUL-terminated string.
+    std::string get_node_val() {
+      std::string s(get_node_val_ptr());
+      return s;
+    }
+    std::string get_node_val() const{
+      std::string s(get_node_val_ptr());
+      return s;
+    }
+
+    // Store val (including its terminating NUL) at this entry's value slot.
+    void set_string_val(std::string val) {
+      static_assert(!is_const);
+      std::strcpy((char*)get_string_val_ptr(), val.c_str()); //copy char* to char* include "\0"
+    }
+
+    // Value string of this entry, read as a NUL-terminated string.
+    std::string get_string_val() const {
+      std::string s(get_string_val_ptr());
+      return s;
+    }
+
+    /**
+     * contains
+     *
+     * True if key falls into the range covered by this entry: the first
+     * entry has no lower bound, the last entry has no upper bound, any
+     * other entry covers [this key, next key).
+     */
+    bool contains(const std::string &key) const {
+      auto next = *this + 1;
+      // Decide "last entry" before touching next: get_node_val() on
+      // iter_end() would read at buf + BlockSize, i.e. past the block.
+      const bool is_last = (next == node->iter_end());
+      if (*this == node->iter_begin()){
+        // Single-entry node: no lower and no upper bound.
+        return is_last || next->get_node_val() > key;
+      }
+      if (is_last)
+        return get_node_val() <= key;
+
+      return (get_node_val() <= key) && (next->get_node_val() > key);
+    }
+
+    uint16_t get_index() const {
+      return index;
+    }
+
+  private:
+    // Write the key descriptor for this entry into the node's key array.
+    void set_node_key(omap_leaf_key_t _lb) const {
+      static_assert(!is_const);
+      omap_leaf_key_le_t lb;
+      lb = _lb;
+      node->get_node_key_ptr()[index] = lb;
+    }
+
+    // Address of this entry's slot in the node's key array.
+    typename crimson::common::maybe_const_t::type get_node_key_ptr() const {
+      return reinterpret_cast<
+        typename crimson::common::maybe_const_t::type>(
+          node->get_node_key_ptr() + index);
+    }
+  };
+  using const_iterator = iter_t;
+  using iterator = iter_t;
+
+  // One journaled mutation of a leaf node, replayable against a layout.
+  struct delta_leaf_t {
+    enum class op_t : uint8_t {
+      INSERT,
+      UPDATE,
+      REMOVE,
+    } op;
+    std::string key;
+    std::string val;
+
+    // Apply this mutation to l. UPDATE and REMOVE require key to be present.
+    void replay(StringKVLeafNodeLayout &l) {
+      switch (op) {
+      case op_t::INSERT: {
+        l.leaf_insert(l.string_lower_bound(key), key, val);
+        break;
+      }
+      case op_t::UPDATE: {
+        auto iter = l.find_string_key(key);
+        assert(iter != l.iter_end());
+        l.leaf_update(iter, key, val);
+        break;
+      }
+      case op_t::REMOVE: {
+        auto iter = l.find_string_key(key);
+        assert(iter != l.iter_end());
+        l.leaf_remove(iter);
+        break;
+      }
+      default:
+        assert(0 == "Impossible");
+      }
+    }
+
+    bool operator==(const delta_leaf_t &rhs) const {
+      return op == rhs.op &&
+        key == rhs.key &&
+        val == rhs.val;
+    }
+
};
+
+public:
+  // Ordered log of leaf mutations recorded while a node is being modified.
+  class delta_leaf_buffer_t {
+    std::vector buffer;
+  public:
+    bool empty() const {
+      return buffer.empty();
+    }
+    // Record an INSERT of (key, val).
+    void insert(
+      const std::string &key,
+      const std::string &val) {
+      buffer.push_back(
+        delta_leaf_t{
+          delta_leaf_t::op_t::INSERT,
+          key,
+          val
+        });
+    }
+    // Record an UPDATE of key to val.
+    void update(
+      const std::string &key,
+      const std::string &val) {
+      buffer.push_back(
+        delta_leaf_t{
+          delta_leaf_t::op_t::UPDATE,
+          key,
+          val
+        });
+    }
+    // Record a REMOVE of key (the value field is left empty).
+    void remove(std::string key) {
+      buffer.push_back(
+        delta_leaf_t{
+          delta_leaf_t::op_t::REMOVE,
+          key,
+          ""
+        });
+    }
+
+    // Apply every recorded mutation to node, in recording order.
+    void replay(StringKVLeafNodeLayout &node) {
+      for (auto &i: buffer) {
+        i.replay(node);
+      }
+    }
+    // Sum of op size plus key and value lengths over all recorded deltas.
+    size_t get_bytes() const {
+      size_t size = 0;
+      for (auto &i: buffer) {
+        // sizeof the op member: `i.op_t` names a nested type through an
+        // object expression, which is ill-formed once this is instantiated.
+        size += sizeof(i.op) + i.key.size() + i.val.size();
+      }
+      return size;
+    }
+    //copy out
+    // Note: encoding drains the buffer (it is cleared afterwards).
+    void encode(ceph::bufferlist &bl) {
+      using ceph::encode;
+      uint32_t num = buffer.size();
+      encode(num, bl);
+      for (auto &&i: buffer) {
+        encode(i.op, bl);
+        encode(i.key, bl);
+        //bl.append((char*)&(i.key), sizeof(i.key));
+        encode(i.val, bl);
+      }
+      buffer.clear();
+    }
+    //copy in
+    // Decoded deltas are appended to whatever is already buffered.
+    void decode(const ceph::bufferlist &bl) {
+      using ceph::decode;
+      auto p = bl.cbegin();
+      uint32_t num;
+      decode (num, p);
+      while (num--) {
+        delta_leaf_t delta;
+        decode(delta.op, p);
+        decode(delta.key, p);
+        decode(delta.val, p);
+        buffer.push_back(delta);
+      }
+    }
+
+    bool operator==(const delta_leaf_buffer_t &rhs) const {
+      return buffer == rhs.buffer;
+    }
+  };
+
+  // Insert (key, val) at _iter, recording the insert in recorder when given.
+  void journal_leaf_insert(
+    const_iterator _iter,
+    const std::string &key,
+    const std::string &val,
+    delta_leaf_buffer_t *recorder) {
+    auto iter = iterator(this, _iter.index);
+    if (recorder) {
+      recorder->insert(
+        key,
+        val);
+    }
+    leaf_insert(iter, key, val);
+  }
+
+  void journal_leaf_update(
+    const_iterator _iter,
+    const std::string &key,
+    const std::string &val,
+    delta_leaf_buffer_t *recorder) {
+    auto iter = iterator(this, _iter.index);
+    if (recorder) {
+
recorder->remove(iter->get_node_val()); + recorder->insert(key, val); + } + leaf_update(iter, key, val); + } + + + void journal_leaf_remove( + const_iterator _iter, + delta_leaf_buffer_t *recorder) { + auto iter = iterator(this, _iter.index); + if (recorder) { + recorder->remove(iter->get_node_val()); + } + leaf_remove(iter); + } + + StringKVLeafNodeLayout(char *buf) : + buf(buf) {} + + const_iterator iter_begin() const { + return const_iterator( + this, + 0); + } + + const_iterator iter_end() const { + return const_iterator( + this, + get_size()); + } + + iterator iter_begin() { + return iterator( + this, + 0); + } + + iterator iter_end() { + return iterator( + this, + get_size()); + } + + const_iterator iter_idx(uint16_t off) const { + return const_iterator( + this, + off); + } + + const_iterator string_lower_bound(std::string str) const { + uint16_t start = 0, end = get_size(); + while (start != end) { + unsigned mid = (start + end) / 2; + const_iterator iter(this, mid); + std::string s = iter->get_node_val(); + if (s < str) + start = ++mid; + if (s > str) + end = mid; + if (s == str) + return iter; + } + return const_iterator(this, start); + } + + iterator string_lower_bound(std::string str) { + const auto &tref = *this; + return iterator(this, tref.string_lower_bound(str).index); + } + + const_iterator string_upper_bound(std::string str) const { + auto ret = iter_begin(); + for (; ret != iter_end(); ++ret) { + std::string s = ret->get_node_val(); + if (s > str) + break; + } + return ret; + } + + iterator string_upper_bound(std::string str) { + const auto &tref = *this; + return iterator(this, tref.string_upper_bound(str).index); + } + + const_iterator find_string_key(const std::string &str) const { + auto ret = iter_begin(); + for (; ret != iter_end(); ++ret) { + std::string s = ret->get_node_val(); + if (s == str) + break; + } + return ret; + } + iterator find_string_key(const std::string &str) { + const auto &tref = *this; + return iterator(this, 
tref.find_string_key(str).index); + } + + const_iterator get_split_pivot() const { + uint32_t total_size = omap_leaf_key_t(get_node_key_ptr()[get_size()-1]).key_off; + uint32_t pivot_size = total_size / 2; + uint32_t size = 0; + for (auto ite = iter_begin(); ite < iter_end(); ite++) { + auto node_key = ite->get_node_key(); + size += node_key.key_len + node_key.val_len; + if (size >= pivot_size){ + return ite; + } + } + return iter_end(); + } + + uint32_t get_size() const { + ceph_le32 &size = *layout.template Pointer<0>(buf); + return uint32_t(size); + } + + /** + * set_size + * + * Set size representation to match size + */ + void set_size(uint32_t size) { + ceph_le32 s; + s = size; + *layout.template Pointer<0>(buf) = s; + } + + /** + * get_meta/set_meta + * + * Enables stashing a templated type within the layout. + * Cannot be modified after initial write as it is not represented + * in delta_t + */ + Meta get_meta() const { + MetaInt &metaint = *layout.template Pointer<1>(buf); + return Meta(metaint); + } + void set_meta(const Meta &meta) { + *layout.template Pointer<1>(buf) = MetaInt(meta); + } + + uint32_t used_space() const { + uint32_t count = get_size(); + if (count) { + omap_leaf_key_t last_key = omap_leaf_key_t(get_node_key_ptr()[count-1]); + return last_key.key_off + count * sizeof(omap_leaf_key_le_t); + } else { + return 0; + } + } + + uint32_t free_space() const { + return capacity() - used_space(); + } + + uint32_t capacity() const { + return BlockSize - (reinterpret_cast(layout.template Pointer<2>(buf))- + reinterpret_cast(layout.template Pointer<0>(buf))); + } + char* from_end(int off) { + return buf + (BlockSize - off); + } + + bool is_overflow(size_t ksize, size_t vsize) const { + return free_space() < (sizeof(omap_leaf_key_le_t) + ksize + vsize); + } + bool below_min() const { + return free_space() > (capacity() / 2); + } + + bool operator==(const StringKVLeafNodeLayout &rhs) const { + if (get_size() != rhs.get_size()) { + return false; + } + + 
auto iter = iter_begin(); + auto iter2 = rhs.iter_begin(); + while (iter != iter_end()) { + if (iter->get_node_key() != iter2->get_node_key() || + iter->get_node_val() != iter2->get_node_val() || + iter->get_string_val() != iter2->get_string_val()){ + return false; + } + iter++; + iter2++; + } + return true; + } + + /** + * split_into + * + * Takes *this and splits its contents into left and right. + */ + std::string split_into( + StringKVLeafNodeLayout &left, + StringKVLeafNodeLayout &right) const { + auto piviter = get_split_pivot(); + assert (piviter != iter_end()); + + left.copy_from_foreign_head(left.iter_begin(), iter_begin(), piviter); + left.set_size(piviter - iter_begin()); + + right.copy_from_foreign_back(right.iter_begin(), piviter, iter_end()); + right.set_size(iter_end() - piviter); + + auto [lmeta, rmeta] = get_meta().split_into(); + left.set_meta(lmeta); + right.set_meta(rmeta); + + return piviter->get_node_val(); + } + + /** + * merge_from + * + * Takes two nodes and copies their contents into *this. + * + * precondition: left.size() + right.size() < CAPACITY + */ + void merge_from( + const StringKVLeafNodeLayout &left, + const StringKVLeafNodeLayout &right) + { + copy_from_foreign_head( + iter_end(), + left.iter_begin(), + left.iter_end()); + set_size(left.get_size()); + append_copy_from_foreign_head( + iter_end(), + right.iter_begin(), + right.iter_end()); + set_size(left.get_size() + right.get_size()); + set_meta(Meta::merge_from(left.get_meta(), right.get_meta())); + } + + /** + * balance_into_new_nodes + * + * Takes the contents of left and right and copies them into + * replacement_left and replacement_right such that + * the size of replacement_left side just >= 1/2 of the total size (left + right). 
+ */ + static std::string balance_into_new_nodes( + const StringKVLeafNodeLayout &left, + const StringKVLeafNodeLayout &right, + StringKVLeafNodeLayout &replacement_left, + StringKVLeafNodeLayout &replacement_right) + { + uint32_t left_size = omap_leaf_key_t(left.get_node_key_ptr()[left.get_size()-1]).key_off; + uint32_t right_size = omap_leaf_key_t(right.get_node_key_ptr()[right.get_size()-1]).key_off; + uint32_t total = left_size + right_size; + uint32_t pivot_size = total / 2; + uint32_t pivot_idx = 0; + if (pivot_size < left_size) { + uint32_t size = 0; + for (auto ite = left.iter_begin(); ite < left.iter_end(); ite++) { + auto node_key = ite->get_node_key(); + size += node_key.key_len + node_key.val_len; + if (size >= pivot_size){ + pivot_idx = ite.get_index(); + break; + } + } + } else { + uint32_t more_size = pivot_size - left_size; + uint32_t size = 0; + for (auto ite = right.iter_begin(); ite < right.iter_end(); ite++) { + auto node_key = ite->get_node_key(); + size += node_key.key_len + node_key.val_len; + if (size >= more_size){ + pivot_idx = ite.get_index() + left.get_size(); + break; + } + } + } + + auto replacement_pivot = pivot_idx >= left.get_size() ? 
+ right.iter_idx(pivot_idx - left.get_size())->get_node_val() : + left.iter_idx(pivot_idx)->get_node_val(); + + if (pivot_size < left_size) { + replacement_left.copy_from_foreign_head( + replacement_left.iter_end(), + left.iter_begin(), + left.iter_idx(pivot_idx)); + replacement_left.set_size(pivot_idx); + + replacement_right.copy_from_foreign_back( + replacement_right.iter_end(), + left.iter_idx(pivot_idx), + left.iter_end()); + replacement_right.set_size(left.get_size() - pivot_idx); + + replacement_right.append_copy_from_foreign_head( + replacement_right.iter_end(), + right.iter_begin(), + right.iter_end()); + replacement_right.set_size(right.get_size() + left.get_size() - pivot_idx); + } else { + replacement_left.copy_from_foreign_head( + replacement_left.iter_end(), + left.iter_begin(), + left.iter_end()); + replacement_left.set_size(left.get_size()); + + replacement_left.append_copy_from_foreign_head( + replacement_left.iter_end(), + right.iter_begin(), + right.iter_idx(pivot_idx - left.get_size())); + replacement_left.set_size(pivot_idx); + + replacement_right.copy_from_foreign_back( + replacement_right.iter_end(), + right.iter_idx(pivot_idx - left.get_size()), + right.iter_end()); + replacement_right.set_size(right.get_size() + left.get_size() - pivot_idx); + } + + auto [lmeta, rmeta] = Meta::rebalance( + left.get_meta(), right.get_meta()); + replacement_left.set_meta(lmeta); + replacement_right.set_meta(rmeta); + return replacement_pivot; + } + +private: + void leaf_insert( + iterator iter, + const std::string &key, + const std::string &val) { + if (VALIDATE_INVARIANTS) { + if (iter != iter_begin()) { + assert((iter - 1)->get_node_val() < key); + } + if (iter != iter_end()) { + assert(iter->get_node_val() > key); + } + assert(is_overflow(key.size() + 1, val.size() + 1) == false); + } + omap_leaf_key_t node_key; + if (iter == iter_begin()) { + node_key.key_off = key.size() + 1 + val.size() + 1; + node_key.key_len = key.size() + 1; + node_key.val_off = 
val.size() + 1; + node_key.val_len = val.size() + 1; + } else { + node_key.key_off = (iter - 1)->get_node_key().key_off + (key.size() + 1 + val.size() + 1); + node_key.key_len = key.size() + 1; + node_key.val_off = (iter - 1)->get_node_key().key_off + (val.size() + 1); + node_key.val_len = val.size() + 1; + } + if (get_size() != 0 && iter != iter_end()) + local_move_back(node_key, iter + 1, iter, iter_end()); + + iter->set_node_key(node_key); + set_size(get_size() + 1); + iter->set_node_val(key); + iter->set_string_val(val); + } + + void leaf_update( + iterator iter, + const std::string &key, + const std::string &val) { + assert(iter != iter_end()); + if (VALIDATE_INVARIANTS) { + assert(is_overflow(0, val.size() + 1) == false); + } + leaf_remove(iter); + leaf_insert(iter, key, val); + } + + void leaf_remove(iterator iter) { + assert(iter != iter_end()); + if ((iter + 1) != iter_end()) + local_move_ahead(iter, iter + 1, iter_end()); + set_size(get_size() - 1); + } + + /** + * get_key_ptr + * + * Get pointer to start of key array + */ + omap_leaf_key_le_t *get_node_key_ptr() { + return L::Partial(1, 1, get_size()).template Pointer<2>(buf); + } + const omap_leaf_key_le_t *get_node_key_ptr() const { + return L::Partial(1, 1, get_size()).template Pointer<2>(buf); + } + + /** + * copy_from_foreign_head + * + * Copy from another node begin entries to this node. + * [from_src, to_src) is another node entry range. + * tgt is this node entry to copy to. + * tgt and from_src must be from different nodes. + * from_src and to_src must be in the same node. 
+   */
+  static void copy_from_foreign_head(
+    iterator tgt,
+    const_iterator from_src,
+    const_iterator to_src) {
+    assert(tgt->node != from_src->node);
+    assert(to_src->node == from_src->node);
+    // An empty range is a no-op. Without this guard (to_src - 1) below would
+    // wrap the uint16_t index when to_src is the first entry, e.g. when the
+    // split pivot is the first entry of the source node.
+    if (from_src == to_src)
+      return;
+
+    // NOTE(review): len is the key_off of the last copied entry regardless of
+    // from_src, so this looks like it expects from_src to be the source
+    // node's iter_begin(); callers visible in this class pass iter_begin().
+    void* des = tgt.node->from_end((to_src -1)->get_node_key().key_off);
+    void* src = (to_src - 1)->get_node_val_ptr();
+    size_t len = (to_src -1)->get_node_key().key_off;
+    memcpy(des, src, len);
+    memcpy(
+      tgt->get_node_key_ptr(), from_src->get_node_key_ptr(),
+      to_src->get_node_key_ptr() - from_src->get_node_key_ptr());
+  }
+
+  /**
+   * copy_from_foreign_back
+   *
+   * Copy from another node back entries to this node.
+   * [from_src, to_src) is another node entry range.
+   * tgt is this node entry to copy to.
+   * tgt and from_src must be from different nodes.
+   * from_src and to_src must be in the same node.
+   */
+  void copy_from_foreign_back(
+    iterator tgt,
+    const_iterator from_src,
+    const_iterator to_src) {
+    assert(tgt->node != from_src->node);
+    assert(to_src->node == from_src->node);
+    // Bytes occupied by the source entries that precede from_src.
+    auto offset = from_src.get_index() == 0? 0: (from_src-1)->get_node_key().key_off;
+
+    void* des = tgt.node->from_end((to_src -1)->get_node_key().key_off - offset);
+    void* src = (to_src - 1)->get_node_val_ptr();
+    size_t len = from_src.get_index() == 0? (to_src -1)->get_node_key().key_off:
+      (from_src-1)->get_node_val_ptr() - (to_src -1)->get_node_val_ptr();
+    memcpy(des, src, len);
+    memcpy(
+      tgt->get_node_key_ptr(), from_src->get_node_key_ptr(),
+      to_src->get_node_key_ptr() - from_src->get_node_key_ptr());
+    if ( from_src.get_index() == 0)
+      return;
+
+    // Rebase the copied descriptors: offsets were relative to the source
+    // node and must no longer include the entries that were skipped.
+    omap_leaf_key_t key = (from_src - 1)->get_node_key();
+    for (auto ite = tgt; ite.get_index() < (tgt.get_index() + to_src.get_index() - from_src.get_index()); ite++) {
+      omap_leaf_key_t node_key = ite->get_node_key();
+      node_key.key_off -= key.key_off;
+      node_key.val_off -= key.key_off;
+      ite->set_node_key(node_key);
+    }
+  }
+
+  /**
+   * append copy_from_foreign_ahead
+   *
+   * append another node head entries to this node back.
+ * [from_src, to_src) is another node entry range. + * tgt is this node entry to copy to. + * tgt and from_src must be from different nodes. + * from_src and to_src must be in the same node. + */ + void append_copy_from_foreign_head( + iterator tgt, + const_iterator from_src, + const_iterator to_src) { + assert(tgt->node != from_src->node); + assert(to_src->node == from_src->node); + if (from_src == to_src) + return; + + void* des = tgt.node->from_end((to_src -1)->get_node_key().key_off + (tgt - 1)->get_node_key().key_off); + void* src = (to_src - 1)->get_node_val_ptr(); + size_t len = (to_src -1)->get_node_key().key_off; + memcpy(des, src, len); + memcpy( + tgt->get_node_key_ptr(), from_src->get_node_key_ptr(), + to_src->get_node_key_ptr() - from_src->get_node_key_ptr()); + omap_leaf_key_t key = (tgt - 1)->get_node_key(); + auto end_idx = tgt.get_index() + to_src.get_index() - from_src.get_index(); + for (auto ite = tgt; ite.get_index() != end_idx; ite++) { + omap_leaf_key_t node_key = ite->get_node_key(); + node_key.key_off += key.key_off; + node_key.val_off += key.key_off; + ite->set_node_key(node_key); + } + } + + /** + * local_move_back + * + * move this node entries range [from_src, to_src) back to tgt position. + * + * tgt, from_src, and to_src must be from the same node. + */ + static void local_move_back( + omap_leaf_key_t key, + iterator tgt, + iterator from_src, + iterator to_src) { + assert(tgt->node == from_src->node); + assert(to_src->node == from_src->node); + void* des = (to_src-1)->get_node_val_ptr() - (key.key_len + key.val_len); + void* src = (to_src-1)->get_node_val_ptr(); + size_t len = from_src.get_index() == 0? 
+ from_src->node->buf + BlockSize - (to_src-1)->get_node_val_ptr(): + (from_src-1)->get_node_val_ptr() - (to_src-1)->get_node_val_ptr(); /* len runs from the last entry's value pointer up to buf + BlockSize when from_src is the first entry, otherwise up to the value pointer of the entry preceding from_src */ + memmove(des, src, len); + for ( auto ite = from_src; ite < to_src; ite++) { + /* same displacement as des relative to src above */ omap_leaf_key_t node_key = ite->get_node_key(); + node_key.key_off += (key.key_len + key.val_len); + node_key.val_off += (key.key_len + key.val_len); + ite->set_node_key(node_key); + } + /* finally slide the key slots of [from_src, to_src) to tgt; source and destination are in the same node (asserted), hence memmove */ memmove( + tgt->get_node_key_ptr(), from_src->get_node_key_ptr(), + to_src->get_node_key_ptr() - from_src->get_node_key_ptr()); + } + + /** + * local_move_ahead + * + * move this node entries range [from_src, to_src) ahead to tgt position. + * + * tgt, from_src, and to_src must be from the same node. + * + * from_src must not be the first entry (asserted). The payload bytes are + * moved to an address higher by the key_len + val_len of the entry at tgt, + * and key_off/val_off of every moved entry shrink by that same amount. + */ + static void local_move_ahead( + iterator tgt, + iterator from_src, + iterator to_src) { + assert(tgt->node == from_src->node); + assert(to_src->node == from_src->node); + assert(from_src.get_index() != 0); + /* read tgt's key now: its slot is overwritten by the last memmove below */ omap_leaf_key_t key = tgt->get_node_key(); + void* des = (to_src - 1)->get_node_val_ptr() + key.key_len + key.val_len; + void* src = (to_src - 1)->get_node_val_ptr(); + size_t len = (from_src - 1)->get_node_val_ptr() - (to_src - 1)->get_node_val_ptr(); + memmove(des, src, len); + for ( auto ite = from_src; ite < to_src; ite++) { + omap_leaf_key_t node_key = ite->get_node_key(); + node_key.key_off -= (key.key_len + key.val_len); + node_key.val_off -= (key.key_len + key.val_len); + ite->set_node_key(node_key); + } + memmove( + tgt->get_node_key_ptr(), from_src->get_node_key_ptr(), + to_src->get_node_key_ptr() - from_src->get_node_key_ptr()); + } + +}; + +} diff --git a/src/crimson/os/seastore/seastore_types.cc b/src/crimson/os/seastore/seastore_types.cc index ff43b1e515bf..e4e52dfd0d26 100644 --- a/src/crimson/os/seastore/seastore_types.cc +++ b/src/crimson/os/seastore/seastore_types.cc @@ -59,6 +59,10 @@ std::ostream &operator<<(std::ostream &out, extent_types_t t) return out << "EXTMAP_LEAF"; case extent_types_t::ONODE_BLOCK_STAGED: return out << 
"ONODE_BLOCK_STAGED"; + case extent_types_t::OMAP_INNER: + return out << "OMAP_INNER"; + case extent_types_t::OMAP_LEAF: + return out << "OMAP_LEAF"; case extent_types_t::TEST_BLOCK: return out << "TEST_BLOCK"; case extent_types_t::TEST_BLOCK_PHYSICAL: diff --git a/src/crimson/os/seastore/seastore_types.h b/src/crimson/os/seastore/seastore_types.h index cb8480268e16..28ffdad18fa6 100644 --- a/src/crimson/os/seastore/seastore_types.h +++ b/src/crimson/os/seastore/seastore_types.h @@ -281,7 +281,9 @@ enum class extent_types_t : uint8_t { ONODE_BLOCK = 3, EXTMAP_INNER = 4, EXTMAP_LEAF = 5, - ONODE_BLOCK_STAGED = 6, + OMAP_INNER = 6, + OMAP_LEAF = 7, + ONODE_BLOCK_STAGED = 8, /* NOTE(review): ONODE_BLOCK_STAGED moves from 6 to 8 and 6 now means OMAP_INNER; confirm that no stored extent_types_t value relies on the old numbering */ // Test Block Types TEST_BLOCK = 0xF0, diff --git a/src/crimson/os/seastore/transaction_manager.cc b/src/crimson/os/seastore/transaction_manager.cc index 7b86631e2ca7..63bf46e8e450 100644 --- a/src/crimson/os/seastore/transaction_manager.cc +++ b/src/crimson/os/seastore/transaction_manager.cc @@ -168,6 +168,23 @@ TransactionManager::ref_ret TransactionManager::dec_ref( }); } +TransactionManager::refs_ret TransactionManager::dec_ref( + Transaction &t, + std::list offsets) +{ /* List overload: calls the single-address dec_ref(t, laddr) for every element of offsets via crimson::do_for_each, appends each result to refcnt, and resolves to refcnt. do_with keeps both lists alive until the chain has finished. */ + return seastar::do_with(std::move(offsets), std::list(), + [this, &t] (auto &&offsets, auto &refcnt) { + return crimson::do_for_each(offsets.begin(), offsets.end(), + [this, &t, &refcnt] (auto &laddr) { + return dec_ref(t, laddr).safe_then([&refcnt] (auto ref) { + refcnt.push_back(ref); + }); + }).safe_then([&refcnt] { + return ref_ertr::make_ready_future>(std::move(refcnt)); + }); + }); +} + TransactionManager::submit_transaction_ertr::future<> TransactionManager::submit_transaction( TransactionRef t) diff --git a/src/crimson/os/seastore/transaction_manager.h b/src/crimson/os/seastore/transaction_manager.h index d28fd0b87923..8258a81a9f7d 100644 --- a/src/crimson/os/seastore/transaction_manager.h +++ b/src/crimson/os/seastore/transaction_manager.h @@ -10,6 +10,7 @@ #include #include +#include #include #include @@ -176,6 
+177,12 @@ public: Transaction &t, laddr_t offset); + /// remove refcount for list of offset + using refs_ret = ref_ertr::future>; + refs_ret dec_ref( + Transaction &t, + std::list offsets); + /** * alloc_extent * @@ -205,6 +212,35 @@ public: }); } + /* alloc_extents + * + * allocates more than one new blocks of type T. + */ + using alloc_extents_ertr = alloc_extent_ertr; + template + alloc_extents_ertr::future>> + alloc_extents( + Transaction &t, + laddr_t hint, + extent_len_t len, + int num) { + return seastar::do_with(std::vector>(), + [this, &t, hint, len, num] (auto &extents) { + return crimson::do_for_each( + boost::make_counting_iterator(0), + boost::make_counting_iterator(num), + [this, &t, len, hint, &extents] (auto i) { + return alloc_extent(t, hint, len).safe_then( + [&extents](auto &&node) { + extents.push_back(node); + }); + }).safe_then([&extents] { + return alloc_extents_ertr::make_ready_future + >>(std::move(extents)); + }); + }); + } + /** * submit_transaction * diff --git a/src/test/crimson/seastore/CMakeLists.txt b/src/test/crimson/seastore/CMakeLists.txt index 73feebe23c2b..840a59070c7d 100644 --- a/src/test/crimson/seastore/CMakeLists.txt +++ b/src/test/crimson/seastore/CMakeLists.txt @@ -45,4 +45,13 @@ target_link_libraries( ${CMAKE_DL_LIBS} crimson-seastore) +add_executable(unittest_omap_manager + test_omap_manager.cc + ../gtest_seastar.cc) +add_ceph_unittest(unittest_omap_manager) +target_link_libraries( + unittest_omap_manager + ${CMAKE_DL_LIBS} + crimson-seastore) + add_subdirectory(onode_tree) diff --git a/src/test/crimson/seastore/test_omap_manager.cc b/src/test/crimson/seastore/test_omap_manager.cc new file mode 100644 index 000000000000..173d4e6964b1 --- /dev/null +++ b/src/test/crimson/seastore/test_omap_manager.cc @@ -0,0 +1,604 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "test/crimson/gtest_seastar.h" + +#include 
"test/crimson/seastore/transaction_manager_test_state.h" + +#include "crimson/os/seastore/cache.h" +#include "crimson/os/seastore/transaction_manager.h" +#include "crimson/os/seastore/segment_manager.h" +#include "crimson/os/seastore/omap_manager.h" + +#include "test/crimson/seastore/test_block.h" + +using namespace crimson; +using namespace crimson::os; +using namespace crimson::os::seastore; +using namespace std; + +namespace { + [[maybe_unused]] seastar::logger& logger() { + return crimson::get_logger(ceph_subsys_test); + } +} + +struct omap_manager_test_t : + public seastar_test_suite_t, + TMTestState { + + OMapManagerRef omap_manager; + + omap_manager_test_t() {} + + seastar::future<> set_up_fut() final { + return tm_setup().then([this] { + omap_manager = omap_manager::create_omap_manager(*tm); + return seastar::now(); + }); + } + + seastar::future<> tear_down_fut() final { + return tm_teardown().then([this] { + omap_manager.reset(); + return seastar::now(); + }); + } + + using test_omap_t = std::map; + test_omap_t test_omap_mappings; + + bool set_key( + omap_root_t &omap_root, + Transaction &t, + string &key, + string &val) { + auto ret = omap_manager->omap_set_key(omap_root, t, key, val).unsafe_get0(); + EXPECT_EQ(ret, true); + test_omap_mappings[key] = val; + return ret; + } + + std::pair get_value( + omap_root_t &omap_root, + Transaction &t, + const string &key) { + auto ret = omap_manager->omap_get_value(omap_root, t, key).unsafe_get0(); + EXPECT_EQ(key, ret.first); + return ret; + } + + bool rm_key( + omap_root_t &omap_root, + Transaction &t, + const string &key) { + auto ret = omap_manager->omap_rm_key(omap_root, t, key).unsafe_get0(); + EXPECT_EQ(ret, true); + test_omap_mappings.erase(test_omap_mappings.find(key)); + return ret; + } + + list_keys_result_t list_keys( + omap_root_t &omap_root, + Transaction &t, + std::string &start, + size_t max = MAX_SIZE) { + auto ret = omap_manager->omap_list_keys(omap_root, t, start, max).unsafe_get0(); + if (start 
== "" && max == MAX_SIZE) { /* full listing (empty start, default max): the result must hold exactly the mirrored keys */ + EXPECT_EQ(test_omap_mappings.size(), ret.keys.size()); + for ( auto &i : ret.keys) { + auto it = test_omap_mappings.find(i); + EXPECT_NE(it, test_omap_mappings.end()); + EXPECT_EQ(i, it->first); + } + } else { /* bounded listing: walk the mirror from start and compare keys position by position. NOTE(review): find(start) is end() when start is not a mirrored key, and ret.keys[i] is read without checking ret.keys.size(). */ + size_t i =0; + auto it = test_omap_mappings.find(start); + for (; it != test_omap_mappings.end() && i < max; it++) { + EXPECT_EQ(ret.keys[i], it->first); + i++; + } + if (it == test_omap_mappings.end()) { /* mirror exhausted: no continuation key expected */ + EXPECT_EQ(ret.next, ""); + } else { /* stopped at max: next must name the first key not returned */ + EXPECT_EQ(ret.keys.size(), max); + EXPECT_EQ(ret.next, it->first); + } + } + return ret; + } + + /* list: same checks as list_keys but through omap_list, whose result carries key/value pairs in ret.kvs */ list_kvs_result_t list( + omap_root_t &omap_root, + Transaction &t, + std::string &start, + size_t max = MAX_SIZE) { + auto ret = omap_manager->omap_list(omap_root, t, start, max).unsafe_get0(); + if (start == "" && max == MAX_SIZE) { + EXPECT_EQ(test_omap_mappings.size(), ret.kvs.size()); + for ( auto &i : ret.kvs) { + auto it = test_omap_mappings.find(i.first); + EXPECT_NE(it, test_omap_mappings.end()); + EXPECT_EQ(i.second, it->second); + } + } else { /* bounded listing: only the keys (.first) are compared here, not the values */ + size_t i = 0; + auto it = test_omap_mappings.find(start); + for (; it != test_omap_mappings.end() && i < max; it++) { + EXPECT_EQ(ret.kvs[i].first, it->first); + i++; + } + if (it == test_omap_mappings.end()) { + EXPECT_EQ(ret.next, ""); + } else { + EXPECT_EQ(ret.kvs.size(), max); + EXPECT_EQ(ret.next, it->first); + } + } + + return ret; + } + + /* clear: runs omap_clear and expects the root address to be reset to L_ADDR_NULL; the mirror is not touched here */ void clear( + omap_root_t &omap_root, + Transaction &t) { + omap_manager->omap_clear(omap_root, t).unsafe_get0(); + EXPECT_EQ(omap_root.omap_root_laddr, L_ADDR_NULL); + } + + /* check_mappings(root, t): every mirrored pair must be readable through t with the same key and value */ void check_mappings(omap_root_t &omap_root, Transaction &t) { + for (const auto &i: test_omap_mappings){ + auto ret = get_value(omap_root, t, i.first); + EXPECT_EQ(i.first, ret.first); + EXPECT_EQ(i.second, ret.second); + } + } + + /* check_mappings(root): same check through a freshly created transaction, which is not submitted here */ void check_mappings(omap_root_t &omap_root) { + auto t = tm->create_transaction(); + check_mappings(omap_root, *t); + } + + /* replay: closes the transaction manager, remounts the segment manager, mounts again and recreates the omap manager */ void replay() { + logger().debug("{}: begin", __func__); 
tm->close().unsafe_get(); + destroy(); + static_cast(&*segment_manager)->remount(); + init(); + tm->mount().unsafe_get(); + /* the old omap manager was built on the previous tm state; build a new one on the remounted tm */ omap_manager = omap_manager::create_omap_manager(*tm); + logger().debug("{}: end", __func__); + } +}; + +/* rand_string: fills str[0, len) with random characters from 'A'-'Z', 'a'-'z' and '0'-'9', writes a terminating NUL at str[len] and returns str; the buffer must therefore hold at least len + 1 chars */ char* rand_string(char* str, const int len) +{ + int i; + for (i = 0; i < len; ++i) { + switch (rand() % 3) { + case 1: + str[i] = 'A' + rand() % 26; + break; + case 2: + str[i] = 'a' +rand() % 26; + break; + case 0: + str[i] = '0' + rand() % 10; + break; + } + } + str[len] = '\0'; + return str; +} + +/* basic: set one key and read it back, then remove it and expect reads to return an empty value, both inside the removing transaction and in a later one */ TEST_F(omap_manager_test_t, basic) +{ + run_async([this] { + omap_root_t omap_root(0, L_ADDR_NULL); + { + auto t = tm->create_transaction(); + omap_root = omap_manager->initialize_omap(*t).unsafe_get0(); + tm->submit_transaction(std::move(t)).unsafe_get(); + } + + string key = "owner"; + string val = "test"; + { + auto t = tm->create_transaction(); + logger().debug("first transaction"); + [[maybe_unused]] auto setret = set_key(omap_root, *t, key, val); + [[maybe_unused]] auto getret = get_value(omap_root, *t, key); + tm->submit_transaction(std::move(t)).unsafe_get(); + } + { + auto t = tm->create_transaction(); + logger().debug("second transaction"); + [[maybe_unused]] auto getret = get_value(omap_root, *t, key); + [[maybe_unused]] auto rmret = rm_key(omap_root, *t, key); + [[maybe_unused]] auto getret2 = get_value(omap_root, *t, key); + EXPECT_EQ(getret2.second, ""); + tm->submit_transaction(std::move(t)).unsafe_get(); + } + { + auto t = tm->create_transaction(); + logger().debug("third transaction"); + [[maybe_unused]] auto getret = get_value(omap_root, *t, key); + EXPECT_EQ(getret.second, ""); + tm->submit_transaction(std::move(t)).unsafe_get(); + } + }); +} + +/* force_leafnode_split: inserts batches of random pairs and re-checks the mirror after every submit (body continues past this line) */ TEST_F(omap_manager_test_t, force_leafnode_split) +{ + run_async([this] { + omap_root_t omap_root(0, L_ADDR_NULL); + { + auto t = tm->create_transaction(); + omap_root = omap_manager->initialize_omap(*t).unsafe_get0(); + tm->submit_transaction(std::move(t)).unsafe_get(); + } + const 
int STR_LEN = 50; + char str[STR_LEN + 1]; + for (unsigned i = 0; i < 40; i++) { + auto t = tm->create_transaction(); + logger().debug("opened transaction"); + for (unsigned j = 0; j < 10; ++j) { + string key(rand_string(str, rand() % STR_LEN)); + string val(rand_string(str, rand() % STR_LEN)); + [[maybe_unused]] auto addref = set_key(omap_root, *t, key, val); + if ((i % 20 == 0) && (j == 5)) { + check_mappings(omap_root, *t); + } + } + logger().debug("force split submit transaction i = {}", i); + tm->submit_transaction(std::move(t)).unsafe_get(); + check_mappings(omap_root); + } + }); +} + +TEST_F(omap_manager_test_t, force_leafnode_split_merge) +{ + run_async([this] { + omap_root_t omap_root(0, L_ADDR_NULL); + { + auto t = tm->create_transaction(); + omap_root = omap_manager->initialize_omap(*t).unsafe_get0(); + tm->submit_transaction(std::move(t)).unsafe_get(); + } + const int STR_LEN = 50; + char str[STR_LEN + 1]; + + for (unsigned i = 0; i < 80; i++) { + auto t = tm->create_transaction(); + logger().debug("opened split_merge transaction"); + for (unsigned j = 0; j < 5; ++j) { + string key(rand_string(str, rand() % STR_LEN)); + string val(rand_string(str, rand() % STR_LEN)); + [[maybe_unused]] auto addref = set_key(omap_root, *t, key, val); + if ((i % 10 == 0) && (j == 3)) { + check_mappings(omap_root, *t); + } + } + logger().debug("submitting transaction"); + tm->submit_transaction(std::move(t)).unsafe_get(); + if (i % 50 == 0) { + check_mappings(omap_root); + } + } + auto t = tm->create_transaction(); + int i = 0; + for (auto &e: test_omap_mappings) { + if (i % 3 != 0) { + [[maybe_unused]] auto rmref= rm_key(omap_root, *t, e.first); + } + + if (i % 10 == 0) { + logger().debug("submitting transaction i= {}", i); + tm->submit_transaction(std::move(t)).unsafe_get(); + t = tm->create_transaction(); + } + if (i % 100 == 0) { + logger().debug("check_mappings i= {}", i); + check_mappings(omap_root, *t); + check_mappings(omap_root); + } + i++; + } + 
logger().debug("finally submitting transaction "); + tm->submit_transaction(std::move(t)).unsafe_get(); + }); +} + +TEST_F(omap_manager_test_t, force_leafnode_split_merge_fullandbalanced) +{ + run_async([this] { + omap_root_t omap_root(0, L_ADDR_NULL); + { + auto t = tm->create_transaction(); + omap_root = omap_manager->initialize_omap(*t).unsafe_get0(); + tm->submit_transaction(std::move(t)).unsafe_get(); + } + const int STR_LEN = 50; + char str[STR_LEN + 1]; + + for (unsigned i = 0; i < 50; i++) { + auto t = tm->create_transaction(); + logger().debug("opened split_merge transaction"); + for (unsigned j = 0; j < 5; ++j) { + string key(rand_string(str, rand() % STR_LEN)); + string val(rand_string(str, rand() % STR_LEN)); + [[maybe_unused]] auto addref = set_key(omap_root, *t, key, val); + if ((i % 10 == 0) && (j == 3)) { + check_mappings(omap_root, *t); + } + } + logger().debug("submitting transaction"); + tm->submit_transaction(std::move(t)).unsafe_get(); + if (i % 50 == 0) { + check_mappings(omap_root); + } + } + auto t = tm->create_transaction(); + int i = 0; + for (auto &e: test_omap_mappings) { + if (30 < i && i < 100) { + auto val = e; + [[maybe_unused]] auto rmref= rm_key(omap_root, *t, e.first); + } + + if (i % 10 == 0) { + logger().debug("submitting transaction i= {}", i); + tm->submit_transaction(std::move(t)).unsafe_get(); + t = tm->create_transaction(); + } + if (i % 50 == 0) { + logger().debug("check_mappings i= {}", i); + check_mappings(omap_root, *t); + check_mappings(omap_root); + } + i++; + if (i == 100) + break; + } + logger().debug("finally submitting transaction "); + tm->submit_transaction(std::move(t)).unsafe_get(); + check_mappings(omap_root); + }); +} + + +TEST_F(omap_manager_test_t, force_split_listkeys_list_clear) +{ + run_async([this] { + omap_root_t omap_root(0, L_ADDR_NULL); + { + auto t = tm->create_transaction(); + omap_root = omap_manager->initialize_omap(*t).unsafe_get0(); + tm->submit_transaction(std::move(t)).unsafe_get(); + } + 
const int STR_LEN = 300; + char str[STR_LEN + 1]; + string temp; /* start key for the bounded listings below; assigned inside the insert loop */ + for (unsigned i = 0; i < 40; i++) { + auto t = tm->create_transaction(); + logger().debug("opened transaction"); + for (unsigned j = 0; j < 10; ++j) { + string key(rand_string(str, rand() % STR_LEN)); + string val(rand_string(str, rand() % STR_LEN)); + [[maybe_unused]] auto addref = set_key(omap_root, *t, key, val); + if (i == 10) /* overwritten on every j, so temp ends up as the last key inserted in batch 10 */ + temp = key; + if ((i % 20 == 0) && (j == 5)) { + check_mappings(omap_root, *t); + } + } + logger().debug("force split submit transaction i = {}", i); + tm->submit_transaction(std::move(t)).unsafe_get(); + check_mappings(omap_root); + } + std::string empty = ""; + auto t = tm->create_transaction(); + /* full key listing: empty start, default max */ [[maybe_unused]] auto keys = list_keys(omap_root, *t, empty); + tm->submit_transaction(std::move(t)).unsafe_get(); + + t = tm->create_transaction(); + /* bounded key listing: at most 100 keys starting at temp */ keys = list_keys(omap_root, *t, temp, 100); + tm->submit_transaction(std::move(t)).unsafe_get(); + + t = tm->create_transaction(); + /* full key/value listing */ [[maybe_unused]] auto ls = list(omap_root, *t, empty); + tm->submit_transaction(std::move(t)).unsafe_get(); + + t = tm->create_transaction(); + /* bounded key/value listing */ ls = list(omap_root, *t, temp, 100); + tm->submit_transaction(std::move(t)).unsafe_get(); + + t = tm->create_transaction(); + /* clear() also checks that the root address is reset to L_ADDR_NULL */ clear(omap_root, *t); + tm->submit_transaction(std::move(t)).unsafe_get(); + + }); +} + +/* internal_force_split: inserts 10 batches of 80 random pairs with keys and values shorter than 300 chars, checking the mirror inside every other batch and once at the end */ TEST_F(omap_manager_test_t, internal_force_split) +{ + run_async([this] { + omap_root_t omap_root(0, L_ADDR_NULL); + { + auto t = tm->create_transaction(); + omap_root = omap_manager->initialize_omap(*t).unsafe_get0(); + tm->submit_transaction(std::move(t)).unsafe_get(); + } + const int STR_LEN = 300; + char str[STR_LEN + 1]; + for (unsigned i = 0; i < 10; i++) { + logger().debug("opened split transaction"); + auto t = tm->create_transaction(); + + for (unsigned j = 0; j < 80; ++j) { + string key(rand_string(str, rand() % STR_LEN)); + string val(rand_string(str, rand() % STR_LEN)); + [[maybe_unused]] auto addref = set_key(omap_root, *t, key, 
val); + if ((i % 2 == 0) && (j % 50 == 0)) { + check_mappings(omap_root, *t); + } + } + logger().debug("submitting transaction i = {}", i); + tm->submit_transaction(std::move(t)).unsafe_get(); + } + check_mappings(omap_root); + }); +} + +TEST_F(omap_manager_test_t, internal_force_merge_fullandbalanced) +{ + run_async([this] { + omap_root_t omap_root(0, L_ADDR_NULL); + { + auto t = tm->create_transaction(); + omap_root = omap_manager->initialize_omap(*t).unsafe_get0(); + tm->submit_transaction(std::move(t)).unsafe_get(); + } + const int STR_LEN = 300; + char str[STR_LEN + 1]; + + for (unsigned i = 0; i < 8; i++) { + logger().debug("opened split transaction"); + auto t = tm->create_transaction(); + + for (unsigned j = 0; j < 80; ++j) { + string key(rand_string(str, rand() % STR_LEN)); + string val(rand_string(str, rand() % STR_LEN)); + [[maybe_unused]] auto addref = set_key(omap_root, *t, key, val); + if ((i % 2 == 0) && (j % 50 == 0)) { + check_mappings(omap_root, *t); + } + } + logger().debug("submitting transaction"); + tm->submit_transaction(std::move(t)).unsafe_get(); + } + auto t = tm->create_transaction(); + int i = 0; + for (auto &e: test_omap_mappings) { + auto val = e; + [[maybe_unused]] auto rmref= rm_key(omap_root, *t, e.first); + + if (i % 10 == 0) { + logger().debug("submitting transaction i= {}", i); + tm->submit_transaction(std::move(t)).unsafe_get(); + t = tm->create_transaction(); + } + if (i % 50 == 0) { + logger().debug("check_mappings i= {}", i); + check_mappings(omap_root, *t); + check_mappings(omap_root); + } + i++; + } + logger().debug("finally submitting transaction "); + tm->submit_transaction(std::move(t)).unsafe_get(); + check_mappings(omap_root); + }); +} + +TEST_F(omap_manager_test_t, replay) +{ + run_async([this] { + omap_root_t omap_root(0, L_ADDR_NULL); + { + auto t = tm->create_transaction(); + omap_root = omap_manager->initialize_omap(*t).unsafe_get0(); + tm->submit_transaction(std::move(t)).unsafe_get(); + replay(); + } + const int 
STR_LEN = 300; + char str[STR_LEN + 1]; + + for (unsigned i = 0; i < 8; i++) { + logger().debug("opened split transaction"); + auto t = tm->create_transaction(); + + for (unsigned j = 0; j < 80; ++j) { + string key(rand_string(str, rand() % STR_LEN)); + string val(rand_string(str, rand() % STR_LEN)); + [[maybe_unused]] auto addref = set_key(omap_root, *t, key, val); + if ((i % 2 == 0) && (j % 50 == 0)) { + check_mappings(omap_root, *t); + } + } + logger().debug("submitting transaction i = {}", i); + tm->submit_transaction(std::move(t)).unsafe_get(); + } + replay(); + check_mappings(omap_root); + + auto t = tm->create_transaction(); + int i = 0; + for (auto &e: test_omap_mappings) { + auto val = e; + [[maybe_unused]] auto rmref= rm_key(omap_root, *t, e.first); + + if (i % 10 == 0) { + logger().debug("submitting transaction i= {}", i); + tm->submit_transaction(std::move(t)).unsafe_get(); + replay(); + t = tm->create_transaction(); + } + if (i % 50 == 0) { + logger().debug("check_mappings i= {}", i); + check_mappings(omap_root, *t); + check_mappings(omap_root); + } + i++; + } + logger().debug("finally submitting transaction "); + tm->submit_transaction(std::move(t)).unsafe_get(); + replay(); + check_mappings(omap_root); + }); +} + + +TEST_F(omap_manager_test_t, internal_force_split_to_root) +{ + run_async([this] { + omap_root_t omap_root(0, L_ADDR_NULL); + { + auto t = tm->create_transaction(); + omap_root = omap_manager->initialize_omap(*t).unsafe_get0(); + tm->submit_transaction(std::move(t)).unsafe_get(); + } + const int STR_LEN = 300; + char str[STR_LEN + 1]; + + logger().debug("set big keys"); + for (unsigned i = 0; i < 53; i++) { + auto t = tm->create_transaction(); + + for (unsigned j = 0; j < 8; ++j) { + string key(rand_string(str, STR_LEN)); + string val(rand_string(str, STR_LEN)); + [[maybe_unused]] auto addref = set_key(omap_root, *t, key, val); + } + logger().debug("submitting transaction i = {}", i); + tm->submit_transaction(std::move(t)).unsafe_get(); + 
} + logger().debug("set small keys"); + /* second phase: 100 batches of 8 pairs whose keys and values are exactly STR_LEN_2 (100) chars long */ const int STR_LEN_2 = 100; + char str_2[STR_LEN_2 + 1]; + for (unsigned i = 0; i < 100; i++) { + auto t = tm->create_transaction(); + + for (unsigned j = 0; j < 8; ++j) { + string key(rand_string(str_2, STR_LEN_2)); + string val(rand_string(str_2, STR_LEN_2)); + [[maybe_unused]] auto addref = set_key(omap_root, *t, key, val); + } + logger().debug("submitting transaction last"); + tm->submit_transaction(std::move(t)).unsafe_get(); + } + /* single final check of every mirrored pair through a fresh transaction */ check_mappings(omap_root); + }); +}