From 25722fba9545951c765510ce888e61c813ced1aa Mon Sep 17 00:00:00 2001 From: Xuehan Xu Date: Fri, 18 Feb 2022 17:36:10 +0800 Subject: [PATCH] crimson/os/seastore: add backref manager Signed-off-by: Xuehan Xu --- src/crimson/os/seastore/CMakeLists.txt | 2 + .../os/seastore/backref/backref_tree_node.h | 4 +- .../seastore/backref/btree_backref_manager.cc | 473 ++++++++++++++++++ .../seastore/backref/btree_backref_manager.h | 115 +++++ src/crimson/os/seastore/backref_manager.cc | 20 + src/crimson/os/seastore/backref_manager.h | 147 ++++++ src/crimson/os/seastore/cache.h | 4 + src/crimson/os/seastore/cached_extent.h | 5 + src/crimson/os/seastore/seastore_types.h | 3 + src/crimson/os/seastore/transaction.h | 5 + 10 files changed, 777 insertions(+), 1 deletion(-) create mode 100644 src/crimson/os/seastore/backref/btree_backref_manager.cc create mode 100644 src/crimson/os/seastore/backref/btree_backref_manager.h create mode 100644 src/crimson/os/seastore/backref_manager.cc create mode 100644 src/crimson/os/seastore/backref_manager.h diff --git a/src/crimson/os/seastore/CMakeLists.txt b/src/crimson/os/seastore/CMakeLists.txt index 127f4a23ace..cd5b8f94fb9 100644 --- a/src/crimson/os/seastore/CMakeLists.txt +++ b/src/crimson/os/seastore/CMakeLists.txt @@ -9,7 +9,9 @@ set(crimson_seastore_srcs cache.cc lba_manager.cc segment_cleaner.cc + backref_manager.cc backref/backref_tree_node.cc + backref/btree_backref_manager.cc lba_manager/btree/btree_lba_manager.cc lba_manager/btree/lba_btree_node.cc omap_manager.cc diff --git a/src/crimson/os/seastore/backref/backref_tree_node.h b/src/crimson/os/seastore/backref/backref_tree_node.h index 316787830a1..2a29f94068e 100644 --- a/src/crimson/os/seastore/backref/backref_tree_node.h +++ b/src/crimson/os/seastore/backref/backref_tree_node.h @@ -7,6 +7,7 @@ namespace crimson::os::seastore::backref { +using backref_node_meta_t = fixed_kv_node_meta_t; using backref_node_meta_le_t = fixed_kv_node_meta_le_t; constexpr size_t INTERNAL_NODE_CAPACITY = 254; @@ -39,7 +40,8 @@ struct backref_map_val_le_t { extent_types_le_t type = 0; backref_map_val_le_t() = default; - backref_map_val_le_t(const backref_map_val_le_t &val) + backref_map_val_le_t(const backref_map_val_le_t &) = default; + explicit backref_map_val_le_t(const backref_map_val_t &val) : len(init_extent_len_le(val.len)), laddr(val.laddr), type(extent_types_le_t(val.type)) {} diff --git a/src/crimson/os/seastore/backref/btree_backref_manager.cc b/src/crimson/os/seastore/backref/btree_backref_manager.cc new file mode 100644 index 00000000000..d652da030ad --- /dev/null +++ b/src/crimson/os/seastore/backref/btree_backref_manager.cc @@ -0,0 +1,473 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "crimson/os/seastore/backref/btree_backref_manager.h" + +SET_SUBSYS(seastore_backref); + +namespace crimson::os::seastore { + +template<> +Transaction::tree_stats_t& get_tree_stats< + crimson::os::seastore::backref::BackrefBtree>(Transaction &t) { + return t.get_backref_tree_stats(); +} + +template<> +phy_tree_root_t& get_phy_tree_root< + crimson::os::seastore::backref::BackrefBtree>(root_t &r) { + return r.backref_root; +} + +} + +namespace crimson::os::seastore::backref { + +static depth_t get_depth(const CachedExtent &e) +{ + assert(is_backref_node(e.get_type())); + return e.cast()->get_node_meta().depth; +} + +BtreeBackrefManager::mkfs_ret +BtreeBackrefManager::mkfs( + Transaction &t) +{ + LOG_PREFIX(BtreeBackrefManager::mkfs); + INFOT("start", t); + return 
cache.get_root(t).si_then([this, &t](auto croot) { + croot->get_root().backref_root = BackrefBtree::mkfs(get_context(t)); + return mkfs_iertr::now(); + }).handle_error_interruptible( + mkfs_iertr::pass_further{}, + crimson::ct_error::assert_all{ + "Invalid error in BtreeBackrefManager::mkfs" + } + ); +} + +BtreeBackrefManager::get_mapping_ret +BtreeBackrefManager::get_mapping( + Transaction &t, + paddr_t offset) +{ + LOG_PREFIX(BtreeBackrefManager::get_mapping); + TRACET("{}", t, offset); + auto c = get_context(t); + return with_btree_ret( + cache, + c, + [c, offset](auto &btree) { + return btree.lower_bound( + c, offset + ).si_then([offset, c](auto iter) -> get_mapping_ret { + LOG_PREFIX(BtreeBackrefManager::get_mapping); + if (iter.is_end() || iter.get_key() != offset) { + ERRORT("{} doesn't exist", c.trans, offset); + return crimson::ct_error::enoent::make(); + } else { + TRACET("{} got {}, {}", + c.trans, offset, iter.get_key(), iter.get_val()); + auto e = iter.get_pin(); + return get_mapping_ret( + interruptible::ready_future_marker{}, + std::move(e)); + } + }); + }); +} + +BtreeBackrefManager::get_mappings_ret +BtreeBackrefManager::get_mappings( + Transaction &t, + paddr_t offset, + paddr_t end) +{ + LOG_PREFIX(BtreeBackrefManager::get_mappings); + TRACET("{}~{}", t, offset, end); + auto c = get_context(t); + return with_btree_state( + cache, + c, + [c, offset, end](auto &btree, auto &ret) { + return BackrefBtree::iterate_repeat( + c, + btree.upper_bound_right(c, offset), + [&ret, offset, end, c](auto &pos) { + LOG_PREFIX(BtreeBackrefManager::get_mappings); + if (pos.is_end() || pos.get_key() >= end) { + TRACET("{}~{} done with {} results", + c.trans, offset, end, ret.size()); + return BackrefBtree::iterate_repeat_ret_inner( + interruptible::ready_future_marker{}, + seastar::stop_iteration::yes); + } + TRACET("{}~{} got {}, {}, repeat ...", + c.trans, offset, end, pos.get_key(), pos.get_val()); + ceph_assert((pos.get_key() + pos.get_val().len) > offset); + ret.push_back(pos.get_pin()); + return BackrefBtree::iterate_repeat_ret_inner( + interruptible::ready_future_marker{}, + seastar::stop_iteration::no); + }); + }); +} + +BtreeBackrefManager::new_mapping_ret +BtreeBackrefManager::new_mapping( + Transaction &t, + paddr_t key, + extent_len_t len, + laddr_t addr, + extent_types_t type) +{ + ceph_assert( + is_aligned( + key.as_seg_paddr().get_segment_off(), + (uint64_t)sm_group.get_block_size())); + struct state_t { + paddr_t last_end; + + std::optional insert_iter; + std::optional ret; + + state_t(paddr_t hint) : last_end(hint) {} + }; + + LOG_PREFIX(BtreeBackrefManager::new_mapping); + DEBUGT("{}~{}, paddr={}", t, addr, len, key); + backref_map_val_t val{len, addr, type}; + auto c = get_context(t); + //++stats.num_alloc_extents; + //auto lookup_attempts = stats.num_alloc_extents_iter_nexts; + return crimson::os::seastore::with_btree_state( + cache, + c, + key, + [val, c, key, len, addr, /*lookup_attempts,*/ &t] + (auto &btree, auto &state) { + return BackrefBtree::iterate_repeat( + c, + btree.upper_bound_right(c, key), + [&state, len, addr, &t, key/*, lookup_attempts*/](auto &pos) { + LOG_PREFIX(BtreeBackrefManager::new_mapping); + //++stats.num_alloc_extents_iter_nexts; + if (pos.is_end()) { + DEBUGT("{}~{}, paddr={}, state: end, insert at {}", + t, addr, len, key, + //stats.num_alloc_extents_iter_nexts - lookup_attempts, + state.last_end); + state.insert_iter = pos; + return BackrefBtree::iterate_repeat_ret_inner( + interruptible::ready_future_marker{}, + 
seastar::stop_iteration::yes); + } else if (pos.get_key() >= (state.last_end.add_offset(len))) { + DEBUGT("{}~{}, paddr={}, state: {}~{}, " + "insert at {} -- {}", + t, addr, len, key, + pos.get_key(), pos.get_val().len, + //stats.num_alloc_extents_iter_nexts - lookup_attempts, + state.last_end, + pos.get_val()); + state.insert_iter = pos; + return BackrefBtree::iterate_repeat_ret_inner( + interruptible::ready_future_marker{}, + seastar::stop_iteration::yes); + } else { + ERRORT("{}~{}, paddr={}, state: {}~{}, repeat ... -- {}", + t, addr, len, key, + pos.get_key(), pos.get_val().len, + pos.get_val()); + ceph_abort("not possible for the backref tree"); + return BackrefBtree::iterate_repeat_ret_inner( + interruptible::ready_future_marker{}, + seastar::stop_iteration::no); + } + }).si_then([c, addr, len, key, &btree, &state, val] { + return btree.insert( + c, + *state.insert_iter, + state.last_end, + val + ).si_then([&state, c, addr, len, key](auto &&p) { + LOG_PREFIX(BtreeBackrefManager::alloc_extent); + auto [iter, inserted] = std::move(p); + TRACET("{}~{}, paddr={}, inserted at {}", + c.trans, addr, len, key, state.last_end); + ceph_assert(inserted); + state.ret = iter; + }); + }); + }).si_then([](auto &&state) { + return state.ret->get_pin(); + }); +} + +BtreeBackrefManager::batch_insert_ret +BtreeBackrefManager::batch_insert_from_cache( + Transaction &t, + const journal_seq_t &limit, + const uint64_t max) +{ + LOG_PREFIX(BtreeBackrefManager::batch_insert_from_cache); + DEBUGT("insert up to {}", t, limit); + return seastar::do_with( + limit, + cache.get_backref_bufs_to_flush().begin(), + JOURNAL_SEQ_NULL, + [this, &t, max](auto &limit, auto &iter, auto &inserted_to) { + return trans_intr::repeat( + [&iter, this, &limit, &t, max, &inserted_to]() + -> batch_insert_iertr::future { + if (iter == cache.get_backref_bufs_to_flush().end()) + return seastar::make_ready_future( + seastar::stop_iteration::yes); + auto &bbr = *iter; + LOG_PREFIX(BtreeBackrefManager::batch_insert_from_cache); + DEBUGT("backref buffer starting seq: {}", t, bbr->backrefs.begin()->first); + if (bbr->backrefs.begin()->first <= limit) { + return batch_insert( + t, + bbr, + limit, + max + ).si_then([max, &iter, &inserted_to, &t](auto new_inserted_to) { + assert(inserted_to == JOURNAL_SEQ_NULL + || new_inserted_to >= inserted_to); + inserted_to = new_inserted_to; + if (t.get_num_fresh_backref() * BACKREF_NODE_SIZE < max) { + iter++; + return seastar::make_ready_future( + seastar::stop_iteration::no); + } else { + return seastar::make_ready_future( + seastar::stop_iteration::yes); + } + }); + } else { + return seastar::make_ready_future( + seastar::stop_iteration::yes); + } + }).si_then([&inserted_to, this, &iter, &limit, &t, max] { + auto &backref_buffer = cache.get_newest_backref_buffer(); + if (iter == cache.get_backref_bufs_to_flush().end() + && backref_buffer) { + return batch_insert( + t, + backref_buffer, + limit, + max + ).si_then([&inserted_to](auto new_inserted_to) { + assert(inserted_to == JOURNAL_SEQ_NULL + || new_inserted_to >= inserted_to); + return seastar::make_ready_future( + std::move(new_inserted_to)); + }); + } + return batch_insert_iertr::make_ready_future( + std::move(inserted_to)); + }); + }); +} + +BtreeBackrefManager::scan_mapped_space_ret +BtreeBackrefManager::scan_mapped_space( + Transaction &t, + BtreeBackrefManager::scan_mapped_space_func_t &&f) +{ + LOG_PREFIX(BtreeBackrefManager::scan_mapped_space); + DEBUGT("start", t); + auto c = get_context(t); + return seastar::do_with( + std::move(f), + 
[this, c](auto &visitor) { + return with_btree( + cache, + c, + [c, &visitor](auto &btree) { + return BackrefBtree::iterate_repeat( + c, + btree.lower_bound( + c, + paddr_t::make_seg_paddr( + segment_id_t{0, 0}, 0), + &visitor), + [&visitor](auto &pos) { + if (pos.is_end()) { + return BackrefBtree::iterate_repeat_ret_inner( + interruptible::ready_future_marker{}, + seastar::stop_iteration::yes); + } + visitor(pos.get_key(), pos.get_val().len, 0); + return BackrefBtree::iterate_repeat_ret_inner( + interruptible::ready_future_marker{}, + seastar::stop_iteration::no); + }, + &visitor); + }); + }); +} + +BtreeBackrefManager::batch_insert_ret +BtreeBackrefManager::batch_insert( + Transaction &t, + backref_buffer_ref &bbr, + const journal_seq_t &limit, + const uint64_t max) +{ + return seastar::do_with( + bbr->backrefs.begin(), + JOURNAL_SEQ_NULL, + [this, &t, &limit, &bbr, max](auto &iter, auto &inserted_to) { + return trans_intr::repeat( + [&iter, this, &t, &limit, &bbr, max, &inserted_to]() + -> batch_insert_iertr::future { + if (iter == bbr->backrefs.end()) + return seastar::make_ready_future( + seastar::stop_iteration::yes); + auto &seq = iter->first; + auto &backref_list = iter->second; + LOG_PREFIX(BtreeBackrefManager::batch_insert); + DEBUGT("seq {}, limit {}, num_fresh_backref {}" + , t, seq, limit, t.get_num_fresh_backref()); + if (seq <= limit && t.get_num_fresh_backref() * BACKREF_NODE_SIZE < max) { + inserted_to = seq; + return trans_intr::do_for_each( + backref_list, + [this, &t](auto &backref) { + LOG_PREFIX(BtreeBackrefManager::batch_insert); + if (backref->laddr != L_ADDR_NULL) { + DEBUGT("new mapping: {}~{} -> {}", + t, backref->paddr, backref->len, backref->laddr); + return new_mapping( + t, + backref->paddr, + backref->len, + backref->laddr, + backref->type).si_then([](auto &&pin) { + return seastar::now(); + }); + } else { + DEBUGT("remove mapping: {}", t, backref->paddr); + return remove_mapping( + t, + backref->paddr).si_then([](auto&&) { + return seastar::now(); + }).handle_error_interruptible( + crimson::ct_error::input_output_error::pass_further(), + crimson::ct_error::assert_all("no enoent possible") + ); + } + }).si_then([&iter] { + iter++; + return seastar::make_ready_future( + seastar::stop_iteration::no); + }); + } + return seastar::make_ready_future( + seastar::stop_iteration::yes); + }).si_then([&inserted_to] { + return seastar::make_ready_future( + std::move(inserted_to)); + }); + }); +} + +BtreeBackrefManager::base_iertr::future<> _init_cached_extent( + op_context_t c, + const CachedExtentRef &e, + BackrefBtree &btree, + bool &ret) +{ + return btree.init_cached_extent(c, e + ).si_then([&ret](bool is_alive) { + ret = is_alive; + }); +} + +BtreeBackrefManager::init_cached_extent_ret BtreeBackrefManager::init_cached_extent( + Transaction &t, + CachedExtentRef e) +{ + LOG_PREFIX(BtreeBackrefManager::init_cached_extent); + TRACET("{}", t, *e); + return seastar::do_with(bool(), [this, e, &t](bool &ret) { + auto c = get_context(t); + return with_btree(cache, c, [c, e, &ret](auto &btree) + -> base_iertr::future<> { + LOG_PREFIX(BtreeBackrefManager::init_cached_extent); + DEBUGT("extent {}", c.trans, *e); + return _init_cached_extent(c, e, btree, ret); + }).si_then([&ret] { return ret; }); + }); +} + +BtreeBackrefManager::remove_mapping_ret +BtreeBackrefManager::remove_mapping( + Transaction &t, + paddr_t addr) +{ + auto c = get_context(t); + return with_btree_ret( + cache, + c, + [c, addr](auto &btree) mutable { + return btree.lower_bound( + c, addr + 
).si_then([&btree, c, addr](auto iter) + -> remove_mapping_ret { + if (iter.is_end() || iter.get_key() != addr) { + LOG_PREFIX(BtreeBackrefManager::remove_mapping); + ERRORT("paddr={} doesn't exist", c.trans, addr); + return crimson::ct_error::enoent::make(); + } + + auto ret = remove_mapping_result_t{ + iter.get_key(), + iter.get_val().len, + iter.get_val().laddr}; + return btree.remove( + c, + iter + ).si_then([ret] { + return ret; + }); + }); + }); +} + +void BtreeBackrefManager::complete_transaction( + Transaction &t, + std::vector &to_clear, + std::vector &to_link) +{ + LOG_PREFIX(BtreeBackrefManager::complete_transaction); + DEBUGT("start", t); + // need to call check_parent from leaf->parent + std::sort( + to_clear.begin(), to_clear.end(), + [](auto &l, auto &r) { return get_depth(*l) < get_depth(*r); }); + + for (auto &e: to_clear) { + auto &pin = e->cast()->pin; + DEBUGT("retiring extent {} -- {}", t, pin, *e); + pin_set.retire(pin); + } + + std::sort( + to_link.begin(), to_link.end(), + [](auto &l, auto &r) -> bool { return get_depth(*l) > get_depth(*r); }); + + for (auto &e : to_link) { + DEBUGT("linking extent -- {}", t, *e); + pin_set.add_pin(e->cast()->pin); + } + + for (auto &e: to_clear) { + auto &pin = e->cast()->pin; + TRACET("checking extent {} -- {}", t, pin, *e); + pin_set.check_parent(pin); + } +} + +} // namespace crimson::os::seastore::backref diff --git a/src/crimson/os/seastore/backref/btree_backref_manager.h b/src/crimson/os/seastore/backref/btree_backref_manager.h new file mode 100644 index 00000000000..1ec9a38308b --- /dev/null +++ b/src/crimson/os/seastore/backref/btree_backref_manager.h @@ -0,0 +1,115 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include "crimson/os/seastore/backref_manager.h" +#include "crimson/os/seastore/backref/backref_tree_node.h" +#include "crimson/os/seastore/btree/fixed_kv_btree.h" + +namespace crimson::os::seastore::backref { + +constexpr size_t BACKREF_BLOCK_SIZE = 4096; + +class BtreeBackrefPin : public BtreeNodePin { + extent_types_t type; +public: + BtreeBackrefPin() = default; + BtreeBackrefPin( + CachedExtentRef parent, + backref_map_val_t &val, + backref_node_meta_t &&meta) + : BtreeNodePin( + parent, + val.laddr, + val.len, + std::forward(meta)), + type(val.type) + {} + extent_types_t get_type() const final { + return type; + } +}; + +using BackrefBtree = FixedKVBtree< + paddr_t, backref_map_val_t, BackrefInternalNode, + BackrefLeafNode, BtreeBackrefPin, BACKREF_BLOCK_SIZE>; + +class BtreeBackrefManager : public BackrefManager { +public: + + BtreeBackrefManager( + SegmentManagerGroup &sm_group, + Cache &cache) + : sm_group(sm_group), + cache(cache) + {} + + mkfs_ret mkfs( + Transaction &t) final; + + get_mapping_ret get_mapping( + Transaction &t, + paddr_t offset) final; + + get_mappings_ret get_mappings( + Transaction &t, + paddr_t offset, + paddr_t end) final; + + new_mapping_ret new_mapping( + Transaction &t, + paddr_t key, + extent_len_t len, + laddr_t val, + extent_types_t type) final; + + batch_insert_ret batch_insert( + Transaction &t, + backref_buffer_ref &bbr, + const journal_seq_t &limit, + const uint64_t max) final; + + batch_insert_ret batch_insert_from_cache( + Transaction &t, + const journal_seq_t &limit, + const uint64_t max) final; + + remove_mapping_ret remove_mapping( + Transaction &t, + paddr_t offset) final; + + scan_mapped_space_ret scan_mapped_space( + Transaction &t, + scan_mapped_space_func_t &&f) final; + + 
init_cached_extent_ret init_cached_extent( + Transaction &t, + CachedExtentRef e) final; + + void complete_transaction( + Transaction &t, + std::vector &, + std::vector &) final; + + void add_pin(BackrefPin &pin) final { + auto *bpin = reinterpret_cast(&pin); + pin_set.add_pin(bpin->get_range_pin()); + bpin->set_parent(nullptr); + } + void remove_pin(BackrefPin &pin) final { + auto *bpin = reinterpret_cast(&pin); + pin_set.retire(bpin->get_range_pin()); + } +private: + SegmentManagerGroup &sm_group; + Cache &cache; + + btree_pin_set_t pin_set; + + op_context_t get_context(Transaction &t) { + return op_context_t{cache, t, &pin_set}; + } +}; + +} // namespace crimson::os::seastore::backref diff --git a/src/crimson/os/seastore/backref_manager.cc b/src/crimson/os/seastore/backref_manager.cc new file mode 100644 index 00000000000..8e195994cdc --- /dev/null +++ b/src/crimson/os/seastore/backref_manager.cc @@ -0,0 +1,20 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "crimson/os/seastore/cache.h" +#include "crimson/os/seastore/backref_manager.h" +#include "crimson/os/seastore/backref/btree_backref_manager.h" + +namespace crimson::os::seastore { + +BackrefManagerRef create_backref_manager( + SegmentManagerGroup &sm_group, + Cache &cache) +{ + return BackrefManagerRef( + new backref::BtreeBackrefManager( + sm_group, cache)); +} + +} // namespace crimson::os::seastore::backref + diff --git a/src/crimson/os/seastore/backref_manager.h b/src/crimson/os/seastore/backref_manager.h new file mode 100644 index 00000000000..3ebd1064cca --- /dev/null +++ b/src/crimson/os/seastore/backref_manager.h @@ -0,0 +1,147 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#pragma once + +#include "crimson/os/seastore/cache.h" +#include "crimson/os/seastore/cached_extent.h" +#include "crimson/os/seastore/segment_manager_group.h" +#include "crimson/os/seastore/transaction.h" + +namespace crimson::os::seastore { + +/** + * Abstract interface for managing back references that map paddr_t to laddr_t + */ +class BackrefManager { +public: + using base_ertr = crimson::errorator< + crimson::ct_error::input_output_error>; + using base_iertr = trans_iertr; + + using mkfs_iertr = base_iertr; + using mkfs_ret = mkfs_iertr::future<>; + virtual mkfs_ret mkfs( + Transaction &t) = 0; + + /** + * Fetches mappings for paddr_t in range [offset, offset + len) + * + * Future will not resolve until all pins have resolved + */ + using get_mappings_iertr = base_iertr; + using get_mappings_ret = get_mappings_iertr::future; + virtual get_mappings_ret get_mappings( + Transaction &t, + paddr_t offset, + paddr_t end) = 0; + + /** + * Fetches the mapping for paddr_t + * + * Future will not resolve until the pin has resolved + */ + using get_mapping_iertr = base_iertr::extend< + crimson::ct_error::enoent>; + using get_mapping_ret = get_mapping_iertr::future; + virtual get_mapping_ret get_mapping( + Transaction &t, + paddr_t offset) = 0; + + /** + * Insert new paddr_t -> laddr_t mapping + */ + using new_mapping_iertr = base_iertr; + using new_mapping_ret = new_mapping_iertr::future; + virtual new_mapping_ret new_mapping( + Transaction &t, + paddr_t key, + extent_len_t len, + laddr_t val, + extent_types_t type) = 0; + + /** + * Check if a CachedExtent is alive, should be called + * after replay on each cached extent. 
+ * + * @return returns whether the extent is alive + */ + using init_cached_extent_iertr = base_iertr; + using init_cached_extent_ret = init_cached_extent_iertr::future; + virtual init_cached_extent_ret init_cached_extent( + Transaction &t, + CachedExtentRef e) = 0; + + /** + * insert new paddr_t -> laddr_t mappings in batches + */ + using batch_insert_iertr = base_iertr; + using batch_insert_ret = batch_insert_iertr::future; + virtual batch_insert_ret batch_insert( + Transaction &t, ///< Transaction that commits the updates + backref_buffer_ref &bbr, ///< the set of backref mappings to be inserted + const journal_seq_t &limit, ///< the journal seq upper bound that the insertion + // shouldn't cross + const uint64_t max ///< maximum fresh backref extents that can be + // created by this insertion + ) = 0; + + /** + * insert new mappings directly from Cache + */ + virtual batch_insert_ret batch_insert_from_cache( + Transaction &t, + const journal_seq_t &limit, + const uint64_t max) = 0; + + struct remove_mapping_result_t { + paddr_t offset; + extent_len_t len; + laddr_t laddr; + }; + + /** + * delete the mapping for paddr_t offset + */ + using remove_mapping_iertr = base_iertr::extend< + crimson::ct_error::enoent>; + using remove_mapping_ret = remove_mapping_iertr::future; + virtual remove_mapping_ret remove_mapping( + Transaction &t, + paddr_t offset) = 0; + + /** + * scan all extents, including backref extents, logical extents and lba extents, + * visit them with scan_mapped_space_func_t + */ + using scan_mapped_space_iertr = base_iertr::extend_ertr< + SegmentManager::read_ertr>; + using scan_mapped_space_ret = scan_mapped_space_iertr::future<>; + using scan_mapped_space_func_t = std::function< + void(paddr_t, extent_len_t, depth_t)>; + virtual scan_mapped_space_ret scan_mapped_space( + Transaction &t, + scan_mapped_space_func_t &&f) = 0; + + virtual void complete_transaction( + Transaction &t, + std::vector &to_clear, ///< extents whose pins are to be cleared, + // as the results of their retirements + std::vector &to_link ///< fresh extents whose pins are to be inserted + // into backref manager's pin set + ) = 0; + + virtual void add_pin(BackrefPin &pin) = 0; + virtual void remove_pin(BackrefPin &pin) = 0; + + virtual ~BackrefManager() {} +}; + +using BackrefManagerRef = + std::unique_ptr; + +BackrefManagerRef create_backref_manager( + SegmentManagerGroup &sm_group, + Cache &cache); + +} // namespace crimson::os::seastore::backref diff --git a/src/crimson/os/seastore/cache.h b/src/crimson/os/seastore/cache.h index 5f7664886c1..d2b12cfd9ad 100644 --- a/src/crimson/os/seastore/cache.h +++ b/src/crimson/os/seastore/cache.h @@ -615,6 +615,10 @@ public: return backref_remove_set; } + backref_buffer_ref& get_newest_backref_buffer() { + return backref_buffer; + } + std::list& get_backref_bufs_to_flush() { return backref_bufs_to_flush; } diff --git a/src/crimson/os/seastore/cached_extent.h b/src/crimson/os/seastore/cached_extent.h index f0b88133d3e..4cb86c49eff 100644 --- a/src/crimson/os/seastore/cached_extent.h +++ b/src/crimson/os/seastore/cached_extent.h @@ -701,6 +701,11 @@ using lba_pin_list_t = std::list; std::ostream &operator<<(std::ostream &out, const lba_pin_list_t &rhs); +using BackrefPin = PhysicalNodePin; +using BackrefPinRef = PhysicalNodePinRef; + +using backref_pin_list_t = std::list; + /** * RetiredExtentPlaceholder * diff --git a/src/crimson/os/seastore/seastore_types.h b/src/crimson/os/seastore/seastore_types.h index ed87f276677..46b03c523e5 100644 --- 
a/src/crimson/os/seastore/seastore_types.h +++ b/src/crimson/os/seastore/seastore_types.h @@ -1213,6 +1213,7 @@ public: }; using lba_root_t = phy_tree_root_t; +using backref_root_t = phy_tree_root_t; /** * root_t @@ -1225,6 +1226,7 @@ struct __attribute__((packed)) root_t { static constexpr int MAX_META_LENGTH = 1024; + backref_root_t backref_root; lba_root_t lba_root; laddr_le_t onode_root; coll_root_le_t collection_root; @@ -1237,6 +1239,7 @@ struct __attribute__((packed)) root_t { void adjust_addrs_from_base(paddr_t base) { lba_root.adjust_addrs_from_base(base); + backref_root.adjust_addrs_from_base(base); } meta_t get_meta() { diff --git a/src/crimson/os/seastore/transaction.h b/src/crimson/os/seastore/transaction.h index 25ef0eae4c4..a64d9022647 100644 --- a/src/crimson/os/seastore/transaction.h +++ b/src/crimson/os/seastore/transaction.h @@ -298,6 +298,7 @@ public: retired_set.clear(); onode_tree_stats = {}; lba_tree_stats = {}; + backref_tree_stats = {}; ool_write_stats = {}; to_release = NULL_SEG_ID; conflicted = false; @@ -327,6 +328,9 @@ public: tree_stats_t& get_lba_tree_stats() { return lba_tree_stats; } + tree_stats_t& get_backref_tree_stats() { + return backref_tree_stats; + } void add_rbm_alloc_info_blocks(rbm_alloc_delta_t &d) { rbm_alloc_info_blocks.push_back(d); } @@ -417,6 +421,7 @@ private: /// stats to collect when commit or invalidate tree_stats_t onode_tree_stats; tree_stats_t lba_tree_stats; + tree_stats_t backref_tree_stats; ool_write_stats_t ool_write_stats; ///< if != NULL_SEG_ID, release this segment after completion -- 2.39.5
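
For readers unfamiliar with how the new interface is expected to be consumed, the sketch below shows one possible way a caller (for example, the segment cleaner) might use the BackrefManager declared in backref_manager.h. Only the BackrefManager member calls come from this patch; the wrapper functions and their names are illustrative assumptions, and a manager instance would be obtained once via create_backref_manager(sm_group, cache) from backref_manager.cc.

// Minimal illustrative sketch, not part of the patch.  The free-function
// wrappers below are hypothetical; only the BackrefManager calls they forward
// to are introduced by this change.
#include "crimson/os/seastore/backref_manager.h"

namespace crimson::os::seastore {

// Record that the extent written at `paddr` (length `len`) backs the logical
// address `laddr`; forwards to BackrefManager::new_mapping().
inline BackrefManager::new_mapping_ret
record_backref(
  BackrefManager &backref_manager,
  Transaction &t,
  paddr_t paddr,
  extent_len_t len,
  laddr_t laddr,
  extent_types_t type)
{
  return backref_manager.new_mapping(t, paddr, len, laddr, type);
}

// List every paddr -> laddr mapping that falls inside [start, end), e.g. the
// physical range of a segment about to be cleaned; forwards to
// BackrefManager::get_mappings().
inline BackrefManager::get_mappings_ret
list_backrefs(
  BackrefManager &backref_manager,
  Transaction &t,
  paddr_t start,
  paddr_t end)
{
  return backref_manager.get_mappings(t, start, end);
}

} // namespace crimson::os::seastore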