From 34792d876ef2b4439e33ab34c4341b844eb313c9 Mon Sep 17 00:00:00 2001 From: Xuehan Xu Date: Thu, 10 Mar 2022 10:55:38 +0800 Subject: [PATCH] crimson/os/seastore: lazy backref tree updates for extent (re)placement Buffer all backref updates(allocs/releases) in Cache. Also, organize them into an intrusive set, so later backref query can be done efficiently Backref query should always be first applied to this set and then to the backref manager Signed-off-by: Xuehan Xu --- src/crimson/os/seastore/cache.cc | 76 ++++++- src/crimson/os/seastore/cache.h | 193 ++++++++++++++++++ src/crimson/os/seastore/seastore.cc | 1 + src/crimson/os/seastore/seastore_types.h | 5 + .../os/seastore/transaction_manager.cc | 18 +- src/crimson/os/seastore/transaction_manager.h | 5 +- 6 files changed, 291 insertions(+), 7 deletions(-) diff --git a/src/crimson/os/seastore/cache.cc b/src/crimson/os/seastore/cache.cc index 5b031457f2c..0cd0e85d56f 100644 --- a/src/crimson/os/seastore/cache.cc +++ b/src/crimson/os/seastore/cache.cc @@ -1166,6 +1166,50 @@ record_t Cache::prepare_record( return record; } +void Cache::backref_batch_update( + std::vector &&list, + const journal_seq_t &seq) +{ + LOG_PREFIX(Cache::backref_batch_update); + DEBUG("inserting {} entries", list.size()); + if (!backref_buffer) { + backref_buffer = std::make_unique(); + } + // backref_buf_entry_t::laddr == L_ADDR_NULL means erase + for (auto &ent : list) { + if (ent->laddr == L_ADDR_NULL) { + auto [it, insert] = backref_remove_set.insert(*ent); +#ifndef NDEBUG + if (!insert) { + ERROR("backref_remove_set already contains {}", ent->paddr); + } +#endif + assert(insert); + } else { +#ifndef NDEBUG + auto r = backref_remove_set.erase(ent->paddr, backref_buf_entry_t::cmp_t()); + if (r) { + ERROR("backref_remove_set contains: {}", ent->paddr); + } + assert(!r); +#endif + auto [it, insert] = backref_inserted_set.insert(*ent); + assert(insert); + } + } + + auto iter = backref_buffer->backrefs.find(seq); + if (iter == backref_buffer->backrefs.end()) { + backref_buffer->backrefs.emplace( + seq, std::move(list)); + } else { + iter->second.insert( + iter->second.end(), + std::make_move_iterator(list.begin()), + std::make_move_iterator(list.end())); + } +} + void Cache::complete_commit( Transaction &t, paddr_t final_block_start, @@ -1176,7 +1220,10 @@ void Cache::complete_commit( SUBTRACET(seastore_t, "final_block_start={}, seq={}", t, final_block_start, seq); - t.for_each_fresh_block([&](auto &i) { + may_roll_backref_buffer(final_block_start); + + std::vector backref_list; + t.for_each_fresh_block([&](const CachedExtentRef &i) { bool is_inline = false; if (i->is_inline()) { is_inline = true; @@ -1201,6 +1248,19 @@ void Cache::complete_commit( ? i->last_rewritten : seastar::lowres_system_clock::time_point()); } + if (is_backref_mapped_extent_node(i)) { + backref_list.emplace_back( + std::make_unique( + i->get_paddr(), + i->is_logical() + ? i->cast()->get_laddr() + : (is_lba_node(i->get_type()) + ? i->cast()->get_node_meta().begin + : L_ADDR_NULL), + i->get_length(), + i->get_type(), + seq)); + } } }); @@ -1240,7 +1300,19 @@ void Cache::complete_commit( last_commit = seq; for (auto &i: t.retired_set) { i->dirty_from_or_retired_at = last_commit; + if (is_backref_mapped_extent_node(i) + || is_retired_placeholder(i->get_type())) { + backref_list.emplace_back( + std::make_unique( + i->get_paddr(), + L_ADDR_NULL, + i->get_length(), + i->get_type(), + seq)); + } } + if (!backref_list.empty()) + backref_batch_update(std::move(backref_list), seq); } void Cache::init() @@ -1291,6 +1363,8 @@ Cache::close_ertr::future<> Cache::close() dirty.erase(i++); intrusive_ptr_release(ptr); } + backref_bufs_to_flush.clear(); + backref_buffer.reset(); assert(stats.dirty_bytes == 0); lru.clear(); return close_ertr::now(); diff --git a/src/crimson/os/seastore/cache.h b/src/crimson/os/seastore/cache.h index 789e5fa7a85..5f7664886c1 100644 --- a/src/crimson/os/seastore/cache.h +++ b/src/crimson/os/seastore/cache.h @@ -22,6 +22,77 @@ namespace crimson::os::seastore { +struct backref_buf_entry_t { + backref_buf_entry_t( + const paddr_t paddr, + const laddr_t laddr, + const extent_len_t len, + const extent_types_t type, + const journal_seq_t seq) + : paddr(paddr), + laddr(laddr), + len(len), + type(type), + seq(seq) + {} + const paddr_t paddr = P_ADDR_NULL; + const laddr_t laddr = L_ADDR_NULL; + const extent_len_t len = 0; + const extent_types_t type = + extent_types_t::ROOT; + const journal_seq_t seq; + friend bool operator< ( + const backref_buf_entry_t &l, + const backref_buf_entry_t &r) { + return l.paddr < r.paddr; + } + friend bool operator> ( + const backref_buf_entry_t &l, + const backref_buf_entry_t &r) { + return l.paddr > r.paddr; + } + friend bool operator== ( + const backref_buf_entry_t &l, + const backref_buf_entry_t &r) { + return l.paddr == r.paddr; + } + using hook_t = + boost::intrusive::set_member_hook< + boost::intrusive::link_mode< + boost::intrusive::auto_unlink>>; + hook_t backref_set_hook; + using backref_set_member_options = boost::intrusive::member_hook< + backref_buf_entry_t, + hook_t, + &backref_buf_entry_t::backref_set_hook>; + using set_t = boost::intrusive::set< + backref_buf_entry_t, + backref_set_member_options, + boost::intrusive::constant_time_size>; + struct cmp_t { + using is_transparent = paddr_t; + bool operator()( + const backref_buf_entry_t &l, + const backref_buf_entry_t &r) const { + return l.paddr < r.paddr; + } + bool operator()(const paddr_t l, const backref_buf_entry_t &r) const { + return l < r.paddr; + } + bool operator()(const backref_buf_entry_t &l, const paddr_t r) const { + return l.paddr < r; + } + }; +}; + +using backref_buf_entry_ref = + std::unique_ptr; + +struct backref_buffer_t { + std::map> backrefs; +}; +using backref_buffer_ref = std::unique_ptr; + /** * Cache * @@ -448,6 +519,12 @@ private: } } + backref_buffer_ref backref_buffer; + std::list backref_bufs_to_flush; + // backrefs that needs to be inserted into the backref tree + backref_buf_entry_t::set_t backref_inserted_set; + backref_buf_entry_t::set_t backref_remove_set; // backrefs needs to be removed + // from the backref tree public: /** * get_extent_by_type @@ -483,6 +560,96 @@ public: t, type, offset, laddr, length, [](CachedExtent &) {}); } + std::set< + backref_buf_entry_t, + backref_buf_entry_t::cmp_t> get_backrefs_in_range( + paddr_t start, + paddr_t end) { + auto start_iter = backref_inserted_set.lower_bound( + start, + backref_buf_entry_t::cmp_t()); + auto end_iter = backref_inserted_set.upper_bound( + end, + backref_buf_entry_t::cmp_t()); + std::set< + backref_buf_entry_t, + backref_buf_entry_t::cmp_t> res; + for (auto it = start_iter; + it != end_iter; + it++) { + res.emplace(it->paddr, it->laddr, it->len, it->type, it->seq); + } + return res; + } + + std::set< + backref_buf_entry_t, + backref_buf_entry_t::cmp_t> get_del_backrefs_in_range( + paddr_t start, + paddr_t end) { + LOG_PREFIX(Cache::get_del_backrefs_in_range); + SUBDEBUG(seastore_cache, "total {} del_backrefs", backref_remove_set.size()); + auto start_iter = backref_remove_set.lower_bound( + start, + backref_buf_entry_t::cmp_t()); + auto end_iter = backref_remove_set.upper_bound( + end, + backref_buf_entry_t::cmp_t()); + std::set< + backref_buf_entry_t, + backref_buf_entry_t::cmp_t> res; + for (auto it = start_iter; + it != end_iter; + it++) { + res.emplace(it->paddr, it->laddr, it->len, it->type, it->seq); + } + SUBDEBUG(seastore_cache, "{} del_backrefs in range", res.size()); + return res; + } + + const backref_buf_entry_t::set_t& get_backrefs() { + return backref_inserted_set; + } + + const backref_buf_entry_t::set_t& get_del_backrefs() { + return backref_remove_set; + } + + std::list& get_backref_bufs_to_flush() { + return backref_bufs_to_flush; + } + + void trim_backref_bufs(const journal_seq_t &trim_to) { + LOG_PREFIX(Cache::trim_backref_bufs); + SUBDEBUG(seastore_cache, "trimming to {}", trim_to); + auto &backref_bufs = get_backref_bufs_to_flush(); + for (auto iter = backref_bufs.begin(); + iter != backref_bufs.end();) { + auto &backref_buf = *iter; + assert(backref_buf); + if (!backref_buf->backrefs.empty() + && backref_buf->backrefs.rbegin()->first > trim_to) { + auto iter2 = backref_buf->backrefs.upper_bound(trim_to); + SUBDEBUG(seastore_cache, "trim backref up to {}", iter2->first); + backref_buf->backrefs.erase( + backref_buf->backrefs.begin(), iter2); + break; + } else { + if (!backref_buf->backrefs.empty()) { + SUBDEBUG(seastore_cache, "trim backref buf {}", + backref_buf->backrefs.rbegin()->first); + } + iter = backref_bufs.erase(iter); + } + } + if (backref_bufs.empty() && backref_buffer) { + assert(backref_buffer->backrefs.rbegin()->first >= trim_to); + auto iter = backref_buffer->backrefs.upper_bound(trim_to); + SUBDEBUG(seastore_cache, "trim backref buffer up to {}", iter->first); + backref_buffer->backrefs.erase( + backref_buffer->backrefs.begin(), iter); + } + } /** * alloc_new_extent @@ -738,6 +905,28 @@ public: /// Dump live extents void dump_contents(); + void may_roll_backref_buffer( + const paddr_t &final_block_start, + bool force_roll = false) { + if (force_roll) { + if (backref_buffer) { + backref_bufs_to_flush.emplace_back(std::move(backref_buffer)); + } + return; + } + if (backref_buffer && !backref_buffer->backrefs.empty()) { + auto &[seq, backref_list] = *backref_buffer->backrefs.rbegin(); + if (backref_list.empty()) + return; + auto &last_seg_paddr = seq.offset.as_seg_paddr(); + if (last_seg_paddr.get_segment_id() != + final_block_start.as_seg_paddr().get_segment_id()) { + // journal segment rolled + backref_bufs_to_flush.emplace_back(std::move(backref_buffer)); + } + } + } + private: ExtentPlacementManager& epm; RootBlockRef root; ///< ref to current root @@ -982,6 +1171,10 @@ private: } } + void backref_batch_update( + std::vector &&, + const journal_seq_t &); + /// Add extent to extents handling dirty and refcounting void add_extent(CachedExtentRef ref); diff --git a/src/crimson/os/seastore/seastore.cc b/src/crimson/os/seastore/seastore.cc index 916fe7a40df..41fbcd37651 100644 --- a/src/crimson/os/seastore/seastore.cc +++ b/src/crimson/os/seastore/seastore.cc @@ -22,6 +22,7 @@ #include "crimson/os/futurized_collection.h" +#include "crimson/os/seastore/backref_manager.h" #include "crimson/os/seastore/segment_cleaner.h" #include "crimson/os/seastore/collection_manager/flat_collection_manager.h" #include "crimson/os/seastore/onode_manager/staged-fltree/fltree_onode_manager.h" diff --git a/src/crimson/os/seastore/seastore_types.h b/src/crimson/os/seastore/seastore_types.h index c62a6398022..ed87f276677 100644 --- a/src/crimson/os/seastore/seastore_types.h +++ b/src/crimson/os/seastore/seastore_types.h @@ -886,6 +886,11 @@ constexpr bool is_logical_type(extent_types_t type) { } } +constexpr bool is_retired_placeholder(extent_types_t type) +{ + return type == extent_types_t::RETIRED_PLACEHOLDER; +} + constexpr bool is_lba_node(extent_types_t type) { return type == extent_types_t::LADDR_INTERNAL || diff --git a/src/crimson/os/seastore/transaction_manager.cc b/src/crimson/os/seastore/transaction_manager.cc index 7a098e79969..795dc29e572 100644 --- a/src/crimson/os/seastore/transaction_manager.cc +++ b/src/crimson/os/seastore/transaction_manager.cc @@ -7,6 +7,7 @@ #include "crimson/os/seastore/logging.h" #include "crimson/os/seastore/transaction_manager.h" #include "crimson/os/seastore/journal.h" +#include "crimson/os/seastore/lba_manager/btree/lba_btree_node.h" /* * TransactionManager logs @@ -26,12 +27,14 @@ TransactionManager::TransactionManager( JournalRef _journal, CacheRef _cache, LBAManagerRef _lba_manager, - ExtentPlacementManagerRef &&epm) + ExtentPlacementManagerRef &&epm, + BackrefManagerRef&& backref_manager) : segment_cleaner(std::move(_segment_cleaner)), cache(std::move(_cache)), lba_manager(std::move(_lba_manager)), journal(std::move(_journal)), epm(std::move(epm)), + backref_manager(std::move(backref_manager)), sm_group(*segment_cleaner->get_segment_manager_group()) { segment_cleaner->set_extent_callback(this); @@ -58,6 +61,8 @@ TransactionManager::mkfs_ertr::future<> TransactionManager::mkfs() return cache->mkfs(t ).si_then([this, &t] { return lba_manager->mkfs(t); + }).si_then([this, &t] { + return backref_manager->mkfs(t); }).si_then([this, FNAME, &t] { INFOT("submitting mkfs transaction", t); return submit_transaction_direct(t); @@ -540,25 +545,28 @@ TransactionManager::~TransactionManager() {} TransactionManagerRef make_transaction_manager(bool detailed) { + auto epm = std::make_unique(); + auto cache = std::make_unique(*epm); + auto lba_manager = lba_manager::create_lba_manager(*cache); auto sms = std::make_unique(); + auto backref_manager = create_backref_manager(*sms, *cache); auto segment_cleaner = std::make_unique( SegmentCleaner::config_t::get_default(), std::move(sms), + *backref_manager, detailed); auto journal = journal::make_segmented(*segment_cleaner); - auto epm = std::make_unique(); epm->init_ool_writers( *segment_cleaner, segment_cleaner->get_ool_segment_seq_allocator()); - auto cache = std::make_unique(*epm); - auto lba_manager = lba_manager::create_lba_manager(*cache); return std::make_unique( std::move(segment_cleaner), std::move(journal), std::move(cache), std::move(lba_manager), - std::move(epm)); + std::move(epm), + std::move(backref_manager)); } } diff --git a/src/crimson/os/seastore/transaction_manager.h b/src/crimson/os/seastore/transaction_manager.h index a2db409775d..fd7c743e26f 100644 --- a/src/crimson/os/seastore/transaction_manager.h +++ b/src/crimson/os/seastore/transaction_manager.h @@ -25,6 +25,7 @@ #include "crimson/os/seastore/seastore_types.h" #include "crimson/os/seastore/cache.h" #include "crimson/os/seastore/lba_manager.h" +#include "crimson/os/seastore/backref_manager.h" #include "crimson/os/seastore/journal.h" #include "crimson/os/seastore/extent_placement_manager.h" #include "crimson/os/seastore/device.h" @@ -69,7 +70,8 @@ public: JournalRef journal, CacheRef cache, LBAManagerRef lba_manager, - ExtentPlacementManagerRef &&epm); + ExtentPlacementManagerRef &&epm, + BackrefManagerRef&& backref_manager); /// Writes initial metadata to disk using mkfs_ertr = base_ertr; @@ -555,6 +557,7 @@ private: LBAManagerRef lba_manager; JournalRef journal; ExtentPlacementManagerRef epm; + BackrefManagerRef backref_manager; SegmentManagerGroup &sm_group; WritePipeline write_pipeline; -- 2.39.5