From: Xuehan Xu Date: Tue, 15 Mar 2022 12:44:30 +0000 (+0800) Subject: crimson/os/seastore/segment_cleaner: trim journal and reclaim space with the help... X-Git-Tag: v18.0.0~915^2~7 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=17669171f6157b6922d4f01881312e1a72c11a93;p=ceph-ci.git crimson/os/seastore/segment_cleaner: trim journal and reclaim space with the help of backref Signed-off-by: Xuehan Xu --- diff --git a/src/crimson/os/seastore/backref/btree_backref_manager.cc b/src/crimson/os/seastore/backref/btree_backref_manager.cc index d652da030ad..18ee1a6a628 100644 --- a/src/crimson/os/seastore/backref/btree_backref_manager.cc +++ b/src/crimson/os/seastore/backref/btree_backref_manager.cc @@ -402,6 +402,28 @@ BtreeBackrefManager::init_cached_extent_ret BtreeBackrefManager::init_cached_ext }); } +BtreeBackrefManager::rewrite_extent_ret +BtreeBackrefManager::rewrite_extent( + Transaction &t, + CachedExtentRef extent) +{ + LOG_PREFIX(BtreeBackrefManager::rewrite_extent); + auto updated = cache.update_extent_from_transaction(t, extent); + if (!updated) { + DEBUGT("extent is already retired, skipping -- {}", t, *extent); + return rewrite_extent_iertr::now(); + } + extent = updated; + + auto c = get_context(t); + return with_btree( + cache, + c, + [c, extent](auto &btree) mutable { + return btree.rewrite_extent(c, extent); + }); +} + BtreeBackrefManager::remove_mapping_ret BtreeBackrefManager::remove_mapping( Transaction &t, diff --git a/src/crimson/os/seastore/backref/btree_backref_manager.h b/src/crimson/os/seastore/backref/btree_backref_manager.h index 1ec9a38308b..fb0dd50ab0f 100644 --- a/src/crimson/os/seastore/backref/btree_backref_manager.h +++ b/src/crimson/os/seastore/backref/btree_backref_manager.h @@ -92,6 +92,10 @@ public: std::vector &, std::vector &) final; + rewrite_extent_ret rewrite_extent( + Transaction &t, + CachedExtentRef extent) final; + void add_pin(BackrefPin &pin) final { auto *bpin = reinterpret_cast(&pin); pin_set.add_pin(bpin->get_range_pin()); diff --git a/src/crimson/os/seastore/backref_manager.h b/src/crimson/os/seastore/backref_manager.h index 3ebd1064cca..8afa21ef79e 100644 --- a/src/crimson/os/seastore/backref_manager.h +++ b/src/crimson/os/seastore/backref_manager.h @@ -48,6 +48,17 @@ public: Transaction &t, paddr_t offset) = 0; + /** + * rewrite_extent + * + * rewrite extent into passed transaction + */ + using rewrite_extent_iertr = base_iertr; + using rewrite_extent_ret = rewrite_extent_iertr::future<>; + virtual rewrite_extent_ret rewrite_extent( + Transaction &t, + CachedExtentRef extent) = 0; + /** * Insert new paddr_t -> laddr_t mapping */ diff --git a/src/crimson/os/seastore/cache.cc b/src/crimson/os/seastore/cache.cc index f36d4c9d31d..002d948da6e 100644 --- a/src/crimson/os/seastore/cache.cc +++ b/src/crimson/os/seastore/cache.cc @@ -1037,8 +1037,9 @@ record_t Cache::prepare_record( retire_stat.increment(i->get_length()); DEBUGT("retired and remove extent -- {}", t, *i); commit_retire_extent(t, i); - if (is_backref_mapped_extent_node(i) - || is_retired_placeholder(i->get_type())) { + if ((is_backref_mapped_extent_node(i) + || is_retired_placeholder(i->get_type())) + && t.should_record_release(i->get_paddr())) { rel_delta.alloc_blk_ranges.emplace_back( i->get_paddr(), L_ADDR_NULL, @@ -1334,13 +1335,15 @@ void Cache::complete_commit( i->dirty_from_or_retired_at = last_commit; if (is_backref_mapped_extent_node(i) || is_retired_placeholder(i->get_type())) { - backref_list.emplace_back( - std::make_unique( - i->get_paddr(), - L_ADDR_NULL, - i->get_length(), - i->get_type(), - seq)); + if (t.should_record_release(i->get_paddr())) { + backref_list.emplace_back( + std::make_unique( + i->get_paddr(), + L_ADDR_NULL, + i->get_length(), + i->get_type(), + seq)); + } } else if (is_backref_node(i->get_type())) { remove_backref_extent(i->get_paddr()); } else { diff --git a/src/crimson/os/seastore/cache.h b/src/crimson/os/seastore/cache.h index aa30c061d64..7270a4fe379 100644 --- a/src/crimson/os/seastore/cache.h +++ b/src/crimson/os/seastore/cache.h @@ -614,6 +614,13 @@ public: return res; } + backref_buf_entry_t get_del_backref( + paddr_t addr) { + auto it = backref_remove_set.find(addr, backref_buf_entry_t::cmp_t()); + assert(it != backref_remove_set.end()); + return *it; + } + const backref_buf_entry_t::set_t& get_backrefs() { return backref_inserted_set; } diff --git a/src/crimson/os/seastore/segment_cleaner.cc b/src/crimson/os/seastore/segment_cleaner.cc index 2f6ecf26b75..a5c11fb76a3 100644 --- a/src/crimson/os/seastore/segment_cleaner.cc +++ b/src/crimson/os/seastore/segment_cleaner.cc @@ -377,11 +377,13 @@ SegmentCleaner::SegmentCleaner( config_t config, SegmentManagerGroupRef&& sm_group, BackrefManager &backref_manager, + Cache &cache, bool detailed) : detailed(detailed), config(config), sm_group(std::move(sm_group)), backref_manager(backref_manager), + cache(cache), ool_segment_seq_allocator( new SegmentSeqAllocator(segment_type_t::OOL)), gc_process(*this) @@ -527,30 +529,37 @@ void SegmentCleaner::close_segment(segment_id_t segment) get_projected_reclaim_ratio()); } +SegmentCleaner::trim_backrefs_ret SegmentCleaner::trim_backrefs( + Transaction &t, + journal_seq_t limit) +{ + return backref_manager.batch_insert_from_cache( + t, + limit, + config.journal_rewrite_backref_per_cycle + ); +} + SegmentCleaner::rewrite_dirty_ret SegmentCleaner::rewrite_dirty( Transaction &t, journal_seq_t limit) { - LOG_PREFIX(SegmentCleaner::rewrite_dirty); return ecb->get_next_dirty_extents( t, limit, - config.journal_rewrite_per_cycle + config.journal_rewrite_dirty_per_cycle ).si_then([=, &t](auto dirty_list) { + LOG_PREFIX(SegmentCleaner::rewrite_dirty); DEBUGT("rewrite {} dirty extents", t, dirty_list.size()); return seastar::do_with( std::move(dirty_list), - [FNAME, this, &t](auto &dirty_list) { - return backref_manager.batch_insert_from_cache( - t, - dirty_list.back()->get_dirty_from() - ).si_then([FNAME, this, &t, &dirty_list] { - return trans_intr::do_for_each( - dirty_list, - [FNAME, this, &t](auto &e) { - DEBUGT("cleaning {}", t, *e); - return ecb->rewrite_extent(t, e); - }); + [this, &t](auto &dirty_list) { + return trans_intr::do_for_each( + dirty_list, + [this, &t](auto &e) { + LOG_PREFIX(SegmentCleaner::rewrite_dirty); + DEBUGT("cleaning {}", t, *e); + return ecb->rewrite_extent(t, e); }); }); }); @@ -597,130 +606,263 @@ SegmentCleaner::gc_cycle_ret SegmentCleaner::do_gc_cycle() SegmentCleaner::gc_trim_journal_ret SegmentCleaner::gc_trim_journal() { - return repeat_eagain([this] { - return ecb->with_transaction_intr( - Transaction::src_t::CLEANER_TRIM, - "trim_journal", - [this](auto& t) - { - return rewrite_dirty(t, get_dirty_tail() - ).si_then([this, &t] { - return ecb->submit_transaction_direct(t); + return ecb->with_transaction_intr( + Transaction::src_t::TRIM_BACKREF, + "trim_backref", + [this](auto &t) { + return seastar::do_with( + get_dirty_tail(), + [this, &t](auto &limit) { + return trim_backrefs(t, limit).si_then( + [this, &t, &limit](auto trim_backrefs_to) + -> ExtentCallbackInterface::submit_transaction_direct_iertr::future< + journal_seq_t> { + if (trim_backrefs_to != JOURNAL_SEQ_NULL) { + return ecb->submit_transaction_direct( + t, std::make_optional(trim_backrefs_to) + ).si_then([trim_backrefs_to=std::move(trim_backrefs_to)]() mutable { + return seastar::make_ready_future< + journal_seq_t>(std::move(trim_backrefs_to)); + }); + } + return seastar::make_ready_future(std::move(limit)); + }); + }); + }).handle_error( + crimson::ct_error::eagain::handle([](auto) { + ceph_abort("unexpected eagain"); + }), + crimson::ct_error::pass_further_all() + ).safe_then([this](auto seq) { + return repeat_eagain([this, seq=std::move(seq)]() mutable { + return ecb->with_transaction_intr( + Transaction::src_t::CLEANER_TRIM, + "trim_journal", + [this, seq=std::move(seq)](auto& t) + { + return rewrite_dirty(t, seq + ).si_then([this, &t] { + return ecb->submit_transaction_direct(t); + }); + }); + }); + }); +} + +SegmentCleaner::retrieve_backref_extents_ret +SegmentCleaner::_retrieve_backref_extents( + Transaction &t, + std::set< + Cache::backref_extent_buf_entry_t, + Cache::backref_extent_buf_entry_t::cmp_t> &&backref_extents, + std::vector &extents) +{ + return trans_intr::parallel_for_each( + backref_extents, + [this, &extents, &t](auto &ent) { + // only the gc fiber which is single can rewrite backref extents, + // so it must be alive + assert(is_backref_node(ent.type)); + LOG_PREFIX(SegmentCleaner::_retrieve_backref_extents); + DEBUGT("getting backref extent of type {} at {}", + t, + ent.type, + ent.paddr); + return cache.get_extent_by_type( + t, ent.type, ent.paddr, L_ADDR_NULL, BACKREF_NODE_SIZE + ).si_then([&extents](auto ext) { + extents.emplace_back(std::move(ext)); + }); + }); +} + +SegmentCleaner::retrieve_live_extents_ret +SegmentCleaner::_retrieve_live_extents( + Transaction &t, + std::set< + backref_buf_entry_t, + backref_buf_entry_t::cmp_t> &&backrefs, + std::vector &extents) +{ + return seastar::do_with( + JOURNAL_SEQ_NULL, + std::move(backrefs), + [this, &t, &extents](auto &seq, auto &backrefs) { + return trans_intr::do_for_each( + backrefs, + [this, &extents, &t, &seq](auto &ent) { + LOG_PREFIX(SegmentCleaner::gc_reclaim_space); + DEBUGT("getting extent of type {} at {}~{}", + t, + ent.type, + ent.paddr, + ent.len); + return ecb->get_extent_if_live( + t, ent.type, ent.paddr, ent.laddr, ent.len + ).si_then([this, &extents, &ent, &seq](auto ext) { + if (!ext) { + logger().debug( + "SegmentCleaner::gc_reclaim_space:" + " addr {} dead, skipping", + ent.paddr); + auto backref = cache.get_del_backref(ent.paddr); + if (seq == JOURNAL_SEQ_NULL || seq < backref.seq) { + seq = backref.seq; + } + } else { + extents.emplace_back(std::move(ext)); + } + return ExtentCallbackInterface::rewrite_extent_iertr::now(); }); + }).si_then([&seq] { + return retrieve_live_extents_iertr::make_ready_future< + journal_seq_t>(std::move(seq)); }); }); } SegmentCleaner::gc_reclaim_space_ret SegmentCleaner::gc_reclaim_space() { - if (!scan_cursor) { + if (!next_reclaim_pos) { journal_seq_t next = get_next_gc_target(); - if (next == JOURNAL_SEQ_NULL) { - logger().debug( - "SegmentCleaner::do_gc: no segments to gc"); - return seastar::now(); - } - scan_cursor = - std::make_unique( - next); - logger().debug( - "SegmentCleaner::do_gc: starting gc on segment {}", - scan_cursor->seq); + next_reclaim_pos = std::make_optional(next.offset); + } + LOG_PREFIX(SegmentCleaner::gc_reclaim_space); + INFO("cleaning {}", *next_reclaim_pos); + auto &seg_paddr = next_reclaim_pos->as_seg_paddr(); + paddr_t end_paddr; + auto segment_id = seg_paddr.get_segment_id(); + if (final_reclaim()) { + segment_id_t next_segment_id{ + segment_id.device_id(), + segment_id.device_segment_id() + 1}; + end_paddr = paddr_t::make_seg_paddr(next_segment_id, 0); } else { - ceph_assert(!scan_cursor->is_complete()); + end_paddr = seg_paddr + config.reclaim_bytes_stride; } - return sm_group->scan_extents( - *scan_cursor, - config.reclaim_bytes_stride - ).safe_then([this](auto &&_extents) { - return seastar::do_with( - std::move(_extents), - (size_t)0, - [this](auto &extents, auto &reclaimed) { - return repeat_eagain([this, &extents, &reclaimed]() mutable { - reclaimed = 0; - logger().debug( - "SegmentCleaner::gc_reclaim_space: processing {} extents", - extents.size()); - return ecb->with_transaction_intr( - Transaction::src_t::CLEANER_RECLAIM, - "reclaim_space", - [this, &extents, &reclaimed](auto& t) - { - return trans_intr::do_for_each( - extents, - [this, &t, &reclaimed](auto &extent) { - auto &addr = extent.first; - auto commit_time = extent.second.first.commit_time; - auto commit_type = extent.second.first.commit_type; - auto &info = extent.second.second; - logger().debug( - "SegmentCleaner::gc_reclaim_space: checking extent {}", - info); - return ecb->get_extent_if_live( - t, - info.type, - addr, - info.addr, - info.len - ).si_then([&info, commit_type, commit_time, addr=addr, &t, this, &reclaimed] - (CachedExtentRef ext) { - if (!ext) { - logger().debug( - "SegmentCleaner::gc_reclaim_space: addr {} dead, skipping", - addr); - return ExtentCallbackInterface::rewrite_extent_iertr::now(); - } else { - logger().debug( - "SegmentCleaner::gc_reclaim_space: addr {} alive, gc'ing {}", - addr, - *ext); - assert(commit_time); - assert(info.last_modified); - assert(commit_type == record_commit_type_t::MODIFY - || commit_type == record_commit_type_t::REWRITE); - if (ext->get_last_modified() == time_point()) { - assert(ext->get_last_rewritten() == time_point()); - ext->set_last_modified(duration(info.last_modified)); - } - if (commit_type == record_commit_type_t::REWRITE - && ext->get_last_rewritten() == time_point()) { - ext->set_last_rewritten(duration(commit_time)); - } + double pavail_ratio = get_projected_available_ratio(); + seastar::lowres_system_clock::time_point start = seastar::lowres_system_clock::now(); - assert( - (commit_type == record_commit_type_t::MODIFY - && commit_time <= - ext->get_last_modified().time_since_epoch().count()) - || (commit_type == record_commit_type_t::REWRITE - && commit_time == - ext->get_last_rewritten().time_since_epoch().count())); - - reclaimed += ext->get_length(); - return ecb->rewrite_extent( - t, - ext); - } - }); - }).si_then([this, &t] { - if (scan_cursor->is_complete()) { - t.mark_segment_to_release(scan_cursor->get_segment_id()); - } - return ecb->submit_transaction_direct(t); - }); - }); - }).safe_then([&reclaimed] { - return seastar::make_ready_future(reclaimed); + return seastar::do_with( + (size_t)0, + (size_t)0, + [this, segment_id, pavail_ratio, start, end_paddr]( + auto &reclaimed, + auto &runs) { + return repeat_eagain( + [this, &reclaimed, segment_id, &runs, end_paddr]() mutable { + reclaimed = 0; + runs++; + return seastar::do_with( + cache.get_backref_extents_in_range( + *next_reclaim_pos, end_paddr), + cache.get_backrefs_in_range(*next_reclaim_pos, end_paddr), + cache.get_del_backrefs_in_range( + *next_reclaim_pos, end_paddr), + JOURNAL_SEQ_NULL, + [this, segment_id, &reclaimed, end_paddr] + (auto &backref_extents, auto &backrefs, auto &del_backrefs, auto &seq) { + return ecb->with_transaction_intr( + Transaction::src_t::CLEANER_RECLAIM, + "reclaim_space", + [segment_id, this, &backref_extents, &backrefs, &seq, + &del_backrefs, &reclaimed, end_paddr](auto &t) { + return backref_manager.get_mappings( + t, *next_reclaim_pos, end_paddr + ).si_then( + [segment_id, this, &backref_extents, &backrefs, &seq, + &del_backrefs, &reclaimed, &t](auto pin_list) { + LOG_PREFIX(SegmentCleaner::gc_reclaim_space); + DEBUG("{} backrefs, {} del_backrefs, {} pins", + backrefs.size(), del_backrefs.size(), pin_list.size()); + for (auto &br : backrefs) { + if (seq == JOURNAL_SEQ_NULL + || (br.seq != JOURNAL_SEQ_NULL && br.seq > seq)) + seq = br.seq; + } + for (auto &pin : pin_list) { + backrefs.emplace( + pin->get_key(), + pin->get_val(), + pin->get_length(), + pin->get_type(), + journal_seq_t()); + } + for (auto &del_backref : del_backrefs) { + INFO("del_backref {}~{} {} {}", + del_backref.paddr, del_backref.len, del_backref.type, del_backref.seq); + auto it = backrefs.find(del_backref.paddr); + if (it != backrefs.end()) + backrefs.erase(it); + if (seq == JOURNAL_SEQ_NULL + || (del_backref.seq != JOURNAL_SEQ_NULL && del_backref.seq > seq)) + seq = del_backref.seq; + } + return seastar::do_with( + std::vector(), + [this, &backref_extents, &backrefs, &reclaimed, &t, &seq] + (auto &extents) { + return _retrieve_backref_extents( + t, std::move(backref_extents), extents + ).si_then([this, &extents, &t, &backrefs] { + return _retrieve_live_extents( + t, std::move(backrefs), extents); + }).si_then([this, &seq, &t](auto nseq) { + if (nseq != JOURNAL_SEQ_NULL && nseq > seq) + seq = nseq; + auto fut = BackrefManager::batch_insert_iertr::now(); + if (seq != JOURNAL_SEQ_NULL) { + fut = backref_manager.batch_insert_from_cache( + t, seq, std::numeric_limits::max() + ).si_then([](auto) { + return BackrefManager::batch_insert_iertr::now(); + }); + } + return fut; + }).si_then([&extents, this, &t, &reclaimed] { + return trans_intr::do_for_each( + extents, + [this, &t, &reclaimed](auto &ext) { + reclaimed += ext->get_length(); + return ecb->rewrite_extent(t, ext); + }); + }); + }).si_then([this, &t, segment_id, &seq] { + if (final_reclaim()) + t.mark_segment_to_release(segment_id); + return ecb->submit_transaction_direct( + t, std::make_optional(std::move(seq))); + }); + }); + }); }); + }).safe_then( + [&reclaimed, this, pavail_ratio, start, &runs, end_paddr] { + LOG_PREFIX(SegmentCleaner::gc_reclaim_space); +#ifndef NDEBUG + auto ndel_backrefs = cache.get_del_backrefs_in_range( + *next_reclaim_pos, end_paddr); + if (!ndel_backrefs.empty()) { + for (auto &del_br : ndel_backrefs) { + ERROR("unexpected del_backref {}~{} {} {}", + del_br.paddr, del_br.len, del_br.type, del_br.seq); + } + ceph_abort("impossible"); + } +#endif + stats.reclaiming_bytes += reclaimed; + auto d = seastar::lowres_system_clock::now() - start; + INFO("duration: {}, pavail_ratio before: {}, repeats: {}", d, pavail_ratio, runs); + if (final_reclaim()) { + stats.reclaim_rewrite_bytes += stats.reclaiming_bytes; + stats.reclaiming_bytes = 0; + next_reclaim_pos.reset(); + } else { + next_reclaim_pos = + paddr_t(*next_reclaim_pos + config.reclaim_bytes_stride); + } }); - }).safe_then([this](size_t reclaimed) { - stats.reclaiming_bytes += reclaimed; - if (scan_cursor->is_complete()) { - stats.reclaim_rewrite_bytes += stats.reclaiming_bytes; - stats.reclaiming_bytes = 0; - scan_cursor.reset(); - } }); } diff --git a/src/crimson/os/seastore/segment_cleaner.h b/src/crimson/os/seastore/segment_cleaner.h index b189af533b2..f04073faa77 100644 --- a/src/crimson/os/seastore/segment_cleaner.h +++ b/src/crimson/os/seastore/segment_cleaner.h @@ -433,7 +433,9 @@ public: size_t reclaim_bytes_stride = 0; /// Number of bytes of journal entries to rewrite per cycle - size_t journal_rewrite_per_cycle = 0; + size_t journal_rewrite_dirty_per_cycle = 0; + + size_t journal_rewrite_backref_per_cycle = 0; static config_t get_default() { return config_t{ @@ -444,7 +446,8 @@ public: .6, // reclaim_ratio_gc_threshhold .2, // available_ratio_hard_limit 1<<25,// reclaim 64MB per gc cycle - 1<<25 // rewrite 64MB of journal entries per gc cycle + 1<<25,// rewrite 64MB of journal entries per gc cycle + 1<<24 // create 16MB of backref extents per gc cycle }; } }; @@ -542,7 +545,8 @@ public: using submit_transaction_direct_ret = submit_transaction_direct_iertr::future<>; virtual submit_transaction_direct_ret submit_transaction_direct( - Transaction &t) = 0; + Transaction &t, + std::optional seq_to_trim = std::nullopt) = 0; }; private: @@ -551,6 +555,7 @@ private: SegmentManagerGroupRef sm_group; BackrefManager &backref_manager; + Cache &cache; SpaceTrackerIRef space_tracker; segments_info_t segments; @@ -596,6 +601,7 @@ public: config_t config, SegmentManagerGroupRef&& sm_group, BackrefManager &backref_manager, + Cache &cache, bool detailed = false); SegmentSeqAllocator& get_ool_segment_seq_allocator() { @@ -810,6 +816,12 @@ private: Transaction &t, journal_seq_t limit); + using trim_backrefs_iertr = work_iertr; + using trim_backrefs_ret = trim_backrefs_iertr::future; + trim_backrefs_ret trim_backrefs( + Transaction &t, + journal_seq_t limit); + journal_seq_t get_dirty_tail() const { auto ret = journal_head; ret.segment_seq -= std::min( @@ -827,9 +839,12 @@ private: } // GC status helpers - std::unique_ptr< - SegmentManagerGroup::scan_extents_cursor - > scan_cursor; + std::optional next_reclaim_pos; + + bool final_reclaim() { + return next_reclaim_pos->as_seg_paddr().get_segment_off() + + config.reclaim_bytes_stride >= (size_t)segments.get_segment_size(); + } /** * GCProcess @@ -919,6 +934,26 @@ private: using gc_reclaim_space_ret = gc_reclaim_space_ertr::future<>; gc_reclaim_space_ret gc_reclaim_space(); + using retrieve_backref_extents_iertr = work_iertr; + using retrieve_backref_extents_ret = + retrieve_backref_extents_iertr::future<>; + retrieve_backref_extents_ret _retrieve_backref_extents( + Transaction &t, + std::set< + Cache::backref_extent_buf_entry_t, + Cache::backref_extent_buf_entry_t::cmp_t> &&backref_extents, + std::vector &extents); + + using retrieve_live_extents_iertr = work_iertr; + using retrieve_live_extents_ret = + retrieve_live_extents_iertr::future; + retrieve_live_extents_ret _retrieve_live_extents( + Transaction &t, + std::set< + backref_buf_entry_t, + backref_buf_entry_t::cmp_t> &&backrefs, + std::vector &extents); + size_t get_bytes_used_current_segment() const { auto& seg_addr = journal_head.offset.as_seg_paddr(); return seg_addr.get_segment_off(); @@ -929,18 +964,6 @@ private: return segment_size - get_bytes_used_current_segment(); } - /** - * get_bytes_scanned_current_segment - * - * Returns the number of bytes from the current gc segment that - * have been scanned. - */ - size_t get_bytes_scanned_current_segment() const { - if (!scan_cursor) - return 0; - return scan_cursor->get_segment_offset(); - } - /// Returns free space available for writes size_t get_available_bytes() const { return segments.get_available_bytes(); diff --git a/src/crimson/os/seastore/transaction.h b/src/crimson/os/seastore/transaction.h index 19d0e7e2135..17479795df1 100644 --- a/src/crimson/os/seastore/transaction.h +++ b/src/crimson/os/seastore/transaction.h @@ -132,6 +132,12 @@ public: } fresh_block_stats.increment(ref->get_length()); write_set.insert(*ref); + if (is_backref_node(ref->get_type())) + fresh_backref_extents++; + } + + uint64_t get_num_fresh_backref() const { + return fresh_backref_extents; } void mark_delayed_extent_inline(LogicalCachedExtentRef& ref) { @@ -212,6 +218,19 @@ public: return retired_set; } + bool should_record_release(paddr_t addr) { + auto count = no_release_delta_retired_set.count(addr); +#ifndef NDEBUG + if (count) + assert(retired_set.count(addr)); +#endif + return count == 0; + } + + void dont_record_release(CachedExtentRef ref) { + no_release_delta_retired_set.insert(ref); + } + template auto for_each_fresh_block(F &&f) const { std::for_each(ool_block_list.begin(), ool_block_list.end(), f); @@ -232,6 +251,7 @@ public: MUTATE = 0, READ, // including weak and non-weak read transactions CLEANER_TRIM, + TRIM_BACKREF, CLEANER_RECLAIM, MAX }; @@ -288,6 +308,7 @@ public: offset = 0; delayed_temp_offset = 0; read_set.clear(); + fresh_backref_extents = 0; invalidate_clear_write_set(); mutated_block_list.clear(); fresh_block_stats = {}; @@ -296,6 +317,7 @@ public: inline_block_list.clear(); ool_block_list.clear(); retired_set.clear(); + no_release_delta_retired_set.clear(); onode_tree_stats = {}; lba_tree_stats = {}; backref_tree_stats = {}; @@ -375,6 +397,8 @@ private: */ read_set_t read_set; ///< set of extents read by paddr + uint64_t fresh_backref_extents = 0; // counter of new backref extents + /** * write_set * @@ -407,6 +431,8 @@ private: */ pextent_set_t retired_set; + pextent_set_t no_release_delta_retired_set; + /// stats to collect when commit or invalidate tree_stats_t onode_tree_stats; tree_stats_t lba_tree_stats; @@ -437,6 +463,8 @@ inline std::ostream& operator<<(std::ostream& os, return os << "READ"; case Transaction::src_t::CLEANER_TRIM: return os << "CLEANER_TRIM"; + case Transaction::src_t::TRIM_BACKREF: + return os << "TRIM_BACKREF"; case Transaction::src_t::CLEANER_RECLAIM: return os << "CLEANER_RECLAIM"; default: diff --git a/src/crimson/os/seastore/transaction_manager.cc b/src/crimson/os/seastore/transaction_manager.cc index 1588d0a96a7..aec0b57a9d4 100644 --- a/src/crimson/os/seastore/transaction_manager.cc +++ b/src/crimson/os/seastore/transaction_manager.cc @@ -319,7 +319,8 @@ TransactionManager::submit_transaction( TransactionManager::submit_transaction_direct_ret TransactionManager::submit_transaction_direct( - Transaction &tref) + Transaction &tref, + std::optional seq_to_trim) { LOG_PREFIX(TransactionManager::submit_transaction_direct); SUBTRACET(seastore_t, "start", tref); @@ -352,7 +353,7 @@ TransactionManager::submit_transaction_direct( }).si_then([this, FNAME, &tref] { SUBTRACET(seastore_t, "about to prepare", tref); return tref.get_handle().enter(write_pipeline.prepare); - }).si_then([this, FNAME, &tref]() mutable + }).si_then([this, FNAME, &tref, seq_to_trim=std::move(seq_to_trim)]() mutable -> submit_transaction_iertr::future<> { auto record = cache->prepare_record(tref, segment_cleaner.get()); @@ -360,11 +361,15 @@ TransactionManager::submit_transaction_direct( SUBTRACET(seastore_t, "about to submit to journal", tref); return journal->submit_record(std::move(record), tref.get_handle() - ).safe_then([this, FNAME, &tref](auto submit_result) mutable { + ).safe_then([this, FNAME, &tref, seq_to_trim=std::move(seq_to_trim)] + (auto submit_result) mutable { SUBDEBUGT(seastore_t, "committed with {}", tref, submit_result); auto start_seq = submit_result.write_result.start_seq; auto end_seq = submit_result.write_result.get_end_seq(); segment_cleaner->set_journal_head(end_seq); + if (seq_to_trim && *seq_to_trim != JOURNAL_SEQ_NULL) { + cache->trim_backref_bufs(*seq_to_trim); + } cache->complete_commit( tref, submit_result.record_block_base, @@ -461,6 +466,11 @@ TransactionManager::rewrite_extent_ret TransactionManager::rewrite_extent( CachedExtentRef extent) { LOG_PREFIX(TransactionManager::rewrite_extent); + + if (is_backref_node(extent->get_type())) { + return backref_manager->rewrite_extent(t, extent); + } + { auto updated = cache->update_extent_from_transaction(t, extent); if (!updated) { @@ -476,12 +486,24 @@ TransactionManager::rewrite_extent_ret TransactionManager::rewrite_extent( return rewrite_extent_iertr::now(); } + auto fut = rewrite_extent_iertr::now(); if (extent->is_logical()) { - return rewrite_logical_extent(t, extent->cast()); + fut = rewrite_logical_extent(t, extent->cast()); } else { DEBUGT("rewriting physical extent -- {}", t, *extent); - return lba_manager->rewrite_extent(t, extent); + fut = lba_manager->rewrite_extent(t, extent); } + + return fut.si_then([this, extent, &t] { + t.dont_record_release(extent); + return backref_manager->remove_mapping( + t, extent->get_paddr()).si_then([](auto) { + return seastar::now(); + }).handle_error_interruptible( + crimson::ct_error::input_output_error::pass_further(), + crimson::ct_error::assert_all() + ); + }); } TransactionManager::get_extent_if_live_ret TransactionManager::get_extent_if_live( @@ -588,6 +610,7 @@ TransactionManagerRef make_transaction_manager(bool detailed) SegmentCleaner::config_t::get_default(), std::move(sms), *backref_manager, + *cache, detailed); auto journal = journal::make_segmented(*segment_cleaner); epm->init_ool_writers( diff --git a/src/crimson/os/seastore/transaction_manager.h b/src/crimson/os/seastore/transaction_manager.h index b4e9ed92af4..34490baec08 100644 --- a/src/crimson/os/seastore/transaction_manager.h +++ b/src/crimson/os/seastore/transaction_manager.h @@ -382,7 +382,8 @@ public: /// SegmentCleaner::ExtentCallbackInterface using SegmentCleaner::ExtentCallbackInterface::submit_transaction_direct_ret; submit_transaction_direct_ret submit_transaction_direct( - Transaction &t) final; + Transaction &t, + std::optional seq_to_trim = std::nullopt) final; /** * flush