From: Yingxin Cheng Date: Wed, 27 Jul 2022 06:05:46 +0000 (+0800) Subject: crimson/os/seastore: set tail to the last-known journal head under the full trim X-Git-Tag: v18.0.0~391^2 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=133b1f83f8c70fd4529caf699beb5b159add0a21;p=ceph.git crimson/os/seastore: set tail to the last-known journal head under the full trim Rather than setting to the start_seq of the current record during replay time. Signed-off-by: Yingxin Cheng --- diff --git a/src/crimson/os/seastore/async_cleaner.h b/src/crimson/os/seastore/async_cleaner.h index 440c21a1646db..14822b09a89ea 100644 --- a/src/crimson/os/seastore/async_cleaner.h +++ b/src/crimson/os/seastore/async_cleaner.h @@ -269,6 +269,9 @@ private: */ class SegmentProvider { public: + // get the committed journal head + virtual journal_seq_t get_journal_head() const = 0; + // set the committed journal head virtual void set_journal_head(journal_seq_t) = 0; @@ -772,6 +775,10 @@ public: /* * SegmentProvider interfaces */ + journal_seq_t get_journal_head() const final { + return journal_head; + } + const segment_info_t& get_seg_info(segment_id_t id) const final { return segments[id]; } diff --git a/src/crimson/os/seastore/cache.cc b/src/crimson/os/seastore/cache.cc index 3326f0a131c06..4b5572d2ad898 100644 --- a/src/crimson/os/seastore/cache.cc +++ b/src/crimson/os/seastore/cache.cc @@ -1252,24 +1252,32 @@ record_t Cache::prepare_record( journal_seq_t dirty_tail; auto maybe_dirty_tail = get_oldest_dirty_from(); if (!maybe_dirty_tail.has_value()) { - dirty_tail = JOURNAL_SEQ_NULL; + dirty_tail = cleaner->get_journal_head(); + SUBINFOT(seastore_t, "dirty_tail all trimmed, set to head {}, src={}", + t, dirty_tail, trans_src); } else if (*maybe_dirty_tail == JOURNAL_SEQ_NULL) { dirty_tail = cleaner->get_dirty_tail(); - ceph_assert(dirty_tail != JOURNAL_SEQ_NULL); + SUBINFOT(seastore_t, "dirty_tail is pending, set to {}, src={}", + t, dirty_tail, trans_src); } else { dirty_tail = *maybe_dirty_tail; } + ceph_assert(dirty_tail != JOURNAL_SEQ_NULL); journal_seq_t alloc_tail; auto maybe_alloc_tail = get_oldest_backref_dirty_from(); if (!maybe_alloc_tail.has_value()) { - alloc_tail = JOURNAL_SEQ_NULL; - // see notes in Cache::complete_commit(). - SUBWARNT(seastore_t, "backref_buffer all trimmed", t); + // FIXME: the replay point of the allocations requires to be accurate. + // Setting the alloc_tail to get_journal_head() cannot skip replaying the + // last unnecessary record. + alloc_tail = cleaner->get_journal_head(); + SUBINFOT(seastore_t, "alloc_tail all trimmed, set to head {}, src={}", + t, alloc_tail, trans_src); } else if (*maybe_alloc_tail == JOURNAL_SEQ_NULL) { ceph_abort("impossible"); } else { alloc_tail = *maybe_alloc_tail; } + ceph_assert(alloc_tail != JOURNAL_SEQ_NULL); auto tails = journal_tail_delta_t{alloc_tail, dirty_tail}; SUBDEBUGT(seastore_t, "update tails as delta {}", t, tails); bufferlist bl; @@ -1409,12 +1417,12 @@ void Cache::backref_batch_update( void Cache::complete_commit( Transaction &t, paddr_t final_block_start, - journal_seq_t seq, + journal_seq_t start_seq, AsyncCleaner *cleaner) { LOG_PREFIX(Cache::complete_commit); - SUBTRACET(seastore_t, "final_block_start={}, seq={}", - t, final_block_start, seq); + SUBTRACET(seastore_t, "final_block_start={}, start_seq={}", + t, final_block_start, start_seq); std::vector backref_list; t.for_each_fresh_block([&](const CachedExtentRef &i) { @@ -1442,19 +1450,6 @@ void Cache::complete_commit( t, i->get_paddr(), i->get_length()); - // FIXME: In theroy, adding backref_list in the finalize phase - // can result in wrong alloc_tail for replay: - // * trans-1 enters prepare record with allocations. - // * trans-2 (cleaner) enters prepare record, see no alloc-tail, - // so it assume the alloc-tail is the start-seq of trans-2. - // * trans-1 enters finalize and update the backref_list, - // implying that alloc-tail is the start-seq of trans-1. - // * trans-2 (cleaner) enters finalize. - // During replay, the alloc-tail will be set to the start-seq of - // trans-2 according to journal_tail_delta_t, but it should be - // actually the start-seq of trans-1. - // This can only happen if alloc_tail trimming is able to trim to - // the journal head. backref_list.emplace_back( std::make_unique( i->get_paddr(), @@ -1465,7 +1460,7 @@ void Cache::complete_commit( : L_ADDR_NULL), i->get_length(), i->get_type(), - seq)); + start_seq)); } else if (is_backref_node(i->get_type())) { add_backref_extent(i->get_paddr(), i->get_type()); } else { @@ -1487,7 +1482,7 @@ void Cache::complete_commit( i->state = CachedExtent::extent_state_t::DIRTY; assert(i->version > 0); if (i->version == 1 || i->get_type() == extent_types_t::ROOT) { - i->dirty_from_or_retired_at = seq; + i->dirty_from_or_retired_at = start_seq; DEBUGT("commit extent done, become dirty -- {}", t, *i); } else { DEBUGT("commit extent done -- {}", t, *i); @@ -1516,9 +1511,9 @@ void Cache::complete_commit( i->complete_io(); } - last_commit = seq; + last_commit = start_seq; for (auto &i: t.retired_set) { - i->dirty_from_or_retired_at = last_commit; + i->dirty_from_or_retired_at = start_seq; if (is_backref_mapped_extent_node(i) || is_retired_placeholder(i->get_type())) { DEBUGT("backref_list free {} len {}", @@ -1531,7 +1526,7 @@ void Cache::complete_commit( L_ADDR_NULL, i->get_length(), i->get_type(), - seq)); + start_seq)); } else if (is_backref_node(i->get_type())) { remove_backref_extent(i->get_paddr()); } else { @@ -1564,13 +1559,13 @@ void Cache::complete_commit( i->cast()->get_laddr(), i->get_length(), i->get_type(), - seq)); + start_seq)); const auto t_src = t.get_src(); add_extent(i, &t_src); } } if (!backref_list.empty()) { - backref_batch_update(std::move(backref_list), seq); + backref_batch_update(std::move(backref_list), start_seq); } } diff --git a/src/crimson/os/seastore/cache.h b/src/crimson/os/seastore/cache.h index a5b0f9bb68f25..1ece53e9a754e 100644 --- a/src/crimson/os/seastore/cache.h +++ b/src/crimson/os/seastore/cache.h @@ -646,8 +646,7 @@ public: backref_buffer->backrefs_by_seq.begin(), iter); } if (!backref_buffer || backref_buffer->backrefs_by_seq.empty()) { - // see notes in Cache::complete_commit(). - SUBWARN(seastore_cache, "backref_buffer all trimmed"); + SUBDEBUG(seastore_cache, "backref_buffer all trimmed"); } } @@ -920,7 +919,7 @@ public: } else { auto oldest = dirty.begin()->get_dirty_from(); if (oldest == JOURNAL_SEQ_NULL) { - SUBINFO(seastore_cache, "dirty_oldest: pending"); + SUBDEBUG(seastore_cache, "dirty_oldest: pending"); } else { SUBDEBUG(seastore_cache, "dirty_oldest: {}", oldest); } diff --git a/src/crimson/os/seastore/journal/segmented_journal.cc b/src/crimson/os/seastore/journal/segmented_journal.cc index 3e32ada9eb13b..e7d0559648d4c 100644 --- a/src/crimson/os/seastore/journal/segmented_journal.cc +++ b/src/crimson/os/seastore/journal/segmented_journal.cc @@ -199,12 +199,8 @@ SegmentedJournal::scan_last_segment( decode(tail_delta, delta.bl); auto start_seq = locator.write_result.start_seq; INFO("got {}, at seq {}", tail_delta, start_seq); - if (tail_delta.alloc_tail == JOURNAL_SEQ_NULL) { - tail_delta.alloc_tail = start_seq; - } - if (tail_delta.dirty_tail == JOURNAL_SEQ_NULL) { - tail_delta.dirty_tail = start_seq; - } + ceph_assert(tail_delta.dirty_tail != JOURNAL_SEQ_NULL); + ceph_assert(tail_delta.alloc_tail != JOURNAL_SEQ_NULL); segment_provider.update_journal_tails( tail_delta.dirty_tail, tail_delta.alloc_tail); } diff --git a/src/test/crimson/seastore/test_btree_lba_manager.cc b/src/test/crimson/seastore/test_btree_lba_manager.cc index 1bf7c5050f4b1..a562e7d019d8d 100644 --- a/src/test/crimson/seastore/test_btree_lba_manager.cc +++ b/src/test/crimson/seastore/test_btree_lba_manager.cc @@ -51,6 +51,8 @@ struct btree_test_base : /* * SegmentProvider interfaces */ + journal_seq_t get_journal_head() const final { return dummy_tail; } + void set_journal_head(journal_seq_t) final {} journal_seq_t get_dirty_tail() const final { return dummy_tail; } diff --git a/src/test/crimson/seastore/test_seastore_journal.cc b/src/test/crimson/seastore/test_seastore_journal.cc index 2a0f6b16f83aa..6df8e08337bfb 100644 --- a/src/test/crimson/seastore/test_seastore_journal.cc +++ b/src/test/crimson/seastore/test_seastore_journal.cc @@ -92,6 +92,8 @@ struct journal_test_t : seastar_test_suite_t, SegmentProvider { /* * SegmentProvider interfaces */ + journal_seq_t get_journal_head() const final { return dummy_tail; } + void set_journal_head(journal_seq_t) final {} journal_seq_t get_dirty_tail() const final { return dummy_tail; }