]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
crimson/os/seastore: set tail to the last-known journal head under the full trim 47291/head
authorYingxin Cheng <yingxin.cheng@intel.com>
Wed, 27 Jul 2022 06:05:46 +0000 (14:05 +0800)
committerYingxin Cheng <yingxin.cheng@intel.com>
Wed, 27 Jul 2022 08:24:06 +0000 (16:24 +0800)
Rather than setting to the start_seq of the current record during replay
time.

Signed-off-by: Yingxin Cheng <yingxin.cheng@intel.com>
src/crimson/os/seastore/async_cleaner.h
src/crimson/os/seastore/cache.cc
src/crimson/os/seastore/cache.h
src/crimson/os/seastore/journal/segmented_journal.cc
src/test/crimson/seastore/test_btree_lba_manager.cc
src/test/crimson/seastore/test_seastore_journal.cc

index 440c21a1646db718f2003ee3d07970f0e22dbde8..14822b09a89ea703423329e634020382f363eb99 100644 (file)
@@ -269,6 +269,9 @@ private:
  */
 class SegmentProvider {
 public:
+  // get the committed journal head
+  virtual journal_seq_t get_journal_head() const = 0;
+
   // set the committed journal head
   virtual void set_journal_head(journal_seq_t) = 0;
 
@@ -772,6 +775,10 @@ public:
   /*
    * SegmentProvider interfaces
    */
+  journal_seq_t get_journal_head() const final {
+    return journal_head;
+  }
+
   const segment_info_t& get_seg_info(segment_id_t id) const final {
     return segments[id];
   }
index 3326f0a131c06b8f5c3b6d0d64fa8eb0580edd99..4b5572d2ad89809dd96fd30ea1dee13198079804 100644 (file)
@@ -1252,24 +1252,32 @@ record_t Cache::prepare_record(
     journal_seq_t dirty_tail;
     auto maybe_dirty_tail = get_oldest_dirty_from();
     if (!maybe_dirty_tail.has_value()) {
-      dirty_tail = JOURNAL_SEQ_NULL;
+      dirty_tail = cleaner->get_journal_head();
+      SUBINFOT(seastore_t, "dirty_tail all trimmed, set to head {}, src={}",
+               t, dirty_tail, trans_src);
     } else if (*maybe_dirty_tail == JOURNAL_SEQ_NULL) {
       dirty_tail = cleaner->get_dirty_tail();
-      ceph_assert(dirty_tail != JOURNAL_SEQ_NULL);
+      SUBINFOT(seastore_t, "dirty_tail is pending, set to {}, src={}",
+               t, dirty_tail, trans_src);
     } else {
       dirty_tail = *maybe_dirty_tail;
     }
+    ceph_assert(dirty_tail != JOURNAL_SEQ_NULL);
     journal_seq_t alloc_tail;
     auto maybe_alloc_tail = get_oldest_backref_dirty_from();
     if (!maybe_alloc_tail.has_value()) {
-      alloc_tail = JOURNAL_SEQ_NULL;
-      // see notes in Cache::complete_commit().
-      SUBWARNT(seastore_t, "backref_buffer all trimmed", t);
+      // FIXME: the replay point of the allocations requires to be accurate.
+      // Setting the alloc_tail to get_journal_head() cannot skip replaying the
+      // last unnecessary record.
+      alloc_tail = cleaner->get_journal_head();
+      SUBINFOT(seastore_t, "alloc_tail all trimmed, set to head {}, src={}",
+               t, alloc_tail, trans_src);
     } else if (*maybe_alloc_tail == JOURNAL_SEQ_NULL) {
       ceph_abort("impossible");
     } else {
       alloc_tail = *maybe_alloc_tail;
     }
+    ceph_assert(alloc_tail != JOURNAL_SEQ_NULL);
     auto tails = journal_tail_delta_t{alloc_tail, dirty_tail};
     SUBDEBUGT(seastore_t, "update tails as delta {}", t, tails);
     bufferlist bl;
@@ -1409,12 +1417,12 @@ void Cache::backref_batch_update(
 void Cache::complete_commit(
   Transaction &t,
   paddr_t final_block_start,
-  journal_seq_t seq,
+  journal_seq_t start_seq,
   AsyncCleaner *cleaner)
 {
   LOG_PREFIX(Cache::complete_commit);
-  SUBTRACET(seastore_t, "final_block_start={}, seq={}",
-            t, final_block_start, seq);
+  SUBTRACET(seastore_t, "final_block_start={}, start_seq={}",
+            t, final_block_start, start_seq);
 
   std::vector<backref_buf_entry_ref> backref_list;
   t.for_each_fresh_block([&](const CachedExtentRef &i) {
@@ -1442,19 +1450,6 @@ void Cache::complete_commit(
               t,
               i->get_paddr(),
               i->get_length());
-        // FIXME: In theroy, adding backref_list in the finalize phase
-        //        can result in wrong alloc_tail for replay:
-        // * trans-1 enters prepare record with allocations.
-        // * trans-2 (cleaner) enters prepare record, see no alloc-tail,
-        //   so it assume the alloc-tail is the start-seq of trans-2.
-        // * trans-1 enters finalize and update the backref_list,
-        //   implying that alloc-tail is the start-seq of trans-1.
-        // * trans-2 (cleaner) enters finalize.
-        // During replay, the alloc-tail will be set to the start-seq of
-        //   trans-2 according to journal_tail_delta_t, but it should be
-        //   actually the start-seq of trans-1.
-        // This can only happen if alloc_tail trimming is able to trim to
-        //   the journal head.
        backref_list.emplace_back(
          std::make_unique<backref_buf_entry_t>(
            i->get_paddr(),
@@ -1465,7 +1460,7 @@ void Cache::complete_commit(
              : L_ADDR_NULL),
            i->get_length(),
            i->get_type(),
-           seq));
+           start_seq));
       } else if (is_backref_node(i->get_type())) {
        add_backref_extent(i->get_paddr(), i->get_type());
       } else {
@@ -1487,7 +1482,7 @@ void Cache::complete_commit(
     i->state = CachedExtent::extent_state_t::DIRTY;
     assert(i->version > 0);
     if (i->version == 1 || i->get_type() == extent_types_t::ROOT) {
-      i->dirty_from_or_retired_at = seq;
+      i->dirty_from_or_retired_at = start_seq;
       DEBUGT("commit extent done, become dirty -- {}", t, *i);
     } else {
       DEBUGT("commit extent done -- {}", t, *i);
@@ -1516,9 +1511,9 @@ void Cache::complete_commit(
     i->complete_io();
   }
 
-  last_commit = seq;
+  last_commit = start_seq;
   for (auto &i: t.retired_set) {
-    i->dirty_from_or_retired_at = last_commit;
+    i->dirty_from_or_retired_at = start_seq;
     if (is_backref_mapped_extent_node(i)
          || is_retired_placeholder(i->get_type())) {
       DEBUGT("backref_list free {} len {}",
@@ -1531,7 +1526,7 @@ void Cache::complete_commit(
          L_ADDR_NULL,
          i->get_length(),
          i->get_type(),
-         seq));
+         start_seq));
     } else if (is_backref_node(i->get_type())) {
       remove_backref_extent(i->get_paddr());
     } else {
@@ -1564,13 +1559,13 @@ void Cache::complete_commit(
          i->cast<LogicalCachedExtent>()->get_laddr(),
          i->get_length(),
          i->get_type(),
-         seq));
+         start_seq));
       const auto t_src = t.get_src();
       add_extent(i, &t_src);
     }
   }
   if (!backref_list.empty()) {
-    backref_batch_update(std::move(backref_list), seq);
+    backref_batch_update(std::move(backref_list), start_seq);
   }
 }
 
index a5b0f9bb68f2534fd2b436fd69499742176b9482..1ece53e9a754e41e7ccaab65f7dc82afb4d26e39 100644 (file)
@@ -646,8 +646,7 @@ public:
        backref_buffer->backrefs_by_seq.begin(), iter);
     }
     if (!backref_buffer || backref_buffer->backrefs_by_seq.empty()) {
-      // see notes in Cache::complete_commit().
-      SUBWARN(seastore_cache, "backref_buffer all trimmed");
+      SUBDEBUG(seastore_cache, "backref_buffer all trimmed");
     }
   }
 
@@ -920,7 +919,7 @@ public:
     } else {
       auto oldest = dirty.begin()->get_dirty_from();
       if (oldest == JOURNAL_SEQ_NULL) {
-       SUBINFO(seastore_cache, "dirty_oldest: pending");
+       SUBDEBUG(seastore_cache, "dirty_oldest: pending");
       } else {
        SUBDEBUG(seastore_cache, "dirty_oldest: {}", oldest);
       }
index 3e32ada9eb13b74a8acf0e630feb00819bbc0602..e7d0559648d4c8d0ae3b565ff5f2484b202c6abd 100644 (file)
@@ -199,12 +199,8 @@ SegmentedJournal::scan_last_segment(
               decode(tail_delta, delta.bl);
               auto start_seq = locator.write_result.start_seq;
               INFO("got {}, at seq {}", tail_delta, start_seq);
-              if (tail_delta.alloc_tail == JOURNAL_SEQ_NULL) {
-                tail_delta.alloc_tail = start_seq;
-              }
-              if (tail_delta.dirty_tail == JOURNAL_SEQ_NULL) {
-                tail_delta.dirty_tail = start_seq;
-              }
+              ceph_assert(tail_delta.dirty_tail != JOURNAL_SEQ_NULL);
+              ceph_assert(tail_delta.alloc_tail != JOURNAL_SEQ_NULL);
               segment_provider.update_journal_tails(
                   tail_delta.dirty_tail, tail_delta.alloc_tail);
             }
index 1bf7c5050f4b16b802bc7c712247287fc6194e35..a562e7d019d8d4fd4705257310f112f7c26e7ed6 100644 (file)
@@ -51,6 +51,8 @@ struct btree_test_base :
   /*
    * SegmentProvider interfaces
    */
+  journal_seq_t get_journal_head() const final { return dummy_tail; }
+
   void set_journal_head(journal_seq_t) final {}
 
   journal_seq_t get_dirty_tail() const final { return dummy_tail; }
index 2a0f6b16f83aae2b7c88777eb2a1b53c41f95ae2..6df8e08337bfba782348c7bbaeb1e2e1041fb2a5 100644 (file)
@@ -92,6 +92,8 @@ struct journal_test_t : seastar_test_suite_t, SegmentProvider {
   /*
    * SegmentProvider interfaces
    */
+  journal_seq_t get_journal_head() const final { return dummy_tail; }
+
   void set_journal_head(journal_seq_t) final {}
 
   journal_seq_t get_dirty_tail() const final { return dummy_tail; }