git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
crimson/os/seastore: record replay_from info for dirty extents and alloc infos in...
author Xuehan Xu <xxhdx1985126@gmail.com>
Thu, 10 Mar 2022 09:46:37 +0000 (17:46 +0800)
committer Xuehan Xu <xxhdx1985126@gmail.com>
Sat, 7 May 2022 05:13:38 +0000 (13:13 +0800)
Signed-off-by: Xuehan Xu <xxhdx1985126@gmail.com>
12 files changed:
src/crimson/os/seastore/cache.cc
src/crimson/os/seastore/cache.h
src/crimson/os/seastore/journal.h
src/crimson/os/seastore/journal/segment_allocator.cc
src/crimson/os/seastore/journal/segmented_journal.cc
src/crimson/os/seastore/seastore_types.h
src/crimson/os/seastore/segment_cleaner.cc
src/crimson/os/seastore/segment_cleaner.h
src/crimson/os/seastore/transaction.h
src/crimson/os/seastore/transaction_manager.cc
src/test/crimson/seastore/test_btree_lba_manager.cc
src/test/crimson/seastore/test_seastore_journal.cc

index 002d948da6eb90c73f642ef69af12267b5cb64d9..7bf9ba776f77683f96384b977f8cb84f2e59e797 100644 (file)
@@ -1134,6 +1134,15 @@ record_t Cache::prepare_record(
     record.push_back(std::move(delta));
   }
 
+  if (t.is_cleaner_transaction()) {
+    bufferlist bl;
+    encode(get_oldest_backref_dirty_from().value_or(JOURNAL_SEQ_NULL), bl);
+    delta_info_t delta;
+    delta.type = extent_types_t::ALLOC_TAIL;
+    delta.bl = bl;
+    record.push_back(std::move(delta));
+  }
+
   ceph_assert(t.get_fresh_block_stats().num ==
               t.inline_block_list.size() +
               t.ool_block_list.size() +
@@ -1416,9 +1425,11 @@ Cache::replay_delta(
   journal_seq_t journal_seq,
   paddr_t record_base,
   const delta_info_t &delta,
+  const journal_seq_t &alloc_replay_from,
   seastar::lowres_system_clock::time_point& last_modified)
 {
   LOG_PREFIX(Cache::replay_delta);
+  assert(alloc_replay_from != JOURNAL_SEQ_NULL);
   if (delta.type == extent_types_t::ROOT) {
     TRACE("replay root delta at {} {}, remove extent ... -- {}, prv_root={}",
           journal_seq, record_base, delta, *root);
@@ -1432,6 +1443,11 @@ Cache::replay_delta(
     add_extent(root);
     return replay_delta_ertr::now();
   } else if (delta.type == extent_types_t::ALLOC_INFO) {
+    if (journal_seq < alloc_replay_from) {
+      DEBUG("journal_seq {} < alloc_replay_from {}, don't replay {}",
+       journal_seq, alloc_replay_from, delta);
+      return replay_delta_ertr::now();
+    }
     may_roll_backref_buffer(journal_seq.offset);
     alloc_delta_t alloc_delta;
     decode(alloc_delta, delta.bl);
@@ -1454,6 +1470,9 @@ Cache::replay_delta(
     if (!backref_list.empty())
       backref_batch_update(std::move(backref_list), journal_seq);
     return replay_delta_ertr::now();
+  } else if (delta.type == extent_types_t::ALLOC_TAIL) {
+    // this delta should have been dealt with during segment cleaner mounting
+    return replay_delta_ertr::now();
   } else {
     auto _get_extent_if_cached = [this](paddr_t addr)
       -> get_extent_ertr::future<CachedExtentRef> {
index 7270a4fe379078fdd975c60b7eca10ee8f0463fd..b16bdaff7b9a8ed1fe125ec002e2655037f92f49 100644 (file)
@@ -784,6 +784,8 @@ public:
     journal_seq_t seq,
     paddr_t record_block_base,
     const delta_info_t &delta,
+    const journal_seq_t &, // journal seq from which alloc
+                          // delta should be replayed
     seastar::lowres_system_clock::time_point& last_modified);
 
   /**
@@ -906,15 +908,40 @@ public:
     journal_seq_t seq,
     size_t max_bytes);
 
+  std::optional<journal_seq_t> get_oldest_backref_dirty_from() const {
+    LOG_PREFIX(Cache::get_oldest_backref_dirty_from);
+    journal_seq_t backref_oldest = JOURNAL_SEQ_NULL;
+    if (backref_bufs_to_flush.empty()) {
+      if (backref_buffer && !backref_buffer->backrefs.empty()) {
+       backref_oldest = backref_buffer->backrefs.begin()->first;
+      }
+    } else {
+      auto &oldest_buf = backref_bufs_to_flush.front();
+      backref_oldest = oldest_buf->backrefs.begin()->first;
+    }
+    if (backref_oldest == JOURNAL_SEQ_NULL) {
+      SUBDEBUG(seastore_cache, "backref_oldest: null");
+      return std::nullopt;
+    } else {
+      SUBDEBUG(seastore_cache, "backref_oldest: {}",
+       backref_oldest);
+      return backref_oldest;
+    }
+  }
+
   /// returns std::nullopt if no dirty extents or get_dirty_from() for oldest
   std::optional<journal_seq_t> get_oldest_dirty_from() const {
+    LOG_PREFIX(Cache::get_oldest_dirty_from);
     if (dirty.empty()) {
+      SUBDEBUG(seastore_cache, "oldest: null");
       return std::nullopt;
     } else {
       auto oldest = dirty.begin()->get_dirty_from();
       if (oldest == JOURNAL_SEQ_NULL) {
+       SUBDEBUG(seastore_cache, "oldest: null");
        return std::nullopt;
       } else {
+       SUBDEBUG(seastore_cache, "oldest: {}", oldest);
        return oldest;
       }
     }
index c135cf551f6eb4afb05d9cfbd5c754a731f76596..a33a5468684bd6c5257b0bd2f0adc5f6baa14fb2 100644 (file)
@@ -80,6 +80,8 @@ public:
   using delta_handler_t = std::function<
     replay_ret(const record_locator_t&,
               const delta_info_t&,
+              const journal_seq_t, // journal seq from which
+                                   // alloc delta should be replayed
               seastar::lowres_system_clock::time_point last_modified)>;
   virtual replay_ret replay(
     delta_handler_t &&delta_handler) = 0;
index e27900561b9aa10c476bc00d03af8c26b5fabea7..015d31286f313f9df8d79f113c94c5e593521a94 100644 (file)
@@ -51,16 +51,20 @@ SegmentAllocator::do_open()
   ).safe_then([this, FNAME, new_segment_seq](auto sref) {
     // initialize new segment
     journal_seq_t new_journal_tail;
+    journal_seq_t new_alloc_replay_from;
     if (type == segment_type_t::JOURNAL) {
       new_journal_tail = segment_provider.get_journal_tail_target();
+      new_alloc_replay_from = segment_provider.get_alloc_info_replay_from();
     } else { // OOL
       new_journal_tail = NO_DELTAS;
+      new_alloc_replay_from = NO_DELTAS;
     }
     segment_id_t segment_id = sref->get_segment_id();
     auto header = segment_header_t{
       new_segment_seq,
       segment_id,
       new_journal_tail,
+      new_alloc_replay_from,
       current_segment_nonce,
       type};
     INFO("{} writing header to new segment ... -- {}",
@@ -210,15 +214,19 @@ SegmentAllocator::close_segment(bool is_rolling)
   }
   auto close_seg_info = segment_provider.get_seg_info(close_segment_id);
   journal_seq_t cur_journal_tail;
+  journal_seq_t new_alloc_replay_from;
   if (type == segment_type_t::JOURNAL) {
     cur_journal_tail = segment_provider.get_journal_tail_target();
+    new_alloc_replay_from = segment_provider.get_alloc_info_replay_from();
   } else { // OOL
     cur_journal_tail = NO_DELTAS;
+    new_alloc_replay_from = NO_DELTAS;
   }
   auto tail = segment_tail_t{
     close_seg_info.seq,
     close_segment_id,
     cur_journal_tail,
+    new_alloc_replay_from,
     current_segment_nonce,
     type,
     close_seg_info.last_modified.time_since_epoch().count(),
index 2413ba14c134781c21d8b86d2bba5f5aac7dd2fa..f9c979df05dfdfbd5c1c1a90d302330c9c5975f9 100644 (file)
@@ -223,6 +223,7 @@ SegmentedJournal::replay_segment(
            return handler(
              locator,
              delta,
+             segment_provider.get_alloc_info_replay_from(),
              seastar::lowres_system_clock::time_point(
                seastar::lowres_system_clock::duration(commit_time)));
           });
index 404a4f48126b554d7e9798f0e5bf04482042eafd..839b2bf88366f2688807808fc50c5b83a86dec08 100644 (file)
@@ -862,13 +862,14 @@ enum class extent_types_t : uint8_t {
   // the following two types are not extent types,
   // they are just used to indicates paddr allocation deltas
   ALLOC_INFO = 9,
+  ALLOC_TAIL = 10,
   // Test Block Types
-  TEST_BLOCK = 10,
-  TEST_BLOCK_PHYSICAL = 11,
-  BACKREF_INTERNAL = 12,
-  BACKREF_LEAF = 13,
+  TEST_BLOCK = 11,
+  TEST_BLOCK_PHYSICAL = 12,
+  BACKREF_INTERNAL = 13,
+  BACKREF_LEAF = 14,
   // None and the number of valid extent_types_t
-  NONE = 14,
+  NONE = 15,
 };
 using extent_types_le_t = uint8_t;
 constexpr auto EXTENT_TYPES_MAX = static_cast<uint8_t>(extent_types_t::NONE);
@@ -1348,6 +1349,7 @@ struct segment_header_t {
   segment_id_t physical_segment_id; // debugging
 
   journal_seq_t journal_tail;
+  journal_seq_t alloc_replay_from;
   segment_nonce_t segment_nonce;
 
   segment_type_t type;
@@ -1361,6 +1363,7 @@ struct segment_header_t {
     denc(v.segment_seq, p);
     denc(v.physical_segment_id, p);
     denc(v.journal_tail, p);
+    denc(v.alloc_replay_from, p);
     denc(v.segment_nonce, p);
     denc(v.type, p);
     DENC_FINISH(p);
@@ -1373,6 +1376,7 @@ struct segment_tail_t {
   segment_id_t physical_segment_id; // debugging
 
   journal_seq_t journal_tail;
+  journal_seq_t alloc_replay_from;
   segment_nonce_t segment_nonce;
 
   segment_type_t type;
@@ -1389,6 +1393,7 @@ struct segment_tail_t {
     denc(v.segment_seq, p);
     denc(v.physical_segment_id, p);
     denc(v.journal_tail, p);
+    denc(v.alloc_replay_from, p);
     denc(v.segment_nonce, p);
     denc(v.type, p);
     denc(v.last_modified, p);
index a5c11fb76a3fdaf435c38fcc78dcf856fb90a0b4..1d0ed0a48f417147548b6ff695f8ce691dc2181b 100644 (file)
@@ -482,14 +482,28 @@ segment_id_t SegmentCleaner::allocate_segment(
   return NULL_SEG_ID;
 }
 
-void SegmentCleaner::update_journal_tail_target(journal_seq_t target)
+void SegmentCleaner::update_journal_tail_target(
+  journal_seq_t dirty_replay_from,
+  journal_seq_t alloc_replay_from)
 {
+  logger().debug(
+    "{}: {}, current dirty_extents_replay_from {}",
+    __func__,
+    dirty_replay_from,
+    dirty_extents_replay_from);
+  if (dirty_extents_replay_from == JOURNAL_SEQ_NULL
+      || dirty_replay_from > dirty_extents_replay_from) {
+    dirty_extents_replay_from = dirty_replay_from;
+  }
+
+  update_alloc_info_replay_from(alloc_replay_from);
+
+  journal_seq_t target = std::min(dirty_replay_from, alloc_replay_from);
   logger().debug(
     "{}: {}, current tail target {}",
     __func__,
     target,
     journal_tail_target);
-  assert(journal_tail_target == JOURNAL_SEQ_NULL || target >= journal_tail_target);
   if (journal_tail_target == JOURNAL_SEQ_NULL || target > journal_tail_target) {
     journal_tail_target = target;
   }
@@ -497,6 +511,20 @@ void SegmentCleaner::update_journal_tail_target(journal_seq_t target)
   maybe_wake_gc_blocked_io();
 }
 
+void SegmentCleaner::update_alloc_info_replay_from(
+  journal_seq_t alloc_replay_from)
+{
+  logger().debug(
+    "{}: {}, current alloc_info_replay_from {}",
+    __func__,
+    alloc_replay_from,
+    alloc_info_replay_from);
+  if (alloc_info_replay_from == JOURNAL_SEQ_NULL
+      || alloc_replay_from > alloc_info_replay_from) {
+    alloc_info_replay_from = alloc_replay_from;
+  }
+}
+
 void SegmentCleaner::update_journal_tail_committed(journal_seq_t committed)
 {
   if (journal_tail_committed == JOURNAL_SEQ_NULL ||
@@ -876,6 +904,8 @@ SegmentCleaner::mount_ret SegmentCleaner::mount()
   journal_tail_target = JOURNAL_SEQ_NULL;
   journal_tail_committed = JOURNAL_SEQ_NULL;
   journal_head = JOURNAL_SEQ_NULL;
+  dirty_extents_replay_from = JOURNAL_SEQ_NULL;
+  alloc_info_replay_from = JOURNAL_SEQ_NULL;
   
   space_tracker.reset(
     detailed ?
@@ -924,6 +954,12 @@ SegmentCleaner::mount_ret SegmentCleaner::mount()
            time_point last_rewritten(duration(tail.last_rewritten));
            segments.update_last_modified_rewritten(
                 segment_id, last_modified, last_rewritten);
+           if (tail.get_type() == segment_type_t::JOURNAL) {
+             update_journal_tail_committed(tail.journal_tail);
+             update_journal_tail_target(
+               tail.journal_tail,
+               tail.alloc_replay_from);
+           }
            init_mark_segment_closed(
              segment_id,
              header.segment_seq,
@@ -955,23 +991,23 @@ SegmentCleaner::scan_extents_ret SegmentCleaner::scan_nonfull_segment(
   scan_extents_ret_bare& segment_set,
   segment_id_t segment_id)
 {
-  if (header.get_type() == segment_type_t::OOL) {
-    logger().info(
-      "SegmentCleaner::scan_nonfull_segment: out-of-line segment {}",
-      segment_id);
+  return seastar::do_with(
+    scan_valid_records_cursor({
+      segments[segment_id].seq,
+      paddr_t::make_seg_paddr(segment_id, 0)}),
+    [this, segment_id, segment_header=header](auto& cursor) {
     return seastar::do_with(
-      scan_valid_records_cursor({
-       segments[segment_id].seq,
-       paddr_t::make_seg_paddr(segment_id, 0)}),
-      [this, segment_id, header](auto& cursor) {
-      return seastar::do_with(
-       SegmentManagerGroup::found_record_handler_t([this, segment_id](
-           record_locator_t locator,
-           const record_group_header_t& header,
-           const bufferlist& mdbuf
-         ) mutable -> SegmentManagerGroup::scan_valid_records_ertr::future<> {
-         LOG_PREFIX(SegmentCleaner::scan_nonfull_segment);
-         DEBUG("decodeing {} records", header.records);
+       SegmentManagerGroup::found_record_handler_t(
+       [this, segment_id, segment_header](
+         record_locator_t locator,
+         const record_group_header_t& header,
+         const bufferlist& mdbuf
+       ) mutable -> SegmentManagerGroup::scan_valid_records_ertr::future<> {
+       LOG_PREFIX(SegmentCleaner::scan_nonfull_segment);
+       if (segment_header.get_type() == segment_type_t::OOL) {
+         DEBUG("out-of-line segment {}, decodeing {} records",
+           segment_id,
+           header.records);
          auto maybe_headers = try_decode_record_headers(header, mdbuf);
          if (!maybe_headers) {
            ERROR("unable to decode record headers for record group {}",
@@ -997,36 +1033,44 @@ SegmentCleaner::scan_extents_ret SegmentCleaner::scan_nonfull_segment(
               segments.update_last_modified_rewritten(segment_id, {}, commit_time);
            }
          }
-         return seastar::now();
-       }),
-       [&cursor, header, this](auto& handler) {
-         return sm_group->scan_valid_records(
-           cursor,
-           header.segment_nonce,
-           segments.get_segment_size(),
-           handler);
+       } else {
+         DEBUG("inline segment {}, decodeing {} records",
+           segment_id,
+           header.records);
+         auto maybe_record_deltas_list = try_decode_deltas(
+           header, mdbuf, locator.record_block_base);
+         if (!maybe_record_deltas_list) {
+           ERROR("unable to decode deltas for record {} at {}",
+                 header, locator);
+           return crimson::ct_error::input_output_error::make();
+         }
+         for (auto &record_deltas : *maybe_record_deltas_list) {
+           for (auto &[ctime, delta] : record_deltas.deltas) {
+             if (delta.type == extent_types_t::ALLOC_TAIL) {
+               journal_seq_t seq;
+               decode(seq, delta.bl);
+               update_alloc_info_replay_from(seq);
+             }
+           }
+         }
        }
-      );
-    }).safe_then([this, segment_id, header](auto) {
-      init_mark_segment_closed(
-       segment_id,
-       header.segment_seq,
-       header.type);
-      return seastar::now();
-    });
-  } else if (header.get_type() == segment_type_t::JOURNAL) {
-    logger().info(
-      "SegmentCleaner::scan_nonfull_segment: journal segment {}",
-      segment_id);
-    segment_set.emplace_back(std::make_pair(segment_id, std::move(header)));
-  } else {
-    ceph_abort("unexpected segment type");
-  }
-  init_mark_segment_closed(
-    segment_id,
-    header.segment_seq,
-    header.type);
-  return seastar::now();
+       return seastar::now();
+      }),
+      [&cursor, segment_header, this](auto& handler) {
+       return sm_group->scan_valid_records(
+         cursor,
+         segment_header.segment_nonce,
+         segments.get_segment_size(),
+         handler);
+      }
+    );
+  }).safe_then([this, segment_id, header](auto) {
+    init_mark_segment_closed(
+      segment_id,
+      header.segment_seq,
+      header.type);
+    return seastar::now();
+  });
 }
 
 SegmentCleaner::release_ertr::future<>
index 357b4bf78512d7692244582c22ccdf5afe9542b1..0070527b2b25a3c6d7cdeeaae17f6babc979ca07 100644 (file)
@@ -193,6 +193,10 @@ public:
   virtual segment_id_t allocate_segment(
       segment_seq_t seq, segment_type_t type) = 0;
 
+  virtual journal_seq_t get_dirty_extents_replay_from() const = 0;
+
+  virtual journal_seq_t get_alloc_info_replay_from() const = 0;
+
   virtual void close_segment(segment_id_t) = 0;
 
   virtual void update_journal_tail_committed(journal_seq_t tail_committed) = 0;
@@ -583,6 +587,12 @@ private:
   /// target journal_tail for next fresh segment
   journal_seq_t journal_tail_target;
 
+  /// target replay_from for dirty extents
+  journal_seq_t dirty_extents_replay_from;
+
+  /// target replay_from for alloc infos
+  journal_seq_t alloc_info_replay_from;
+
   /// most recently committed journal_tail
   journal_seq_t journal_tail_committed;
 
@@ -639,7 +649,20 @@ public:
     return sm_group.get();
   }
 
-  void update_journal_tail_target(journal_seq_t target);
+  journal_seq_t get_dirty_extents_replay_from() const final {
+    return dirty_extents_replay_from;
+  }
+
+  journal_seq_t get_alloc_info_replay_from() const final {
+    return alloc_info_replay_from;
+  }
+
+  void update_journal_tail_target(
+    journal_seq_t dirty_replay_from,
+    journal_seq_t alloc_replay_from);
+
+  void update_alloc_info_replay_from(
+    journal_seq_t alloc_replay_from);
 
   void init_mkfs(journal_seq_t head) {
     journal_tail_target = head;
index 17479795df1c794ec5944f889515a39a2ed290d6..466f0b4a995004a9e2933ad709a9ff520c48a03d 100644 (file)
@@ -260,6 +260,10 @@ public:
     return src;
   }
 
+  bool is_cleaner_transaction() const {
+    return src >= Transaction::src_t::CLEANER_TRIM;
+  }
+
   bool is_weak() const {
     return weak;
   }
index 7251b07fac9ad08862efbf28e66c94f0d92ad64b..308a76e51cdd59edd796da0bcef0bb6ddde8b672 100644 (file)
@@ -89,14 +89,21 @@ TransactionManager::mount_ertr::future<> TransactionManager::mount()
   return segment_cleaner->mount(
   ).safe_then([this] {
     return journal->replay(
-      [this](const auto &offsets, const auto &e, auto last_modified) {
+      [this](
+       const auto &offsets,
+       const auto &e,
+       const journal_seq_t alloc_replay_from,
+       auto last_modified)
+      {
        auto start_seq = offsets.write_result.start_seq;
        segment_cleaner->update_journal_tail_target(
-         cache->get_oldest_dirty_from().value_or(start_seq));
+         cache->get_oldest_dirty_from().value_or(start_seq),
+         cache->get_oldest_backref_dirty_from().value_or(start_seq));
        return cache->replay_delta(
          start_seq,
          offsets.record_block_base,
          e,
+         alloc_replay_from,
          last_modified);
       });
   }).safe_then([this] {
@@ -405,7 +412,8 @@ TransactionManager::submit_transaction_direct(
       backref_manager->complete_transaction(tref, backref_to_clear, backref_to_link);
 
       segment_cleaner->update_journal_tail_target(
-       cache->get_oldest_dirty_from().value_or(start_seq));
+       cache->get_oldest_dirty_from().value_or(start_seq),
+       cache->get_oldest_backref_dirty_from().value_or(start_seq));
       return segment_cleaner->maybe_release_segment(tref);
     }).safe_then([FNAME, &tref] {
       SUBTRACET(seastore_t, "completed", tref);
index 8e21f0affe85958d52fa0037d8488dbb1c822b15..bde23e84f35ea31e794cccf75e030df83f8dd93c 100644 (file)
@@ -79,6 +79,14 @@ struct btree_test_base :
 
   SegmentManagerGroup* get_segment_manager_group() final { return sms.get(); }
 
+  journal_seq_t get_dirty_extents_replay_from() const final {
+    return JOURNAL_SEQ_NULL;
+  }
+
+  journal_seq_t get_alloc_info_replay_from() const final {
+    return JOURNAL_SEQ_NULL;
+  }
+
   virtual void complete_commit(Transaction &t) {}
   seastar::future<> submit_transaction(TransactionRef t)
   {
index e33211fd59004eb4467027d1ec92666b64c950cc..4641987810f5ccafa830a9583acee6459554d8f2 100644 (file)
@@ -99,6 +99,14 @@ struct journal_test_t : seastar_test_suite_t, SegmentProvider {
     return tmp_info;
   }
 
+  journal_seq_t get_dirty_extents_replay_from() const final {
+    return JOURNAL_SEQ_NULL;
+  }
+
+  journal_seq_t get_alloc_info_replay_from() const final {
+    return JOURNAL_SEQ_NULL;
+  }
+
   segment_id_t allocate_segment(
     segment_seq_t seq,
     segment_type_t type
@@ -184,7 +192,10 @@ struct journal_test_t : seastar_test_suite_t, SegmentProvider {
     replay(
       [&advance,
        &delta_checker]
-      (const auto &offsets, const auto &di, auto t) mutable {
+      (const auto &offsets,
+       const auto &di,
+       const journal_seq_t,
+       auto t) mutable {
        if (!delta_checker) {
          EXPECT_FALSE("No Deltas Left");
        }