git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
crimson/os/seastore: store valid journal tail during mkfs
author Yingxin Cheng <yingxin.cheng@intel.com>
Thu, 21 Jul 2022 08:50:15 +0000 (16:50 +0800)
committer Yingxin Cheng <yingxin.cheng@intel.com>
Fri, 22 Jul 2022 02:46:02 +0000 (10:46 +0800)
Signed-off-by: Yingxin Cheng <yingxin.cheng@intel.com>
12 files changed:
src/crimson/os/seastore/extent_placement_manager.h
src/crimson/os/seastore/journal.h
src/crimson/os/seastore/journal/circular_bounded_journal.cc
src/crimson/os/seastore/journal/circular_bounded_journal.h
src/crimson/os/seastore/journal/segment_allocator.cc
src/crimson/os/seastore/journal/segment_allocator.h
src/crimson/os/seastore/journal/segmented_journal.cc
src/crimson/os/seastore/journal/segmented_journal.h
src/crimson/os/seastore/transaction_manager.cc
src/test/crimson/seastore/test_btree_lba_manager.cc
src/test/crimson/seastore/test_cbjournal.cc
src/test/crimson/seastore/test_seastore_journal.cc

index d03c1eb5e4219d2c82d8b3aa2884a4a3210ee765..733632b7ee8ad5baa8ce10940d80323aa3fd9416 100644 (file)
@@ -54,7 +54,7 @@ public:
                      SegmentSeqAllocator &ssa);
 
   open_ertr::future<> open() final {
-    return record_submitter.open().discard_result();
+    return record_submitter.open(false).discard_result();
   }
 
   alloc_write_iertr::future<> alloc_write_ool_extents(
index f43c6236e9b52876a129df54e551b4c2765c83ec..f50d229f32ddee7cb60194b5bd877f2ae247810d 100644 (file)
@@ -25,16 +25,24 @@ enum class journal_type_t {
 
 class Journal {
 public:
+  /**
+   * initializes journal for mkfs writes -- must run prior to calls
+   * to submit_record.
+   */
+  using open_for_mkfs_ertr = crimson::errorator<
+    crimson::ct_error::input_output_error
+    >;
+  using open_for_mkfs_ret = open_for_mkfs_ertr::future<journal_seq_t>;
+  virtual open_for_mkfs_ret open_for_mkfs() = 0;
+
   /**
    * initializes journal for new writes -- must run prior to calls
    * to submit_record.  Should be called after replay if not a new
    * Journal.
    */
-  using open_for_write_ertr = crimson::errorator<
-    crimson::ct_error::input_output_error
-    >;
-  using open_for_write_ret = open_for_write_ertr::future<journal_seq_t>;
-  virtual open_for_write_ret open_for_write() = 0;
+  using open_for_mount_ertr = open_for_mkfs_ertr;
+  using open_for_mount_ret = open_for_mkfs_ret;
+  virtual open_for_mount_ret open_for_mount() = 0;
 
   /// close journal
   using close_ertr = crimson::errorator<
index de6000bec05f855cdbb417e7ff5021b3a21b616c..766dbe7cba6d5db9478f4f54b63dba462f3bee0d 100644 (file)
@@ -63,13 +63,13 @@ CircularBoundedJournal::mkfs(const mkfs_config_t& config)
   });
 }
 
-CircularBoundedJournal::open_for_write_ertr::future<>
+CircularBoundedJournal::open_for_mount_ertr::future<>
 CircularBoundedJournal::_open_device(const std::string &path)
 {
   ceph_assert(device);
   return device->open(path, seastar::open_flags::rw
   ).handle_error(
-    open_for_write_ertr::pass_further{},
+    open_for_mount_ertr::pass_further{},
     crimson::ct_error::assert_all{
       "Invalid error device->open"
     }
@@ -93,7 +93,14 @@ ceph::bufferlist CircularBoundedJournal::encode_header()
   return bl;
 }
 
-CircularBoundedJournal::open_for_write_ret CircularBoundedJournal::open_for_write()
+CircularBoundedJournal::open_for_mkfs_ret
+CircularBoundedJournal::open_for_mkfs()
+{
+  return open_for_mount();
+}
+
+CircularBoundedJournal::open_for_mount_ret
+CircularBoundedJournal::open_for_mount()
 {
   ceph_assert(initialized);
   paddr_t paddr = convert_abs_addr_to_paddr(
@@ -102,8 +109,8 @@ CircularBoundedJournal::open_for_write_ret CircularBoundedJournal::open_for_writ
   if (circulation_seq == NULL_SEG_SEQ) {
     circulation_seq = 0;
   }
-  return open_for_write_ret(
-    open_for_write_ertr::ready_future_marker{},
+  return open_for_mount_ret(
+    open_for_mount_ertr::ready_future_marker{},
     journal_seq_t{
       circulation_seq,
       paddr
@@ -117,14 +124,14 @@ CircularBoundedJournal::close_ertr::future<> CircularBoundedJournal::close()
     initialized = false;
     return device->close();
   }).handle_error(
-    open_for_write_ertr::pass_further{},
+    open_for_mount_ertr::pass_further{},
     crimson::ct_error::assert_all{
       "Invalid error write_header"
     }
   );
 }
 
-CircularBoundedJournal::open_for_write_ret
+CircularBoundedJournal::open_for_mount_ret
 CircularBoundedJournal::open_device_read_header()
 {
   LOG_PREFIX(CircularBoundedJournal::open_device_read_header);
@@ -133,7 +140,7 @@ CircularBoundedJournal::open_device_read_header()
   ).safe_then([this, FNAME]() {
     return read_header(
     ).handle_error(
-      open_for_write_ertr::pass_further{},
+      open_for_mount_ertr::pass_further{},
       crimson::ct_error::assert_all{
        "Invalid error read_header"
     }).safe_then([this, FNAME](auto p) mutable {
@@ -144,15 +151,15 @@ CircularBoundedJournal::open_device_read_header()
        get_written_to(),
        header.device_id);
       initialized = true;
-      return open_for_write_ret(
-       open_for_write_ertr::ready_future_marker{},
+      return open_for_mount_ret(
+       open_for_mount_ertr::ready_future_marker{},
        journal_seq_t{
          circulation_seq,
          paddr
        });
     });
   }).handle_error(
-    open_for_write_ertr::pass_further{},
+    open_for_mount_ertr::pass_further{},
     crimson::ct_error::assert_all{
       "Invalid error _open_device"
   });
index 539d9ad3a0d89187c723ac33af791557fa08a067..098cb99c58e313ecb7bc64a28978c0d13bfd4979 100644 (file)
@@ -80,8 +80,11 @@ public:
   CircularBoundedJournal(NVMeBlockDevice* device, const std::string &path);
   ~CircularBoundedJournal() {}
 
-  open_for_write_ret open_for_write() final;
-  open_for_write_ret open_device_read_header();
+  open_for_mkfs_ret open_for_mkfs() final;
+
+  open_for_mount_ret open_for_mount() final;
+
+  open_for_mount_ret open_device_read_header();
   close_ertr::future<> close() final;
 
   journal_type_t get_type() final {
@@ -102,7 +105,7 @@ public:
 
   replay_ret replay(delta_handler_t &&delta_handler) final;
 
-  open_for_write_ertr::future<> _open_device(const std::string &path);
+  open_for_mount_ertr::future<> _open_device(const std::string &path);
 
   struct cbj_header_t;
   using write_ertr = submit_record_ertr;
index baf1ee89f104874bf0f3ef62caf2fcac5199463f..6ad2ab1a53eef80e830d219d5fb90b7ecb48c82a 100644 (file)
@@ -31,7 +31,7 @@ SegmentAllocator::SegmentAllocator(
 }
 
 SegmentAllocator::open_ret
-SegmentAllocator::do_open()
+SegmentAllocator::do_open(bool is_mkfs)
 {
   LOG_PREFIX(SegmentAllocator::do_open);
   ceph_assert(!current_segment);
@@ -51,18 +51,32 @@ SegmentAllocator::do_open()
     crimson::ct_error::assert_all{
       "Invalid error in SegmentAllocator::do_open open"
     }
-  ).safe_then([this, FNAME, new_segment_seq](auto sref) {
+  ).safe_then([this, is_mkfs, FNAME, new_segment_seq](auto sref) {
     // initialize new segment
+    segment_id_t segment_id = sref->get_segment_id();
     journal_seq_t new_journal_tail;
     journal_seq_t new_alloc_replay_from;
     if (type == segment_type_t::JOURNAL) {
       new_journal_tail = segment_provider.get_journal_tail_target();
       new_alloc_replay_from = segment_provider.get_alloc_info_replay_from();
+      if (is_mkfs) {
+        ceph_assert(new_journal_tail == JOURNAL_SEQ_NULL);
+        ceph_assert(new_alloc_replay_from == JOURNAL_SEQ_NULL);
+        auto mkfs_seq = journal_seq_t{
+          new_segment_seq,
+          paddr_t::make_seg_paddr(segment_id, 0)
+        };
+        new_journal_tail = mkfs_seq;
+        new_alloc_replay_from = mkfs_seq;
+      } else {
+        ceph_assert(new_journal_tail != JOURNAL_SEQ_NULL);
+        ceph_assert(new_alloc_replay_from != JOURNAL_SEQ_NULL);
+      }
     } else { // OOL
+      ceph_assert(!is_mkfs);
       new_journal_tail = NO_DELTAS;
       new_alloc_replay_from = NO_DELTAS;
     }
-    segment_id_t segment_id = sref->get_segment_id();
     auto header = segment_header_t{
       new_segment_seq,
       segment_id,
@@ -119,7 +133,7 @@ SegmentAllocator::do_open()
 }
 
 SegmentAllocator::open_ret
-SegmentAllocator::open()
+SegmentAllocator::open(bool is_mkfs)
 {
   LOG_PREFIX(SegmentAllocator::open);
   auto& device_ids = sm_group.get_device_ids();
@@ -132,7 +146,7 @@ SegmentAllocator::open()
   print_name = oss.str();
 
   INFO("{}", print_name);
-  return do_open();
+  return do_open(is_mkfs);
 }
 
 SegmentAllocator::roll_ertr::future<>
@@ -140,7 +154,7 @@ SegmentAllocator::roll()
 {
   ceph_assert(can_write());
   return close_segment().safe_then([this] {
-    return do_open().discard_result();
+    return do_open(false).discard_result();
   });
 }
 
@@ -583,9 +597,9 @@ RecordSubmitter::submit(record_t&& record)
 }
 
 RecordSubmitter::open_ret
-RecordSubmitter::open()
+RecordSubmitter::open(bool is_mkfs)
 {
-  return segment_allocator.open(
+  return segment_allocator.open(is_mkfs
   ).safe_then([this](journal_seq_t ret) {
     LOG_PREFIX(RecordSubmitter::open);
     DEBUG("{} register metrics", get_name());
index 3af3be7b66478507fbfb7ed0769efa54f9040cca..a664b42e78f5669e04778c61df832c6d49caa3d0 100644 (file)
@@ -86,7 +86,7 @@ class SegmentAllocator {
   // open for write and generate the correct print name
   using open_ertr = base_ertr;
   using open_ret = open_ertr::future<journal_seq_t>;
-  open_ret open();
+  open_ret open(bool is_mkfs);
 
   // close the current segment and initialize next one
   using roll_ertr = base_ertr;
@@ -104,7 +104,7 @@ class SegmentAllocator {
   close_ertr::future<> close();
 
  private:
-  open_ret do_open();
+  open_ret do_open(bool is_mkfs);
 
   void reset() {
     current_segment.reset();
@@ -359,7 +359,7 @@ public:
   // open for write, generate the correct print name, and register metrics
   using open_ertr = base_ertr;
   using open_ret = open_ertr::future<journal_seq_t>;
-  open_ret open();
+  open_ret open(bool is_mkfs);
 
   using close_ertr = base_ertr;
   close_ertr::future<> close();
index fe8e557f086dda3363420d43f900be3bd8ed1b7f..9fc3ec7bdcbb106c1e6f0947afd47d334efddfdd 100644 (file)
@@ -49,9 +49,16 @@ SegmentedJournal::SegmentedJournal(
 {
 }
 
-SegmentedJournal::open_for_write_ret SegmentedJournal::open_for_write()
+SegmentedJournal::open_for_mkfs_ret
+SegmentedJournal::open_for_mkfs()
 {
-  return record_submitter.open();
+  return record_submitter.open(true);
+}
+
+SegmentedJournal::open_for_mount_ret
+SegmentedJournal::open_for_mount()
+{
+  return record_submitter.open(false);
 }
 
 SegmentedJournal::close_ertr::future<> SegmentedJournal::close()
@@ -95,30 +102,25 @@ SegmentedJournal::prep_replay_segments(
 
   auto journal_tail = segments.rbegin()->second.journal_tail;
   segment_provider.update_journal_tail_committed(journal_tail);
-  auto replay_from = journal_tail.offset;
-  auto from = segments.begin();
-  if (replay_from != P_ADDR_NULL) {
-    from = std::find_if(
-      segments.begin(),
-      segments.end(),
-      [&replay_from](const auto &seg) -> bool {
-       auto& seg_addr = replay_from.as_seg_paddr();
-       return seg.first == seg_addr.get_segment_id();
-      });
-    if (from->second.segment_seq != journal_tail.segment_seq) {
-      ERROR("journal_tail {} does not match {}",
-            journal_tail, from->second);
-      ceph_abort();
-    }
-  } else {
-    replay_from = paddr_t::make_seg_paddr(
-      from->first,
-      journal_segment_allocator.get_block_size());
+  auto journal_tail_paddr = journal_tail.offset;
+  ceph_assert(journal_tail != JOURNAL_SEQ_NULL);
+  ceph_assert(journal_tail_paddr != P_ADDR_NULL);
+  auto from = std::find_if(
+    segments.begin(),
+    segments.end(),
+    [&journal_tail_paddr](const auto &seg) -> bool {
+      auto& seg_addr = journal_tail_paddr.as_seg_paddr();
+      return seg.first == seg_addr.get_segment_id();
+    });
+  if (from->second.segment_seq != journal_tail.segment_seq) {
+    ERROR("journal_tail {} does not match {}",
+          journal_tail, from->second);
+    ceph_abort();
   }
 
   auto num_segments = segments.end() - from;
-  INFO("{} segments to replay, from {}",
-       num_segments, replay_from);
+  INFO("{} segments to replay from {}",
+       num_segments, journal_tail);
   auto ret = replay_segments_t(num_segments);
   std::transform(
     from, segments.end(), ret.begin(),
@@ -127,11 +129,11 @@ SegmentedJournal::prep_replay_segments(
        p.second.segment_seq,
        paddr_t::make_seg_paddr(
          p.first,
-         journal_segment_allocator.get_block_size())
+         sm_group.get_block_size())
       };
       return std::make_pair(ret, p.second);
     });
-  ret[0].first.offset = replay_from;
+  ret[0].first.offset = journal_tail_paddr;
   return prep_replay_segments_fut(
     prep_replay_segments_ertr::ready_future_marker{},
     std::move(ret));
index a97db1b74ce680b9329facd59403f86dc84642cd..8c6485a1302f755fa51ad94dcf3ebcf0813668f4 100644 (file)
@@ -27,7 +27,9 @@ public:
   SegmentedJournal(SegmentProvider &segment_provider);
   ~SegmentedJournal() {}
 
-  open_for_write_ret open_for_write() final;
+  open_for_mkfs_ret open_for_mkfs() final;
+
+  open_for_mount_ret open_for_mount() final;
 
   close_ertr::future<> close() final;
 
index 0fb625d01e52f2abc6d8cfaca7a96319e0e29242..f9a082634e1fb1e87f1e20912e25adbc04c69818 100644 (file)
@@ -47,7 +47,7 @@ TransactionManager::mkfs_ertr::future<> TransactionManager::mkfs()
   INFO("enter");
   return async_cleaner->mount(
   ).safe_then([this] {
-    return journal->open_for_write();
+    return journal->open_for_mkfs();
   }).safe_then([this](auto) {
     async_cleaner->init_mkfs();
     return epm->open();
@@ -107,7 +107,7 @@ TransactionManager::mount_ertr::future<> TransactionManager::mount()
          modify_time);
       });
   }).safe_then([this] {
-    return journal->open_for_write();
+    return journal->open_for_mount();
   }).safe_then([this, FNAME](auto) {
     return seastar::do_with(
       create_weak_transaction(
index 9fbf1e1886c8d4040e668e78c0a4d782ef251fe1..c782531680a0ad0ccce4b1bbe1ee1b27fef9bb52 100644 (file)
@@ -42,6 +42,8 @@ struct btree_test_base :
   std::map<segment_id_t, segment_seq_t> segment_seqs;
   std::map<segment_id_t, segment_type_t> segment_types;
 
+  journal_seq_t dummy_tail;
+
   mutable segment_info_t tmp_info;
 
   btree_test_base() = default;
@@ -51,7 +53,7 @@ struct btree_test_base :
    */
   void set_journal_head(journal_seq_t) final {}
 
-  journal_seq_t get_journal_tail_target() const final { return journal_seq_t{}; }
+  journal_seq_t get_journal_tail_target() const final { return dummy_tail; }
 
   const segment_info_t& get_seg_info(segment_id_t id) const final {
     tmp_info = {};
@@ -86,11 +88,11 @@ struct btree_test_base :
   SegmentManagerGroup* get_segment_manager_group() final { return sms.get(); }
 
   journal_seq_t get_dirty_extents_replay_from() const final {
-    return JOURNAL_SEQ_NULL;
+    return dummy_tail;
   }
 
   journal_seq_t get_alloc_info_replay_from() const final {
-    return JOURNAL_SEQ_NULL;
+    return dummy_tail;
   }
 
   virtual void complete_commit(Transaction &t) {}
@@ -126,8 +128,10 @@ struct btree_test_base :
       epm->add_device(segment_manager.get(), true);
       journal->set_write_pipeline(&pipeline);
 
-      return journal->open_for_write().discard_result();
+      return journal->open_for_mkfs().discard_result();
     }).safe_then([this] {
+      dummy_tail = journal_seq_t{0,
+        paddr_t::make_seg_paddr(segment_id_t(segment_manager->get_device_id(), 0), 0)};
       return epm->open();
     }).safe_then([this] {
       return seastar::do_with(
index 1161f7dfc32fe401ae402b48cbc961c4a3eb9d23..0d1709c62eece5f5bff4a3b4ecec013bfab81748 100644 (file)
@@ -234,7 +234,7 @@ struct cbjournal_test_t : public seastar_test_suite_t
   }
   void open() {
     cbj->open_device_read_header().unsafe_get0();
-    cbj->open_for_write().unsafe_get0();
+    cbj->open_for_mkfs().unsafe_get0();
   }
   auto get_available_size() {
     return cbj->get_available_size();
index a2ddd9203a405e3136af4fa7ea55e992da9853c3..63a61f8a6f96c219816ca66747fe76e743af4849 100644 (file)
@@ -83,6 +83,8 @@ struct journal_test_t : seastar_test_suite_t, SegmentProvider {
   std::map<segment_id_t, segment_seq_t> segment_seqs;
   std::map<segment_id_t, segment_type_t> segment_types;
 
+  journal_seq_t dummy_tail;
+
   mutable segment_info_t tmp_info;
 
   journal_test_t() = default;
@@ -92,7 +94,7 @@ struct journal_test_t : seastar_test_suite_t, SegmentProvider {
    */
   void set_journal_head(journal_seq_t) final {}
 
-  journal_seq_t get_journal_tail_target() const final { return journal_seq_t{}; }
+  journal_seq_t get_journal_tail_target() const final { return dummy_tail; }
 
   const segment_info_t& get_seg_info(segment_id_t id) const final {
     tmp_info = {};
@@ -102,11 +104,11 @@ struct journal_test_t : seastar_test_suite_t, SegmentProvider {
   }
 
   journal_seq_t get_dirty_extents_replay_from() const final {
-    return JOURNAL_SEQ_NULL;
+    return dummy_tail;
   }
 
   journal_seq_t get_alloc_info_replay_from() const final {
-    return JOURNAL_SEQ_NULL;
+    return dummy_tail;
   }
 
   segment_id_t allocate_segment(
@@ -147,12 +149,13 @@ struct journal_test_t : seastar_test_suite_t, SegmentProvider {
       journal = journal::make_segmented(*this);
       journal->set_write_pipeline(&pipeline);
       sms->add_segment_manager(segment_manager.get());
-      return journal->open_for_write();
-    }).safe_then(
-      [](auto){},
-      crimson::ct_error::all_same_way([] {
-       ASSERT_FALSE("Unable to mount");
-      }));
+      return journal->open_for_mkfs();
+    }).safe_then([this](auto) {
+      dummy_tail = journal_seq_t{0,
+        paddr_t::make_seg_paddr(segment_id_t(segment_manager->get_device_id(), 0), 0)};
+    }, crimson::ct_error::all_same_way([] {
+      ASSERT_FALSE("Unable to mount");
+    }));
   }
 
   seastar::future<> tear_down_fut() final {
@@ -176,7 +179,7 @@ struct journal_test_t : seastar_test_suite_t, SegmentProvider {
       journal->set_write_pipeline(&pipeline);
       return journal->replay(std::forward<T>(std::move(f)));
     }).safe_then([this] {
-      return journal->open_for_write();
+      return journal->open_for_mount();
     });
   }