git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
crimson/os/seastore: coordinate segment seq of journal and ool segments 45526/head
author Xuehan Xu <xxhdx1985126@gmail.com>
Sun, 20 Mar 2022 12:36:05 +0000 (20:36 +0800)
committer Xuehan Xu <xxhdx1985126@gmail.com>
Mon, 28 Mar 2022 09:48:33 +0000 (17:48 +0800)
The segment seq in OOL segments' headers also needs to be set to the
current journal segment seq, because we rely on this to judge whether a
delta needs to be replayed.

Signed-off-by: Xuehan Xu <xxhdx1985126@gmail.com>
19 files changed:
src/crimson/os/seastore/cache.cc
src/crimson/os/seastore/cache.h
src/crimson/os/seastore/extent_placement_manager.cc
src/crimson/os/seastore/extent_placement_manager.h
src/crimson/os/seastore/journal.h
src/crimson/os/seastore/journal/segment_allocator.cc
src/crimson/os/seastore/journal/segment_allocator.h
src/crimson/os/seastore/journal/segmented_journal.cc
src/crimson/os/seastore/journal/segmented_journal.h
src/crimson/os/seastore/seastore_types.cc
src/crimson/os/seastore/seastore_types.h
src/crimson/os/seastore/segment_cleaner.cc
src/crimson/os/seastore/segment_cleaner.h
src/crimson/os/seastore/segment_seq_allocator.h [new file with mode: 0644]
src/crimson/os/seastore/transaction_manager.cc
src/crimson/os/seastore/transaction_manager.h
src/test/crimson/seastore/test_btree_lba_manager.cc
src/test/crimson/seastore/test_seastore_cache.cc
src/test/crimson/seastore/test_seastore_journal.cc

index d0e6bfbd8dab58152a45e62193549aa542290892..8c8a0f8b5c01bd382c8e47bff9751c35e2f3bb60 100644 (file)
@@ -910,7 +910,9 @@ CachedExtentRef Cache::duplicate_for_write(
   return ret;
 }
 
-record_t Cache::prepare_record(Transaction &t)
+record_t Cache::prepare_record(
+  Transaction &t,
+  SegmentProvider *cleaner)
 {
   LOG_PREFIX(Cache::prepare_record);
   SUBTRACET(seastore_t, "enter", t);
@@ -982,6 +984,7 @@ record_t Cache::prepare_record(Transaction &t)
          0,
          0,
          t.root->get_version() - 1,
+         MAX_SEG_SEQ,
          std::move(delta_bl)
        });
     } else {
@@ -996,6 +999,9 @@ record_t Cache::prepare_record(Transaction &t)
          final_crc,
          (seastore_off_t)i->get_length(),
          i->get_version() - 1,
+         cleaner
+         ? cleaner->get_seq(i->get_paddr().as_seg_paddr().get_segment_id())
+         : MAX_SEG_SEQ,
          std::move(delta_bl)
        });
       i->last_committed_crc = final_crc;
@@ -1340,7 +1346,7 @@ Cache::replay_delta(
        return;
       }
 
-      TRACE("replay extent delta at {} {} ... -- {}, prv_extent={}",
+      DEBUG("replay extent delta at {} {} ... -- {}, prv_extent={}",
             journal_seq, record_base, delta, *extent);
 
       assert(extent->version == delta.pversion);
@@ -1383,6 +1389,10 @@ Cache::get_next_dirty_extents_ret Cache::get_next_dirty_extents(
        i != dirty.end() && bytes_so_far < max_bytes;
        ++i) {
     auto dirty_from = i->get_dirty_from();
+    if (!(dirty_from != JOURNAL_SEQ_NULL &&
+                dirty_from != JOURNAL_SEQ_MAX &&
+                dirty_from != NO_DELTAS))
+      ERRORT("{}", *i);
     ceph_assert(dirty_from != JOURNAL_SEQ_NULL &&
                 dirty_from != JOURNAL_SEQ_MAX &&
                 dirty_from != NO_DELTAS);
index a335a6b22c3cbbe884ca3a6e27ef72c6609f2f9a..b2f5b213aceabc81fa4c917d44cbcc91dc30bca7 100644 (file)
@@ -544,7 +544,8 @@ public:
    * Construct the record for Journal from transaction.
    */
   record_t prepare_record(
-    Transaction &t ///< [in, out] current transaction
+    Transaction &t, ///< [in, out] current transaction
+    SegmentProvider *cleaner
   );
 
   /**
index ed1c21d5657eab1744e69ae84f1ec217809e709a..ddba2186228b4afe120cd7309b1935fb13a20960 100644 (file)
@@ -11,17 +11,18 @@ namespace crimson::os::seastore {
 
 SegmentedAllocator::SegmentedAllocator(
   SegmentProvider& sp,
-  SegmentManager& sm)
-  : cold_writer{"COLD", sp, sm},
-    rewrite_writer{"REWRITE", sp, sm}
-{
-}
+  SegmentManager& sm,
+  SegmentSeqAllocator &ssa)
+  : cold_writer{"COLD", sp, sm, ssa},
+    rewrite_writer{"REWRITE", sp, sm, ssa}
+{}
 
 SegmentedAllocator::Writer::Writer(
   std::string name,
   SegmentProvider& sp,
-  SegmentManager& sm)
-  : segment_allocator(name, segment_type_t::OOL, sp, sm),
+  SegmentManager& sm,
+  SegmentSeqAllocator &ssa)
+  : segment_allocator(name, segment_type_t::OOL, sp, sm, ssa),
     record_submitter(crimson::common::get_conf<uint64_t>(
                        "seastore_journal_iodepth_limit"),
                      crimson::common::get_conf<uint64_t>(
@@ -55,7 +56,6 @@ SegmentedAllocator::Writer::write_record(
   return record_submitter.submit(std::move(record)
   ).safe_then([this, FNAME, &t, extents=std::move(extents)
               ](record_locator_t ret) mutable {
-    assert(ret.write_result.start_seq.segment_seq == OOL_SEG_SEQ);
     DEBUGT("{} finish with {} and {} extents",
            t, segment_allocator.get_name(),
            ret, extents.size());
index 34368aca7d85030e2d386480456af5c97de9f169..fcf1375e3bd848953df8eed57b07d920538a91f8 100644 (file)
@@ -75,7 +75,10 @@ class SegmentProvider;
 class SegmentedAllocator : public ExtentAllocator {
   class Writer : public ExtentOolWriter {
   public:
-    Writer(std::string name, SegmentProvider& sp, SegmentManager& sm);
+    Writer(std::string name,
+           SegmentProvider& sp,
+           SegmentManager& sm,
+           SegmentSeqAllocator &ssa);
     Writer(Writer &&) = default;
 
     open_ertr::future<> open() final {
@@ -111,7 +114,8 @@ class SegmentedAllocator : public ExtentAllocator {
 public:
   SegmentedAllocator(
     SegmentProvider& sp,
-    SegmentManager& sm);
+    SegmentManager& sm,
+    SegmentSeqAllocator &ssa);
 
   Writer &get_writer(placement_hint_t hint) {
     assert(hint >= placement_hint_t::COLD);
index 8cbbb1f07042db71ff6e3fa2e4ecf0fd0caadb21..7c0a1a8cabd667f35e5df7f2fb0222dbc3216333 100644 (file)
@@ -7,6 +7,7 @@
 
 #include "crimson/os/seastore/ordering_handle.h"
 #include "crimson/os/seastore/seastore_types.h"
+#include "crimson/os/seastore/segment_seq_allocator.h"
 
 namespace crimson::os::seastore {
 
@@ -84,6 +85,8 @@ public:
   virtual replay_ret replay(
     delta_handler_t &&delta_handler) = 0;
 
+  virtual SegmentSeqAllocator& get_segment_seq_allocator() = 0;
+
   virtual ~Journal() {}
 };
 using JournalRef = std::unique_ptr<Journal>;
index 9f3a16efbc038faf5f870af77565c630dc7ae0cf..0533121aa9af6ffa0d2667c5b97bc9097712e113 100644 (file)
@@ -16,11 +16,13 @@ SegmentAllocator::SegmentAllocator(
   std::string name,
   segment_type_t type,
   SegmentProvider &sp,
-  SegmentManager &sm)
+  SegmentManager &sm,
+  SegmentSeqAllocator &ssa)
   : name{name},
     type{type},
     segment_provider{sp},
-    segment_manager{sm}
+    segment_manager{sm},
+    segment_seq_allocator(ssa)
 {
   ceph_assert(type != segment_type_t::NULL_SEG);
   std::ostringstream oss;
@@ -29,23 +31,20 @@ SegmentAllocator::SegmentAllocator(
   reset();
 }
 
-void SegmentAllocator::set_next_segment_seq(segment_seq_t seq)
-{
-  LOG_PREFIX(SegmentAllocator::set_next_segment_seq);
-  INFO("{} next_segment_seq={}",
-       print_name, segment_seq_printer_t{seq});
-  assert(type == segment_seq_to_type(seq));
-  next_segment_seq = seq;
-}
-
 SegmentAllocator::open_ret
 SegmentAllocator::do_open()
 {
   LOG_PREFIX(SegmentAllocator::do_open);
   ceph_assert(!current_segment);
-  segment_seq_t new_segment_seq = get_new_segment_seq_and_increment();
+  segment_seq_t new_segment_seq =
+    segment_seq_allocator.get_and_inc_next_segment_seq();
+  auto meta = segment_manager.get_meta();
+  current_segment_nonce = ceph_crc32c(
+    new_segment_seq,
+    reinterpret_cast<const unsigned char *>(meta.seastore_id.bytes()),
+    sizeof(meta.seastore_id.uuid));
   auto new_segment_id = segment_provider.get_segment(
-      get_device_id(), new_segment_seq);
+      get_device_id(), new_segment_seq, type);
   return segment_manager.open(new_segment_id
   ).handle_error(
     open_ertr::pass_further{},
@@ -65,7 +64,8 @@ SegmentAllocator::do_open()
       new_segment_seq,
       segment_id,
       new_journal_tail,
-      current_segment_nonce};
+      current_segment_nonce,
+      type};
     INFO("{} writing header to new segment ... -- {}",
          print_name, header);
 
@@ -108,7 +108,8 @@ SegmentAllocator::do_open()
       }
       DEBUG("{} rolled new segment id={}",
             print_name, current_segment->get_segment_id());
-      ceph_assert(new_journal_seq.segment_seq == get_current_segment_seq());
+      ceph_assert(new_journal_seq.segment_seq ==
+        segment_provider.get_seq(current_segment->get_segment_id()));
       return new_journal_seq;
     });
   });
@@ -142,7 +143,7 @@ SegmentAllocator::write(ceph::bufferlist to_write)
   auto write_length = to_write.length();
   auto write_start_offset = written_to;
   auto write_start_seq = journal_seq_t{
-    get_current_segment_seq(),
+    segment_provider.get_seq(current_segment->get_segment_id()),
     paddr_t::make_seg_paddr(
       current_segment->get_segment_id(), write_start_offset)
   };
@@ -200,15 +201,11 @@ SegmentAllocator::close_segment(bool is_rolling)
   // Note: make sure no one can access the current segment once closing
   auto seg_to_close = std::move(current_segment);
   auto close_segment_id = seg_to_close->get_segment_id();
-  INFO("{} close segment id={}, seq={}, written_to={}, nonce={}",
-       print_name,
-       close_segment_id,
-       segment_seq_printer_t{get_current_segment_seq()},
-       written_to,
-       current_segment_nonce);
   if (is_rolling) {
     segment_provider.close_segment(close_segment_id);
   }
+  segment_seq_t cur_segment_seq =
+    segment_provider.get_seq(seg_to_close->get_segment_id());
   journal_seq_t cur_journal_tail;
   if (type == segment_type_t::JOURNAL) {
     cur_journal_tail = segment_provider.get_journal_tail_target();
@@ -216,16 +213,24 @@ SegmentAllocator::close_segment(bool is_rolling)
     cur_journal_tail = NO_DELTAS;
   }
   auto tail = segment_tail_t{
-    get_current_segment_seq(),
+    segment_provider.get_seq(close_segment_id),
     close_segment_id,
     cur_journal_tail,
     current_segment_nonce,
+    type,
     segment_provider.get_last_modified(
       close_segment_id).time_since_epoch().count(),
     segment_provider.get_last_rewritten(
       close_segment_id).time_since_epoch().count()};
   ceph::bufferlist bl;
   encode(tail, bl);
+  INFO("{} close segment id={}, seq={}, written_to={}, nonce={}, journal_tail={}",
+       print_name,
+       close_segment_id,
+       cur_segment_seq,
+       written_to,
+       current_segment_nonce,
+       tail.journal_tail);
 
   bufferptr bp(
     ceph::buffer::create_page_aligned(
index 0b862c3cee1fea5b0e12b154d4b6aac92ccc79c7..0a1b9812fd626696a6d81430025805b36d701134 100644 (file)
@@ -12,6 +12,7 @@
 
 #include "crimson/common/errorator.h"
 #include "crimson/os/seastore/segment_manager.h"
+#include "crimson/os/seastore/segment_seq_allocator.h"
 
 namespace crimson::os::seastore {
   class SegmentProvider;
@@ -32,7 +33,8 @@ class SegmentAllocator {
   SegmentAllocator(std::string name,
                    segment_type_t type,
                    SegmentProvider &sp,
-                   SegmentManager &sm);
+                   SegmentManager &sm,
+                   SegmentSeqAllocator &ssa);
 
   const std::string& get_name() const {
     return print_name;
@@ -75,8 +77,6 @@ class SegmentAllocator {
     return written_to;
   }
 
-  void set_next_segment_seq(segment_seq_t);
-
   // returns true iff the current segment has insufficient space
   bool needs_roll(std::size_t length) const {
     assert(can_write());
@@ -112,8 +112,6 @@ class SegmentAllocator {
     current_segment.reset();
     written_to = 0;
 
-    // segment type related special handling
-    reset_segment_seq();
     current_segment_nonce = 0;
   }
 
@@ -121,48 +119,6 @@ class SegmentAllocator {
   using close_segment_ertr = base_ertr;
   close_segment_ertr::future<> close_segment(bool is_rolling);
 
-  /*
-   * segment type related special handling
-   */
-
-  void reset_segment_seq() {
-    if (type == segment_type_t::JOURNAL) {
-      next_segment_seq = 0;
-    } else { // OOL
-      next_segment_seq = OOL_SEG_SEQ;
-    }
-  }
-
-  segment_seq_t get_current_segment_seq() const {
-    segment_seq_t ret;
-    if (type == segment_type_t::JOURNAL) {
-      assert(next_segment_seq != 0);
-      ret = next_segment_seq - 1;
-    } else { // OOL
-      ret = next_segment_seq;
-    }
-    assert(segment_seq_to_type(ret) == type);
-    return ret;
-  }
-
-  segment_seq_t get_new_segment_seq_and_increment() {
-    segment_seq_t new_segment_seq;
-    if (type == segment_type_t::JOURNAL) {
-      new_segment_seq = next_segment_seq++;
-      auto meta = segment_manager.get_meta();
-      current_segment_nonce = ceph_crc32c(
-        new_segment_seq,
-        reinterpret_cast<const unsigned char *>(meta.seastore_id.bytes()),
-        sizeof(meta.seastore_id.uuid));
-    } else { // OOL
-      new_segment_seq = next_segment_seq;
-      assert(current_segment_nonce == 0);
-    }
-    assert(new_segment_seq == get_current_segment_seq());
-    ceph_assert(segment_seq_to_type(new_segment_seq) == type);
-    return new_segment_seq;
-  }
-
   const std::string name;
   // device id is not available during construction,
   // so generate the print_name later.
@@ -172,9 +128,7 @@ class SegmentAllocator {
   SegmentManager &segment_manager;
   SegmentRef current_segment;
   seastore_off_t written_to;
-
-  // segment type related special handling
-  segment_seq_t next_segment_seq;
+  SegmentSeqAllocator &segment_seq_allocator;
   segment_nonce_t current_segment_nonce;
   //3. journal tail written to both segment_header_t and segment_tail_t
 };
index fd570531ffcdc3cee00358decd5e7621ba633ab5..8d8c25b6884cc1228572791e555858af5d8eded1 100644 (file)
@@ -31,10 +31,12 @@ SegmentedJournal::SegmentedJournal(
   ExtentReader &scanner,
   SegmentProvider &segment_provider)
   : segment_provider(segment_provider),
+    segment_seq_allocator(new SegmentSeqAllocator),
     journal_segment_allocator("JOURNAL",
                               segment_type_t::JOURNAL,
                               segment_provider,
-                              segment_manager),
+                              segment_manager,
+                             *segment_seq_allocator),
     record_submitter(crimson::common::get_conf<uint64_t>(
                        "seastore_journal_iodepth_limit"),
                      crimson::common::get_conf<uint64_t>(
@@ -78,7 +80,7 @@ SegmentedJournal::prep_replay_segments(
        rt.second.journal_segment_seq;
     });
 
-  journal_segment_allocator.set_next_segment_seq(
+  segment_seq_allocator->set_next_segment_seq(
     segments.rbegin()->second.journal_segment_seq + 1);
   std::for_each(
     segments.begin(),
@@ -147,12 +149,14 @@ SegmentedJournal::replay_segment(
   INFO("starting at {} -- {}", seq, header);
   return seastar::do_with(
     scan_valid_records_cursor(seq),
-    ExtentReader::found_record_handler_t([=, &handler](
+    ExtentReader::found_record_handler_t(
+      [s_type=header.type, &handler, this](
       record_locator_t locator,
       const record_group_header_t& header,
       const bufferlist& mdbuf)
       -> ExtentReader::scan_valid_records_ertr::future<>
     {
+      LOG_PREFIX(Journal::replay_segment);
       auto maybe_record_deltas_list = try_decode_deltas(
           header, mdbuf, locator.record_block_base);
       if (!maybe_record_deltas_list) {
@@ -165,6 +169,7 @@ SegmentedJournal::replay_segment(
       return seastar::do_with(
         std::move(*maybe_record_deltas_list),
         [write_result=locator.write_result,
+        s_type,
          this,
          FNAME,
          &handler](auto& record_deltas_list)
@@ -172,6 +177,7 @@ SegmentedJournal::replay_segment(
         return crimson::do_for_each(
           record_deltas_list,
           [write_result,
+          s_type,
            this,
            FNAME,
            &handler](record_deltas_t& record_deltas)
@@ -186,6 +192,7 @@ SegmentedJournal::replay_segment(
           return crimson::do_for_each(
             record_deltas.deltas,
             [locator,
+            s_type,
              this,
              FNAME,
              &handler](auto &p)
@@ -202,16 +209,13 @@ SegmentedJournal::replay_segment(
             if (delta.paddr != P_ADDR_NULL) {
               auto& seg_addr = delta.paddr.as_seg_paddr();
               auto delta_paddr_segment_seq = segment_provider.get_seq(seg_addr.get_segment_id());
-              auto delta_paddr_segment_type = segment_seq_to_type(delta_paddr_segment_seq);
-              auto locator_segment_seq = locator.write_result.start_seq.segment_seq;
-              if (delta_paddr_segment_type == segment_type_t::NULL_SEG ||
-                  (delta_paddr_segment_type == segment_type_t::JOURNAL &&
-                   delta_paddr_segment_seq > locator_segment_seq)) {
+              if (s_type == segment_type_t::NULL_SEG ||
+                  (delta_paddr_segment_seq != delta.ext_seq)) {
                 SUBDEBUG(seastore_cache,
-                         "delta is obsolete, delta_paddr_segment_seq={}, locator_segment_seq={} -- {}",
+                         "delta is obsolete, delta_paddr_segment_seq={}, -- {}",
                          segment_seq_printer_t{delta_paddr_segment_seq},
-                         segment_seq_printer_t{locator_segment_seq},
                          delta);
+               assert(delta_paddr_segment_seq > delta.ext_seq);
                 return replay_ertr::now();
               }
             }
index e699f330bfc892691fa3adf2f785dac2abb424ab..665a33ec93806f6f6f9973df239984c982272c98 100644 (file)
@@ -16,9 +16,9 @@
 #include "crimson/os/seastore/seastore_types.h"
 #include "crimson/osd/exceptions.h"
 #include "segment_allocator.h"
+#include "crimson/os/seastore/segment_seq_allocator.h"
 
 namespace crimson::os::seastore::journal {
-
 /**
  * Manages stream of atomically written records to a SegmentManager.
  */
@@ -46,6 +46,9 @@ public:
     write_pipeline = _write_pipeline;
   }
 
+  SegmentSeqAllocator& get_segment_seq_allocator() final {
+    return *segment_seq_allocator;
+  }
 private:
   submit_record_ret do_submit_record(
     record_t &&record,
@@ -53,6 +56,7 @@ private:
   );
 
   SegmentProvider& segment_provider;
+  SegmentSeqAllocatorRef segment_seq_allocator;
   SegmentAllocator journal_segment_allocator;
   RecordSubmitter record_submitter;
   ExtentReader& scanner;
index 0b02091b598e9b816fdb27facd05365f4b4409ac..8a3748cc8f25ff1e7f4dabd3fbdc85db63c032f2 100644 (file)
@@ -80,26 +80,13 @@ std::ostream& operator<<(std::ostream& out, segment_type_t t)
   }
 }
 
-segment_type_t segment_seq_to_type(segment_seq_t seq)
-{
-  if (seq <= MAX_VALID_SEG_SEQ) {
-    return segment_type_t::JOURNAL;
-  } else if (seq == OOL_SEG_SEQ) {
-    return segment_type_t::OOL;
-  } else {
-    assert(seq == NULL_SEG_SEQ);
-    return segment_type_t::NULL_SEG;
-  }
-}
-
 std::ostream& operator<<(std::ostream& out, segment_seq_printer_t seq)
 {
-  auto type = segment_seq_to_type(seq.seq);
-  switch(type) {
-  case segment_type_t::JOURNAL:
+  if (seq.seq == NULL_SEG_SEQ) {
+    return out << "NULL_SEG_SEQ";
+  } else {
+    assert(seq.seq <= MAX_VALID_SEG_SEQ);
     return out << seq.seq;
-  default:
-    return out << type;
   }
 }
 
@@ -211,6 +198,7 @@ std::ostream &operator<<(std::ostream &out, const delta_info_t &delta)
             << ", final_crc: " << delta.final_crc
             << ", length: " << delta.length
             << ", pversion: " << delta.pversion
+            << ", ext_seq: " << delta.ext_seq
             << ")";
 }
 
@@ -230,6 +218,7 @@ std::ostream &operator<<(std::ostream &out, const segment_header_t &header)
             << ", segment_id=" << header.physical_segment_id
             << ", journal_tail=" << header.journal_tail
             << ", segment_nonce=" << header.segment_nonce
+            << ", type=" << header.type
             << ")";
 }
 
index 5de23c5ad91b882591d84dad1ca838ad18565435..426ef043691d59f9c6e8b8321773c9fcac205737 100644 (file)
@@ -208,7 +208,6 @@ using segment_seq_t = uint32_t;
 static constexpr segment_seq_t MAX_SEG_SEQ =
   std::numeric_limits<segment_seq_t>::max();
 static constexpr segment_seq_t NULL_SEG_SEQ = MAX_SEG_SEQ;
-static constexpr segment_seq_t OOL_SEG_SEQ = MAX_SEG_SEQ - 1;
 static constexpr segment_seq_t MAX_VALID_SEG_SEQ = MAX_SEG_SEQ - 2;
 
 enum class segment_type_t {
@@ -219,8 +218,6 @@ enum class segment_type_t {
 
 std::ostream& operator<<(std::ostream& out, segment_type_t t);
 
-segment_type_t segment_seq_to_type(segment_seq_t seq);
-
 struct segment_seq_printer_t {
   segment_seq_t seq;
 };
@@ -760,10 +757,6 @@ struct journal_seq_t {
     return {segment_seq, offset.add_offset(o)};
   }
 
-  segment_type_t get_type() const {
-    return segment_seq_to_type(segment_seq);
-  }
-
   DENC(journal_seq_t, v, p) {
     DENC_START(1, 1, p);
     denc(v.segment_seq, p);
@@ -918,6 +911,7 @@ struct delta_info_t {
   uint32_t final_crc = 0;
   seastore_off_t length = NULL_SEG_OFF;         ///< extent length
   extent_version_t pversion;                   ///< prior version
+  segment_seq_t ext_seq;                      ///< seq of the extent's segment
   ceph::bufferlist bl;                         ///< payload
 
   DENC(delta_info_t, v, p) {
@@ -929,6 +923,7 @@ struct delta_info_t {
     denc(v.final_crc, p);
     denc(v.length, p);
     denc(v.pversion, p);
+    denc(v.ext_seq, p);
     denc(v.bl, p);
     DENC_FINISH(p);
   }
@@ -942,6 +937,7 @@ struct delta_info_t {
       final_crc == rhs.final_crc &&
       length == rhs.length &&
       pversion == rhs.pversion &&
+      ext_seq == rhs.ext_seq &&
       bl == rhs.bl
     );
   }
@@ -1300,8 +1296,10 @@ struct segment_header_t {
   journal_seq_t journal_tail;
   segment_nonce_t segment_nonce;
 
+  segment_type_t type;
+
   segment_type_t get_type() const {
-    return segment_seq_to_type(journal_segment_seq);
+    return type;
   }
 
   DENC(segment_header_t, v, p) {
@@ -1310,6 +1308,7 @@ struct segment_header_t {
     denc(v.physical_segment_id, p);
     denc(v.journal_tail, p);
     denc(v.segment_nonce, p);
+    denc(v.type, p);
     DENC_FINISH(p);
   }
 };
@@ -1321,15 +1320,23 @@ struct segment_tail_t {
 
   journal_seq_t journal_tail;
   segment_nonce_t segment_nonce;
+
+  segment_type_t type;
+
   mod_time_point_t last_modified;
   mod_time_point_t last_rewritten;
 
+  segment_type_t get_type() const {
+    return type;
+  }
+
   DENC(segment_tail_t, v, p) {
     DENC_START(1, 1, p);
     denc(v.journal_segment_seq, p);
     denc(v.physical_segment_id, p);
     denc(v.journal_tail, p);
     denc(v.segment_nonce, p);
+    denc(v.type, p);
     denc(v.last_modified, p);
     denc(v.last_rewritten, p);
     DENC_FINISH(p);
@@ -1752,3 +1759,40 @@ struct denc_traits<crimson::os::seastore::device_type_t> {
            reinterpret_cast<char*>(&o));
   }
 };
+
+template<>
+struct denc_traits<crimson::os::seastore::segment_type_t> {
+  static constexpr bool supported = true;
+  static constexpr bool featured = false;
+  static constexpr bool bounded = true;
+  static constexpr bool need_contiguous = false;
+
+  static void bound_encode(
+    const crimson::os::seastore::segment_type_t &o,
+    size_t& p,
+    uint64_t f=0) {
+    p += sizeof(crimson::os::seastore::segment_type_t);
+  }
+  template<class It>
+  static std::enable_if_t<!is_const_iterator_v<It>>
+  encode(
+    const crimson::os::seastore::segment_type_t &o,
+    It& p,
+    uint64_t f=0) {
+    get_pos_add<crimson::os::seastore::segment_type_t>(p) = o;
+  }
+  template<class It>
+  static std::enable_if_t<is_const_iterator_v<It>>
+  decode(
+    crimson::os::seastore::segment_type_t& o,
+    It& p,
+    uint64_t f=0) {
+    o = get_pos_add<crimson::os::seastore::segment_type_t>(p);
+  }
+  static void decode(
+    crimson::os::seastore::segment_type_t& o,
+    ceph::buffer::list::const_iterator &p) {
+    p.copy(sizeof(crimson::os::seastore::segment_type_t),
+           reinterpret_cast<char*>(&o));
+  }
+};
index fb795ceef84a7fb6c45cf2e3f3051a047191557b..df4f76ff5fd4c10d29d3f74e7136ee5c0ea956b7 100644 (file)
@@ -21,7 +21,7 @@ namespace crimson::os::seastore {
 
 void segment_info_set_t::segment_info_t::set_open(segment_seq_t seq) {
   assert(state == Segment::segment_state_t::EMPTY);
-  assert(segment_seq_to_type(seq) != segment_type_t::NULL_SEG);
+  assert(seq != NULL_SEG_SEQ);
   state = Segment::segment_state_t::OPEN;
   journal_segment_seq = seq;
 }
@@ -236,10 +236,12 @@ void SegmentCleaner::register_metrics()
 }
 
 segment_id_t SegmentCleaner::get_segment(
-    device_id_t device_id, segment_seq_t seq)
+    device_id_t device_id,
+    segment_seq_t seq,
+    segment_type_t type)
 {
   LOG_PREFIX(SegmentCleaner::get_segment);
-  assert(segment_seq_to_type(seq) != segment_type_t::NULL_SEG);
+  assert(seq != NULL_SEG_SEQ);
   for (auto it = segments.device_begin(device_id);
        it != segments.device_end(device_id);
        ++it) {
@@ -247,7 +249,7 @@ segment_id_t SegmentCleaner::get_segment(
     auto& segment_info = it->second;
     if (segment_info.is_empty()) {
       DEBUG("returning segment {} {}", seg_id, segment_seq_printer_t{seq});
-      mark_open(seg_id, seq);
+      mark_open(seg_id, seq, type);
       return seg_id;
     }
   }
@@ -294,8 +296,7 @@ void SegmentCleaner::update_journal_tail_committed(journal_seq_t committed)
 
 void SegmentCleaner::close_segment(segment_id_t segment)
 {
-  ceph_assert(segment_seq_to_type(segments[segment].journal_segment_seq) !=
-              segment_type_t::NULL_SEG);
+  ceph_assert(segments[segment].journal_segment_seq != NULL_SEG_SEQ);
   mark_closed(segment);
 }
 
@@ -574,7 +575,8 @@ SegmentCleaner::mount_ret SegmentCleaner::mount(
            }
            init_mark_segment_closed(
              segment_id,
-             header.journal_segment_seq);
+             header.journal_segment_seq,
+             header.type);
            return seastar::now();
          }).handle_error(
            crimson::ct_error::enodata::handle(
@@ -660,7 +662,8 @@ SegmentCleaner::scan_extents_ret SegmentCleaner::scan_nonfull_segment(
     }).safe_then([this, segment_id, header](auto) {
       init_mark_segment_closed(
        segment_id,
-       header.journal_segment_seq);
+       header.journal_segment_seq,
+       header.type);
       return seastar::now();
     });
   } else if (header.get_type() == segment_type_t::JOURNAL) {
@@ -673,7 +676,8 @@ SegmentCleaner::scan_extents_ret SegmentCleaner::scan_nonfull_segment(
   }
   init_mark_segment_closed(
     segment_id,
-    header.journal_segment_seq);
+    header.journal_segment_seq,
+    header.type);
   return seastar::now();
 }
 
index aed0c4e8ff135d762d718de9d21a80051e68851e..5f7a6a5e7c87fc0f8b82bbc578d015a6acbbfb11 100644 (file)
@@ -58,11 +58,13 @@ class segment_info_set_t {
     // Will be non-null for any segments in the current journal
     segment_seq_t journal_segment_seq = NULL_SEG_SEQ;
 
+    segment_type_t type = segment_type_t::NULL_SEG;
+
     seastar::lowres_system_clock::time_point last_modified;
     seastar::lowres_system_clock::time_point last_rewritten;
 
     segment_type_t get_type() const {
-      return segment_seq_to_type(journal_segment_seq);
+      return type;
     }
 
     void set_open(segment_seq_t);
@@ -287,7 +289,7 @@ private:
 class SegmentProvider {
 public:
   virtual segment_id_t get_segment(
-      device_id_t id, segment_seq_t seq) = 0;
+      device_id_t id, segment_seq_t seq, segment_type_t type) = 0;
 
   virtual void close_segment(segment_id_t) {}
 
@@ -728,7 +730,7 @@ public:
   mount_ret mount(device_id_t pdevice_id, std::vector<SegmentManager*>& sms);
 
   segment_id_t get_segment(
-      device_id_t id, segment_seq_t seq) final;
+      device_id_t id, segment_seq_t seq, segment_type_t type) final;
 
   void close_segment(segment_id_t segment) final;
 
@@ -1316,16 +1318,16 @@ private:
 
   void init_mark_segment_closed(
     segment_id_t segment,
-    segment_seq_t seq) {
+    segment_seq_t seq,
+    segment_type_t s_type) {
     crimson::get_logger(ceph_subsys_seastore_cleaner).debug(
       "SegmentCleaner::init_mark_segment_closed: segment {}, seq {}",
       segment,
       segment_seq_printer_t{seq});
-    ceph_assert(segment_seq_to_type(seq) != segment_type_t::NULL_SEG);
     mark_closed(segment);
     segments[segment].journal_segment_seq = seq;
-    auto s_type = segments[segment].get_type();
     assert(s_type != segment_type_t::NULL_SEG);
+    segments[segment].type = s_type;
     if (s_type == segment_type_t::JOURNAL) {
       assert(journal_device_id == segment.device_id());
       segments.new_journal_segment();
@@ -1390,7 +1392,7 @@ private:
     maybe_wake_gc_blocked_io();
   }
 
-  void mark_open(segment_id_t segment, segment_seq_t seq) {
+  void mark_open(segment_id_t segment, segment_seq_t seq, segment_type_t s_type) {
     assert(segment.device_id() ==
       segments[segment.device_id()]->device_id);
     assert(segment.device_segment_id() <
@@ -1400,8 +1402,8 @@ private:
     segments.segment_opened(segment);
     auto& segment_info = segments[segment];
     segment_info.set_open(seq);
+    segment_info.type = s_type;
 
-    auto s_type = segment_info.get_type();
     ceph_assert(s_type != segment_type_t::NULL_SEG);
     if (s_type == segment_type_t::JOURNAL) {
       segments.new_journal_segment();
diff --git a/src/crimson/os/seastore/segment_seq_allocator.h b/src/crimson/os/seastore/segment_seq_allocator.h
new file mode 100644 (file)
index 0000000..9003b3a
--- /dev/null
@@ -0,0 +1,33 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include "crimson/os/seastore/logging.h"
+#include "crimson/os/seastore/seastore_types.h"
+
+namespace crimson::os::seastore::journal {
+class SegmentedJournal;
+}
+
+namespace crimson::os::seastore {
+
+class SegmentSeqAllocator {
+public:
+  segment_seq_t get_and_inc_next_segment_seq() {
+    return next_segment_seq++;
+  }
+private:
+  void set_next_segment_seq(segment_seq_t seq) {
+    LOG_PREFIX(SegmentSeqAllocator::set_next_segment_seq);
+    SUBINFO(seastore_journal, "next_segment_seq={}", segment_seq_printer_t{seq});
+    next_segment_seq = seq;
+  }
+  segment_seq_t next_segment_seq = 0;
+  friend class journal::SegmentedJournal;
+};
+
+using SegmentSeqAllocatorRef =
+  std::unique_ptr<SegmentSeqAllocator>;
+
+};
index 7021293d204bd4560a819020f6a369d414e7593a..a01652601d0c1e52a87418f7d38f2fd2dcce39bd 100644 (file)
@@ -323,7 +323,7 @@ TransactionManager::submit_transaction_direct(
     return tref.get_handle().enter(write_pipeline.prepare);
   }).si_then([this, FNAME, &tref]() mutable
              -> submit_transaction_iertr::future<> {
-    auto record = cache->prepare_record(tref);
+    auto record = cache->prepare_record(tref, segment_cleaner.get());
 
     tref.get_handle().maybe_release_collection_lock();
 
index f1a5745322ad6fe27a6bbe4aa3ef09ee5c647fda..4c7ff591ca617267bf4f19a44cc6bace4125b3bc 100644 (file)
@@ -549,7 +549,8 @@ public:
       device_type_t::SEGMENTED,
       std::make_unique<SegmentedAllocator>(
        *segment_cleaner,
-       *sm));
+       *sm,
+       journal->get_segment_seq_allocator()));
   }
 
   ~TransactionManager();
index f73038da2e204ef7d75d345483e1f657c69e9745..3e24f5accca406854bc1067c316d92c44016e8a3 100644 (file)
@@ -41,6 +41,10 @@ struct btree_test_base :
 
   btree_test_base() = default;
 
+  std::map<segment_id_t, segment_seq_t> segment_seqs;
+
+
+
   seastar::lowres_system_clock::time_point get_last_modified(
     segment_id_t id) const final {
     return seastar::lowres_system_clock::time_point();
@@ -52,21 +56,30 @@ struct btree_test_base :
   }
   void update_segment_avail_bytes(paddr_t offset) final {}
 
-  segment_id_t get_segment(device_id_t id, segment_seq_t seq) final {
+  segment_id_t get_segment(
+    device_id_t id,
+    segment_seq_t seq,
+    segment_type_t) final
+  {
     auto ret = next;
     next = segment_id_t{
       next.device_id(),
       next.device_segment_id() + 1};
+    segment_seqs[ret] = seq;
     return ret;
   }
 
+  segment_seq_t get_seq(segment_id_t id) {
+    return segment_seqs[id];
+  }
+
   journal_seq_t get_journal_tail_target() const final { return journal_seq_t{}; }
   void update_journal_tail_committed(journal_seq_t committed) final {}
 
   virtual void complete_commit(Transaction &t) {}
   seastar::future<> submit_transaction(TransactionRef t)
   {
-    auto record = cache->prepare_record(*t);
+    auto record = cache->prepare_record(*t, this);
     return journal->submit_record(std::move(record), t->get_handle()).safe_then(
       [this, t=std::move(t)](auto submit_result) mutable {
        cache->complete_commit(
index 90bf0f4d485b6aa4110531f378b16ef57fdf1bb8..979883be44f35236cba19d4d7d5559568409e659 100644 (file)
@@ -31,7 +31,7 @@ struct cache_test_t : public seastar_test_suite_t {
 
   seastar::future<paddr_t> submit_transaction(
     TransactionRef t) {
-    auto record = cache->prepare_record(*t);
+    auto record = cache->prepare_record(*t, nullptr);
 
     bufferlist bl;
     for (auto &&block : record.extents) {
index 79eb1931851fc0f37bc39e38a9f36248eb0c5955..f9d5dc4f77dcc5257ee516601014fa432964e9ed 100644 (file)
@@ -80,6 +80,8 @@ struct journal_test_t : seastar_test_suite_t, SegmentProvider {
 
   segment_id_t next;
 
+  std::map<segment_id_t, segment_seq_t> segment_seqs;
+
   journal_test_t() = default;
 
   seastar::lowres_system_clock::time_point get_last_modified(
@@ -94,14 +96,23 @@ struct journal_test_t : seastar_test_suite_t, SegmentProvider {
 
   void update_segment_avail_bytes(paddr_t offset) final {}
 
-  segment_id_t get_segment(device_id_t id, segment_seq_t seq) final {
+  segment_id_t get_segment(
+    device_id_t id,
+    segment_seq_t seq,
+    segment_type_t) final
+  {
     auto ret = next;
     next = segment_id_t{
       next.device_id(),
       next.device_segment_id() + 1};
+    segment_seqs[ret] = seq;
     return ret;
   }
 
+  segment_seq_t get_seq(segment_id_t id) {
+    return segment_seqs[id];
+  }
+
   journal_seq_t get_journal_tail_target() const final { return journal_seq_t{}; }
   void update_journal_tail_committed(journal_seq_t paddr) final {}
 
@@ -224,6 +235,7 @@ struct journal_test_t : seastar_test_suite_t, SegmentProvider {
       0, 0,
       block_size,
       1,
+      MAX_SEG_SEQ,
       bl
     };
   }