]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
crimson/os/seastore/journal/cbj: introduce magic value to identify that written recor...
authormyoungwon oh <ohmyoungwon@gmail.com>
Sat, 22 Jul 2023 14:52:14 +0000 (14:52 +0000)
committerMatan Breizman <mbreizma@redhat.com>
Wed, 11 Oct 2023 11:49:52 +0000 (11:49 +0000)
The cbj would replay invalid records if the same write pattern (mkfs -> write -> mkfs -> write) repeats.
On the second mkfs(), current cbj wrongly recognizes the old records are valid beceause they have
correct seq. number at appropriate offset even though the records are expired.

To solve this, this commit introduces random value to identify the journal  during mkfs()

Signed-off-by: Myoungwon Oh <myoungwon.oh@samsung.com>
(cherry picked from commit 942ea6487f59e52d88050fb8deb8a2e6c0ca83c3)

src/crimson/os/seastore/journal/circular_bounded_journal.cc
src/crimson/os/seastore/journal/circular_bounded_journal.h
src/crimson/os/seastore/journal/circular_journal_space.cc
src/crimson/os/seastore/journal/circular_journal_space.h
src/test/crimson/seastore/test_cbjournal.cc

index a92b9ecbd6b75da4a2f4a6312829ea8e8c94fd30..0a85b5aaa5e63d85467155f22bd11bb761169df9 100644 (file)
@@ -117,7 +117,7 @@ RecordScanner::read_validate_record_metadata_ret CircularBoundedJournal::read_va
 {
   LOG_PREFIX(CircularBoundedJournal::read_validate_record_metadata);
   paddr_t start = cursor.seq.offset;
-  return read_record(start, 0
+  return read_record(start, nonce
   ).safe_then([FNAME, &cursor](auto ret) {
     if (!ret.has_value()) {
       return read_validate_record_metadata_ret(
@@ -234,7 +234,7 @@ Journal::replay_ret CircularBoundedJournal::replay_segment(
     [=, this, &cursor](auto &dhandler) {
       return scan_valid_records(
         cursor,
-       0,
+       cjs.get_cbj_header().magic,
         std::numeric_limits<size_t>::max(),
         dhandler).safe_then([](auto){}
       ).handle_error(
@@ -377,7 +377,7 @@ CircularBoundedJournal::return_record(record_group_header_t& header, bufferlist
 }
 
 CircularBoundedJournal::read_record_ret
-CircularBoundedJournal::read_record(paddr_t off, segment_seq_t expected_seq)
+CircularBoundedJournal::read_record(paddr_t off, segment_nonce_t magic)
 {
   LOG_PREFIX(CircularBoundedJournal::read_record);
   rbm_abs_addr addr = convert_paddr_to_abs_addr(off);
@@ -386,7 +386,7 @@ CircularBoundedJournal::read_record(paddr_t off, segment_seq_t expected_seq)
   DEBUG("reading record from abs addr {} read length {}", addr, read_length);
   auto bptr = bufferptr(ceph::buffer::create_page_aligned(read_length));
   return cjs.read(addr, bptr
-  ).safe_then([this, addr, bptr, expected_seq, FNAME]() mutable
+  ).safe_then([this, addr, bptr, magic, FNAME]() mutable
     -> read_record_ret {
     record_group_header_t h;
     bufferlist bl;
@@ -404,8 +404,7 @@ CircularBoundedJournal::read_record(paddr_t off, segment_seq_t expected_seq)
         h.dlength % get_block_size() != 0 ||
         addr + h.mdlength + h.dlength > get_journal_end() ||
         h.committed_to.segment_seq == NULL_SEG_SEQ ||
-        (expected_seq != NULL_SEG_SEQ &&
-         h.committed_to.segment_seq != expected_seq)) {
+       h.segment_nonce != magic) {
       return read_record_ret(
         read_record_ertr::ready_future_marker{},
         std::nullopt);
index 3c696f99704cc93f59736110469aec7025b41b00..311d9d6d269e0cc8f2c94cdc32e595764b330e9c 100644 (file)
@@ -135,7 +135,7 @@ public:
    * @param expected_seq
    *
    */
-  read_record_ret read_record(paddr_t offset, segment_seq_t expected_seq);
+  read_record_ret read_record(paddr_t offset, segment_nonce_t magic);
 
   read_record_ret return_record(record_group_header_t& header, bufferlist bl);
 
index 7565c281557635c4e91896f6f4f2564a7db52b77..f32b69d110ec1d396b5212b34483e2e7e7fd8a59 100644 (file)
@@ -18,8 +18,9 @@ std::ostream &operator<<(std::ostream &out,
     const CircularJournalSpace::cbj_header_t &header)
 {
   return out << "cbj_header_t(" 
-            << "dirty_tail=" << header.dirty_tail
+            << "dirty_tail=" << header.dirty_tail
             << ", alloc_tail=" << header.alloc_tail
+            << ", magic=" << header.magic
              << ")";
 }
 
@@ -86,6 +87,15 @@ CircularJournalSpace::write(ceph::bufferlist&& to_write) {
   );
 }
 
+segment_nonce_t calc_new_nonce(
+  uint32_t crc,
+  unsigned char const *data,
+  unsigned length)
+{
+  crc &= std::numeric_limits<uint32_t>::max() >> 1;
+  return ceph_crc32c(crc, data, length);
+}
+
 CircularJournalSpace::open_ret CircularJournalSpace::open(bool is_mkfs) {
   std::ostringstream oss;
   oss << device_id_printer_t{get_device_id()};
@@ -103,13 +113,18 @@ CircularJournalSpace::open_ret CircularJournalSpace::open(bool is_mkfs) {
          get_records_start(),
          device->get_device_id())};
     head.alloc_tail = head.dirty_tail;
+    auto meta = device->get_meta();
+    head.magic = calc_new_nonce(
+      std::rand() % std::numeric_limits<uint32_t>::max(),
+      reinterpret_cast<const unsigned char *>(meta.seastore_id.bytes()),
+      sizeof(meta.seastore_id.uuid));
     encode(head, bl);
     header = head;
     set_written_to(head.dirty_tail);
     initialized = true;
     DEBUG(
-      "initialize header block in CircularJournalSpace length {}",
-      bl.length());
+      "initialize header block in CircularJournalSpace length {}, head: {}",
+      bl.length(), header);
     return write_header(
     ).safe_then([this]() {
       return open_ret(
index 1e97f4efedc608853622508465a25144180afce1..1664e6bb5775f36351e125b8017a8e5f07e147be 100644 (file)
@@ -39,7 +39,7 @@ class CircularJournalSpace : public JournalAllocator {
   }
 
   segment_nonce_t get_nonce() const final {
-    return 0;
+    return header.magic;
   }
 
   bool needs_roll(std::size_t length) const final;
@@ -117,11 +117,13 @@ class CircularJournalSpace : public JournalAllocator {
     // start offset of CircularBoundedJournal in the device
     journal_seq_t dirty_tail;
     journal_seq_t alloc_tail;
+    segment_nonce_t magic;
 
     DENC(cbj_header_t, v, p) {
       DENC_START(1, 1, p);
       denc(v.dirty_tail, p);
       denc(v.alloc_tail, p);
+      denc(v.magic, p);
       DENC_FINISH(p);
     }
   };
index 05cd542230e21bb38d6bfcdc8c7298c427676df6..ddb738f4e5b00a8b6c3b13333954a9dbd614e726 100644 (file)
@@ -66,6 +66,7 @@ struct entry_validator_t {
   journal_seq_t last_seq;
   record_t record;
   rbm_abs_addr addr = 0;
+  segment_nonce_t magic = 0;
 
   template <typename... T>
   entry_validator_t(T&&... entry) : record(std::forward<T>(entry)...) {}
@@ -99,7 +100,7 @@ struct entry_validator_t {
       paddr_t paddr = convert_abs_addr_to_paddr(
        addr + offset,
        cbj.get_device_id());
-      auto [header, buf] = *(cbj.read_record(paddr, NULL_SEG_SEQ).unsafe_get0());
+      auto [header, buf] = *(cbj.read_record(paddr, magic).unsafe_get0());
       auto record = decode_record(buf);
       validate(*record);
       offset += header.mdlength + header.dlength;
@@ -167,6 +168,7 @@ struct cbjournal_test_t : public seastar_test_suite_t, JournalTrimmer
     entries.back().addr = 
       convert_paddr_to_abs_addr(w_result.start_seq.offset);
     entries.back().entries = 1;
+    entries.back().magic = cbj->get_cjs().get_cbj_header().magic;
     logger().debug("submit entry to addr {}", entries.back().addr);
     return entries.back().addr;
   }