]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
crimson/os/seastore/journal/cbj: introduce magic value to identify that written recor...
authormyoungwon oh <ohmyoungwon@gmail.com>
Sat, 22 Jul 2023 14:52:14 +0000 (14:52 +0000)
committermyoungwon oh <ohmyoungwon@gmail.com>
Wed, 16 Aug 2023 16:02:57 +0000 (16:02 +0000)
The cbj would replay invalid records if the same write pattern (mkfs -> write -> mkfs -> write) repeats.
On the second mkfs(), current cbj wrongly recognizes the old records are valid beceause they have
correct seq. number at appropriate offset even though the records are expired.

To solve this, this commit introduces random value to identify the journal  during mkfs()

Signed-off-by: Myoungwon Oh <myoungwon.oh@samsung.com>
src/crimson/os/seastore/journal/circular_bounded_journal.cc
src/crimson/os/seastore/journal/circular_bounded_journal.h
src/crimson/os/seastore/journal/circular_journal_space.cc
src/crimson/os/seastore/journal/circular_journal_space.h
src/test/crimson/seastore/test_cbjournal.cc

index a92b9ecbd6b75da4a2f4a6312829ea8e8c94fd30..0a85b5aaa5e63d85467155f22bd11bb761169df9 100644 (file)
@@ -117,7 +117,7 @@ RecordScanner::read_validate_record_metadata_ret CircularBoundedJournal::read_va
 {
   LOG_PREFIX(CircularBoundedJournal::read_validate_record_metadata);
   paddr_t start = cursor.seq.offset;
-  return read_record(start, 0
+  return read_record(start, nonce
   ).safe_then([FNAME, &cursor](auto ret) {
     if (!ret.has_value()) {
       return read_validate_record_metadata_ret(
@@ -234,7 +234,7 @@ Journal::replay_ret CircularBoundedJournal::replay_segment(
     [=, this, &cursor](auto &dhandler) {
       return scan_valid_records(
         cursor,
-       0,
+       cjs.get_cbj_header().magic,
         std::numeric_limits<size_t>::max(),
         dhandler).safe_then([](auto){}
       ).handle_error(
@@ -377,7 +377,7 @@ CircularBoundedJournal::return_record(record_group_header_t& header, bufferlist
 }
 
 CircularBoundedJournal::read_record_ret
-CircularBoundedJournal::read_record(paddr_t off, segment_seq_t expected_seq)
+CircularBoundedJournal::read_record(paddr_t off, segment_nonce_t magic)
 {
   LOG_PREFIX(CircularBoundedJournal::read_record);
   rbm_abs_addr addr = convert_paddr_to_abs_addr(off);
@@ -386,7 +386,7 @@ CircularBoundedJournal::read_record(paddr_t off, segment_seq_t expected_seq)
   DEBUG("reading record from abs addr {} read length {}", addr, read_length);
   auto bptr = bufferptr(ceph::buffer::create_page_aligned(read_length));
   return cjs.read(addr, bptr
-  ).safe_then([this, addr, bptr, expected_seq, FNAME]() mutable
+  ).safe_then([this, addr, bptr, magic, FNAME]() mutable
     -> read_record_ret {
     record_group_header_t h;
     bufferlist bl;
@@ -404,8 +404,7 @@ CircularBoundedJournal::read_record(paddr_t off, segment_seq_t expected_seq)
         h.dlength % get_block_size() != 0 ||
         addr + h.mdlength + h.dlength > get_journal_end() ||
         h.committed_to.segment_seq == NULL_SEG_SEQ ||
-        (expected_seq != NULL_SEG_SEQ &&
-         h.committed_to.segment_seq != expected_seq)) {
+       h.segment_nonce != magic) {
       return read_record_ret(
         read_record_ertr::ready_future_marker{},
         std::nullopt);
index 3c696f99704cc93f59736110469aec7025b41b00..311d9d6d269e0cc8f2c94cdc32e595764b330e9c 100644 (file)
@@ -135,7 +135,7 @@ public:
    * @param expected_seq
    *
    */
-  read_record_ret read_record(paddr_t offset, segment_seq_t expected_seq);
+  read_record_ret read_record(paddr_t offset, segment_nonce_t magic);
 
   read_record_ret return_record(record_group_header_t& header, bufferlist bl);
 
index fe81bef29aeae240371054159e2cb808aa7bb537..c36064acb12efb616763946a7a36d6bb58ae8d31 100644 (file)
@@ -18,8 +18,9 @@ std::ostream &operator<<(std::ostream &out,
     const CircularJournalSpace::cbj_header_t &header)
 {
   return out << "cbj_header_t(" 
-            << "dirty_tail=" << header.dirty_tail
+            << "dirty_tail=" << header.dirty_tail
             << ", alloc_tail=" << header.alloc_tail
+            << ", magic=" << header.magic
              << ")";
 }
 
@@ -86,6 +87,15 @@ CircularJournalSpace::write(ceph::bufferlist&& to_write) {
   );
 }
 
+segment_nonce_t calc_new_nonce(
+  uint32_t crc,
+  unsigned char const *data,
+  unsigned length)
+{
+  crc &= std::numeric_limits<uint32_t>::max() >> 1;
+  return ceph_crc32c(crc, data, length);
+}
+
 CircularJournalSpace::open_ret CircularJournalSpace::open(bool is_mkfs) {
   std::ostringstream oss;
   oss << device_id_printer_t{get_device_id()};
@@ -103,13 +113,18 @@ CircularJournalSpace::open_ret CircularJournalSpace::open(bool is_mkfs) {
          get_records_start(),
          device->get_device_id())};
     head.alloc_tail = head.dirty_tail;
+    auto meta = device->get_meta();
+    head.magic = calc_new_nonce(
+      std::rand() % std::numeric_limits<uint32_t>::max(),
+      reinterpret_cast<const unsigned char *>(meta.seastore_id.bytes()),
+      sizeof(meta.seastore_id.uuid));
     encode(head, bl);
     header = head;
     set_written_to(head.dirty_tail);
     initialized = true;
     DEBUG(
-      "initialize header block in CircularJournalSpace length {}",
-      bl.length());
+      "initialize header block in CircularJournalSpace length {}, head: {}",
+      bl.length(), header);
     return write_header(
     ).safe_then([this]() {
       return open_ret(
index d704b803514d6c19c503d402d809387af4fcc384..c88b65ad5e6b3ada736574222438a5b48a9c6384 100644 (file)
@@ -39,7 +39,7 @@ class CircularJournalSpace : public JournalAllocator {
   }
 
   segment_nonce_t get_nonce() const final {
-    return 0;
+    return header.magic;
   }
 
   bool needs_roll(std::size_t length) const final;
@@ -117,11 +117,13 @@ class CircularJournalSpace : public JournalAllocator {
     // start offset of CircularBoundedJournal in the device
     journal_seq_t dirty_tail;
     journal_seq_t alloc_tail;
+    segment_nonce_t magic;
 
     DENC(cbj_header_t, v, p) {
       DENC_START(1, 1, p);
       denc(v.dirty_tail, p);
       denc(v.alloc_tail, p);
+      denc(v.magic, p);
       DENC_FINISH(p);
     }
   };
index 05cd542230e21bb38d6bfcdc8c7298c427676df6..ddb738f4e5b00a8b6c3b13333954a9dbd614e726 100644 (file)
@@ -66,6 +66,7 @@ struct entry_validator_t {
   journal_seq_t last_seq;
   record_t record;
   rbm_abs_addr addr = 0;
+  segment_nonce_t magic = 0;
 
   template <typename... T>
   entry_validator_t(T&&... entry) : record(std::forward<T>(entry)...) {}
@@ -99,7 +100,7 @@ struct entry_validator_t {
       paddr_t paddr = convert_abs_addr_to_paddr(
        addr + offset,
        cbj.get_device_id());
-      auto [header, buf] = *(cbj.read_record(paddr, NULL_SEG_SEQ).unsafe_get0());
+      auto [header, buf] = *(cbj.read_record(paddr, magic).unsafe_get0());
       auto record = decode_record(buf);
       validate(*record);
       offset += header.mdlength + header.dlength;
@@ -167,6 +168,7 @@ struct cbjournal_test_t : public seastar_test_suite_t, JournalTrimmer
     entries.back().addr = 
       convert_paddr_to_abs_addr(w_result.start_seq.offset);
     entries.back().entries = 1;
+    entries.back().magic = cbj->get_cjs().get_cbj_header().magic;
     logger().debug("submit entry to addr {}", entries.back().addr);
     return entries.back().addr;
   }