From: myoungwon oh Date: Sat, 22 Jul 2023 14:52:14 +0000 (+0000) Subject: crimson/os/seastore/journal/cbj: introduce magic value to identify that written recor... X-Git-Tag: v19.0.0~659^2~4 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=942ea6487f59e52d88050fb8deb8a2e6c0ca83c3;p=ceph.git crimson/os/seastore/journal/cbj: introduce magic value to identify that written records are correct The cbj would replay invalid records if the same write pattern (mkfs -> write -> mkfs -> write) repeats. On the second mkfs(), current cbj wrongly recognizes the old records are valid beceause they have correct seq. number at appropriate offset even though the records are expired. To solve this, this commit introduces random value to identify the journal during mkfs() Signed-off-by: Myoungwon Oh --- diff --git a/src/crimson/os/seastore/journal/circular_bounded_journal.cc b/src/crimson/os/seastore/journal/circular_bounded_journal.cc index a92b9ecbd6b75..0a85b5aaa5e63 100644 --- a/src/crimson/os/seastore/journal/circular_bounded_journal.cc +++ b/src/crimson/os/seastore/journal/circular_bounded_journal.cc @@ -117,7 +117,7 @@ RecordScanner::read_validate_record_metadata_ret CircularBoundedJournal::read_va { LOG_PREFIX(CircularBoundedJournal::read_validate_record_metadata); paddr_t start = cursor.seq.offset; - return read_record(start, 0 + return read_record(start, nonce ).safe_then([FNAME, &cursor](auto ret) { if (!ret.has_value()) { return read_validate_record_metadata_ret( @@ -234,7 +234,7 @@ Journal::replay_ret CircularBoundedJournal::replay_segment( [=, this, &cursor](auto &dhandler) { return scan_valid_records( cursor, - 0, + cjs.get_cbj_header().magic, std::numeric_limits::max(), dhandler).safe_then([](auto){} ).handle_error( @@ -377,7 +377,7 @@ CircularBoundedJournal::return_record(record_group_header_t& header, bufferlist } CircularBoundedJournal::read_record_ret -CircularBoundedJournal::read_record(paddr_t off, segment_seq_t expected_seq) +CircularBoundedJournal::read_record(paddr_t off, segment_nonce_t magic) { LOG_PREFIX(CircularBoundedJournal::read_record); rbm_abs_addr addr = convert_paddr_to_abs_addr(off); @@ -386,7 +386,7 @@ CircularBoundedJournal::read_record(paddr_t off, segment_seq_t expected_seq) DEBUG("reading record from abs addr {} read length {}", addr, read_length); auto bptr = bufferptr(ceph::buffer::create_page_aligned(read_length)); return cjs.read(addr, bptr - ).safe_then([this, addr, bptr, expected_seq, FNAME]() mutable + ).safe_then([this, addr, bptr, magic, FNAME]() mutable -> read_record_ret { record_group_header_t h; bufferlist bl; @@ -404,8 +404,7 @@ CircularBoundedJournal::read_record(paddr_t off, segment_seq_t expected_seq) h.dlength % get_block_size() != 0 || addr + h.mdlength + h.dlength > get_journal_end() || h.committed_to.segment_seq == NULL_SEG_SEQ || - (expected_seq != NULL_SEG_SEQ && - h.committed_to.segment_seq != expected_seq)) { + h.segment_nonce != magic) { return read_record_ret( read_record_ertr::ready_future_marker{}, std::nullopt); diff --git a/src/crimson/os/seastore/journal/circular_bounded_journal.h b/src/crimson/os/seastore/journal/circular_bounded_journal.h index 3c696f99704cc..311d9d6d269e0 100644 --- a/src/crimson/os/seastore/journal/circular_bounded_journal.h +++ b/src/crimson/os/seastore/journal/circular_bounded_journal.h @@ -135,7 +135,7 @@ public: * @param expected_seq * */ - read_record_ret read_record(paddr_t offset, segment_seq_t expected_seq); + read_record_ret read_record(paddr_t offset, segment_nonce_t magic); read_record_ret return_record(record_group_header_t& header, bufferlist bl); diff --git a/src/crimson/os/seastore/journal/circular_journal_space.cc b/src/crimson/os/seastore/journal/circular_journal_space.cc index fe81bef29aeae..c36064acb12ef 100644 --- a/src/crimson/os/seastore/journal/circular_journal_space.cc +++ b/src/crimson/os/seastore/journal/circular_journal_space.cc @@ -18,8 +18,9 @@ std::ostream &operator<<(std::ostream &out, const CircularJournalSpace::cbj_header_t &header) { return out << "cbj_header_t(" - << ", dirty_tail=" << header.dirty_tail + << "dirty_tail=" << header.dirty_tail << ", alloc_tail=" << header.alloc_tail + << ", magic=" << header.magic << ")"; } @@ -86,6 +87,15 @@ CircularJournalSpace::write(ceph::bufferlist&& to_write) { ); } +segment_nonce_t calc_new_nonce( + uint32_t crc, + unsigned char const *data, + unsigned length) +{ + crc &= std::numeric_limits::max() >> 1; + return ceph_crc32c(crc, data, length); +} + CircularJournalSpace::open_ret CircularJournalSpace::open(bool is_mkfs) { std::ostringstream oss; oss << device_id_printer_t{get_device_id()}; @@ -103,13 +113,18 @@ CircularJournalSpace::open_ret CircularJournalSpace::open(bool is_mkfs) { get_records_start(), device->get_device_id())}; head.alloc_tail = head.dirty_tail; + auto meta = device->get_meta(); + head.magic = calc_new_nonce( + std::rand() % std::numeric_limits::max(), + reinterpret_cast(meta.seastore_id.bytes()), + sizeof(meta.seastore_id.uuid)); encode(head, bl); header = head; set_written_to(head.dirty_tail); initialized = true; DEBUG( - "initialize header block in CircularJournalSpace length {}", - bl.length()); + "initialize header block in CircularJournalSpace length {}, head: {}", + bl.length(), header); return write_header( ).safe_then([this]() { return open_ret( diff --git a/src/crimson/os/seastore/journal/circular_journal_space.h b/src/crimson/os/seastore/journal/circular_journal_space.h index d704b803514d6..c88b65ad5e6b3 100644 --- a/src/crimson/os/seastore/journal/circular_journal_space.h +++ b/src/crimson/os/seastore/journal/circular_journal_space.h @@ -39,7 +39,7 @@ class CircularJournalSpace : public JournalAllocator { } segment_nonce_t get_nonce() const final { - return 0; + return header.magic; } bool needs_roll(std::size_t length) const final; @@ -117,11 +117,13 @@ class CircularJournalSpace : public JournalAllocator { // start offset of CircularBoundedJournal in the device journal_seq_t dirty_tail; journal_seq_t alloc_tail; + segment_nonce_t magic; DENC(cbj_header_t, v, p) { DENC_START(1, 1, p); denc(v.dirty_tail, p); denc(v.alloc_tail, p); + denc(v.magic, p); DENC_FINISH(p); } }; diff --git a/src/test/crimson/seastore/test_cbjournal.cc b/src/test/crimson/seastore/test_cbjournal.cc index 05cd542230e21..ddb738f4e5b00 100644 --- a/src/test/crimson/seastore/test_cbjournal.cc +++ b/src/test/crimson/seastore/test_cbjournal.cc @@ -66,6 +66,7 @@ struct entry_validator_t { journal_seq_t last_seq; record_t record; rbm_abs_addr addr = 0; + segment_nonce_t magic = 0; template entry_validator_t(T&&... entry) : record(std::forward(entry)...) {} @@ -99,7 +100,7 @@ struct entry_validator_t { paddr_t paddr = convert_abs_addr_to_paddr( addr + offset, cbj.get_device_id()); - auto [header, buf] = *(cbj.read_record(paddr, NULL_SEG_SEQ).unsafe_get0()); + auto [header, buf] = *(cbj.read_record(paddr, magic).unsafe_get0()); auto record = decode_record(buf); validate(*record); offset += header.mdlength + header.dlength; @@ -167,6 +168,7 @@ struct cbjournal_test_t : public seastar_test_suite_t, JournalTrimmer entries.back().addr = convert_paddr_to_abs_addr(w_result.start_seq.offset); entries.back().entries = 1; + entries.back().magic = cbj->get_cjs().get_cbj_header().magic; logger().debug("submit entry to addr {}", entries.back().addr); return entries.back().addr; }