From: myoungwon oh Date: Sat, 22 Jul 2023 14:52:14 +0000 (+0000) Subject: crimson/os/seastore/journal/cbj: introduce magic value to identify that written recor... X-Git-Tag: v18.2.1~160^2~4 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=654e227bb0fadeeaff95b7cfdd853220a4dc1f37;p=ceph.git crimson/os/seastore/journal/cbj: introduce magic value to identify that written records are correct The cbj would replay invalid records if the same write pattern (mkfs -> write -> mkfs -> write) repeats. On the second mkfs(), current cbj wrongly recognizes the old records are valid beceause they have correct seq. number at appropriate offset even though the records are expired. To solve this, this commit introduces random value to identify the journal during mkfs() Signed-off-by: Myoungwon Oh (cherry picked from commit 942ea6487f59e52d88050fb8deb8a2e6c0ca83c3) --- diff --git a/src/crimson/os/seastore/journal/circular_bounded_journal.cc b/src/crimson/os/seastore/journal/circular_bounded_journal.cc index a92b9ecbd6b7..0a85b5aaa5e6 100644 --- a/src/crimson/os/seastore/journal/circular_bounded_journal.cc +++ b/src/crimson/os/seastore/journal/circular_bounded_journal.cc @@ -117,7 +117,7 @@ RecordScanner::read_validate_record_metadata_ret CircularBoundedJournal::read_va { LOG_PREFIX(CircularBoundedJournal::read_validate_record_metadata); paddr_t start = cursor.seq.offset; - return read_record(start, 0 + return read_record(start, nonce ).safe_then([FNAME, &cursor](auto ret) { if (!ret.has_value()) { return read_validate_record_metadata_ret( @@ -234,7 +234,7 @@ Journal::replay_ret CircularBoundedJournal::replay_segment( [=, this, &cursor](auto &dhandler) { return scan_valid_records( cursor, - 0, + cjs.get_cbj_header().magic, std::numeric_limits::max(), dhandler).safe_then([](auto){} ).handle_error( @@ -377,7 +377,7 @@ CircularBoundedJournal::return_record(record_group_header_t& header, bufferlist } CircularBoundedJournal::read_record_ret -CircularBoundedJournal::read_record(paddr_t off, segment_seq_t expected_seq) +CircularBoundedJournal::read_record(paddr_t off, segment_nonce_t magic) { LOG_PREFIX(CircularBoundedJournal::read_record); rbm_abs_addr addr = convert_paddr_to_abs_addr(off); @@ -386,7 +386,7 @@ CircularBoundedJournal::read_record(paddr_t off, segment_seq_t expected_seq) DEBUG("reading record from abs addr {} read length {}", addr, read_length); auto bptr = bufferptr(ceph::buffer::create_page_aligned(read_length)); return cjs.read(addr, bptr - ).safe_then([this, addr, bptr, expected_seq, FNAME]() mutable + ).safe_then([this, addr, bptr, magic, FNAME]() mutable -> read_record_ret { record_group_header_t h; bufferlist bl; @@ -404,8 +404,7 @@ CircularBoundedJournal::read_record(paddr_t off, segment_seq_t expected_seq) h.dlength % get_block_size() != 0 || addr + h.mdlength + h.dlength > get_journal_end() || h.committed_to.segment_seq == NULL_SEG_SEQ || - (expected_seq != NULL_SEG_SEQ && - h.committed_to.segment_seq != expected_seq)) { + h.segment_nonce != magic) { return read_record_ret( read_record_ertr::ready_future_marker{}, std::nullopt); diff --git a/src/crimson/os/seastore/journal/circular_bounded_journal.h b/src/crimson/os/seastore/journal/circular_bounded_journal.h index 3c696f99704c..311d9d6d269e 100644 --- a/src/crimson/os/seastore/journal/circular_bounded_journal.h +++ b/src/crimson/os/seastore/journal/circular_bounded_journal.h @@ -135,7 +135,7 @@ public: * @param expected_seq * */ - read_record_ret read_record(paddr_t offset, segment_seq_t expected_seq); + read_record_ret read_record(paddr_t offset, segment_nonce_t magic); read_record_ret return_record(record_group_header_t& header, bufferlist bl); diff --git a/src/crimson/os/seastore/journal/circular_journal_space.cc b/src/crimson/os/seastore/journal/circular_journal_space.cc index 7565c2815576..f32b69d110ec 100644 --- a/src/crimson/os/seastore/journal/circular_journal_space.cc +++ b/src/crimson/os/seastore/journal/circular_journal_space.cc @@ -18,8 +18,9 @@ std::ostream &operator<<(std::ostream &out, const CircularJournalSpace::cbj_header_t &header) { return out << "cbj_header_t(" - << ", dirty_tail=" << header.dirty_tail + << "dirty_tail=" << header.dirty_tail << ", alloc_tail=" << header.alloc_tail + << ", magic=" << header.magic << ")"; } @@ -86,6 +87,15 @@ CircularJournalSpace::write(ceph::bufferlist&& to_write) { ); } +segment_nonce_t calc_new_nonce( + uint32_t crc, + unsigned char const *data, + unsigned length) +{ + crc &= std::numeric_limits::max() >> 1; + return ceph_crc32c(crc, data, length); +} + CircularJournalSpace::open_ret CircularJournalSpace::open(bool is_mkfs) { std::ostringstream oss; oss << device_id_printer_t{get_device_id()}; @@ -103,13 +113,18 @@ CircularJournalSpace::open_ret CircularJournalSpace::open(bool is_mkfs) { get_records_start(), device->get_device_id())}; head.alloc_tail = head.dirty_tail; + auto meta = device->get_meta(); + head.magic = calc_new_nonce( + std::rand() % std::numeric_limits::max(), + reinterpret_cast(meta.seastore_id.bytes()), + sizeof(meta.seastore_id.uuid)); encode(head, bl); header = head; set_written_to(head.dirty_tail); initialized = true; DEBUG( - "initialize header block in CircularJournalSpace length {}", - bl.length()); + "initialize header block in CircularJournalSpace length {}, head: {}", + bl.length(), header); return write_header( ).safe_then([this]() { return open_ret( diff --git a/src/crimson/os/seastore/journal/circular_journal_space.h b/src/crimson/os/seastore/journal/circular_journal_space.h index 1e97f4efedc6..1664e6bb5775 100644 --- a/src/crimson/os/seastore/journal/circular_journal_space.h +++ b/src/crimson/os/seastore/journal/circular_journal_space.h @@ -39,7 +39,7 @@ class CircularJournalSpace : public JournalAllocator { } segment_nonce_t get_nonce() const final { - return 0; + return header.magic; } bool needs_roll(std::size_t length) const final; @@ -117,11 +117,13 @@ class CircularJournalSpace : public JournalAllocator { // start offset of CircularBoundedJournal in the device journal_seq_t dirty_tail; journal_seq_t alloc_tail; + segment_nonce_t magic; DENC(cbj_header_t, v, p) { DENC_START(1, 1, p); denc(v.dirty_tail, p); denc(v.alloc_tail, p); + denc(v.magic, p); DENC_FINISH(p); } }; diff --git a/src/test/crimson/seastore/test_cbjournal.cc b/src/test/crimson/seastore/test_cbjournal.cc index 05cd542230e2..ddb738f4e5b0 100644 --- a/src/test/crimson/seastore/test_cbjournal.cc +++ b/src/test/crimson/seastore/test_cbjournal.cc @@ -66,6 +66,7 @@ struct entry_validator_t { journal_seq_t last_seq; record_t record; rbm_abs_addr addr = 0; + segment_nonce_t magic = 0; template entry_validator_t(T&&... entry) : record(std::forward(entry)...) {} @@ -99,7 +100,7 @@ struct entry_validator_t { paddr_t paddr = convert_abs_addr_to_paddr( addr + offset, cbj.get_device_id()); - auto [header, buf] = *(cbj.read_record(paddr, NULL_SEG_SEQ).unsafe_get0()); + auto [header, buf] = *(cbj.read_record(paddr, magic).unsafe_get0()); auto record = decode_record(buf); validate(*record); offset += header.mdlength + header.dlength; @@ -167,6 +168,7 @@ struct cbjournal_test_t : public seastar_test_suite_t, JournalTrimmer entries.back().addr = convert_paddr_to_abs_addr(w_result.start_seq.offset); entries.back().entries = 1; + entries.back().magic = cbj->get_cjs().get_cbj_header().magic; logger().debug("submit entry to addr {}", entries.back().addr); return entries.back().addr; }