The cbj would replay invalid records if the same write pattern (mkfs -> write -> mkfs -> write) repeats.
On the second mkfs(), current cbj wrongly recognizes the old records are valid beceause they have
correct seq. number at appropriate offset even though the records are expired.
To solve this, this commit introduces random value to identify the journal during mkfs()
Signed-off-by: Myoungwon Oh <myoungwon.oh@samsung.com>
{
LOG_PREFIX(CircularBoundedJournal::read_validate_record_metadata);
paddr_t start = cursor.seq.offset;
- return read_record(start, 0
+ return read_record(start, nonce
).safe_then([FNAME, &cursor](auto ret) {
if (!ret.has_value()) {
return read_validate_record_metadata_ret(
[=, this, &cursor](auto &dhandler) {
return scan_valid_records(
cursor,
- 0,
+ cjs.get_cbj_header().magic,
std::numeric_limits<size_t>::max(),
dhandler).safe_then([](auto){}
).handle_error(
}
CircularBoundedJournal::read_record_ret
-CircularBoundedJournal::read_record(paddr_t off, segment_seq_t expected_seq)
+CircularBoundedJournal::read_record(paddr_t off, segment_nonce_t magic)
{
LOG_PREFIX(CircularBoundedJournal::read_record);
rbm_abs_addr addr = convert_paddr_to_abs_addr(off);
DEBUG("reading record from abs addr {} read length {}", addr, read_length);
auto bptr = bufferptr(ceph::buffer::create_page_aligned(read_length));
return cjs.read(addr, bptr
- ).safe_then([this, addr, bptr, expected_seq, FNAME]() mutable
+ ).safe_then([this, addr, bptr, magic, FNAME]() mutable
-> read_record_ret {
record_group_header_t h;
bufferlist bl;
h.dlength % get_block_size() != 0 ||
addr + h.mdlength + h.dlength > get_journal_end() ||
h.committed_to.segment_seq == NULL_SEG_SEQ ||
- (expected_seq != NULL_SEG_SEQ &&
- h.committed_to.segment_seq != expected_seq)) {
+ h.segment_nonce != magic) {
return read_record_ret(
read_record_ertr::ready_future_marker{},
std::nullopt);
* @param expected_seq
*
*/
- read_record_ret read_record(paddr_t offset, segment_seq_t expected_seq);
+ read_record_ret read_record(paddr_t offset, segment_nonce_t magic);
read_record_ret return_record(record_group_header_t& header, bufferlist bl);
const CircularJournalSpace::cbj_header_t &header)
{
return out << "cbj_header_t("
- << ", dirty_tail=" << header.dirty_tail
+ << "dirty_tail=" << header.dirty_tail
<< ", alloc_tail=" << header.alloc_tail
+ << ", magic=" << header.magic
<< ")";
}
);
}
+segment_nonce_t calc_new_nonce(
+ uint32_t crc,
+ unsigned char const *data,
+ unsigned length)
+{
+ crc &= std::numeric_limits<uint32_t>::max() >> 1;
+ return ceph_crc32c(crc, data, length);
+}
+
CircularJournalSpace::open_ret CircularJournalSpace::open(bool is_mkfs) {
std::ostringstream oss;
oss << device_id_printer_t{get_device_id()};
get_records_start(),
device->get_device_id())};
head.alloc_tail = head.dirty_tail;
+ auto meta = device->get_meta();
+ head.magic = calc_new_nonce(
+ std::rand() % std::numeric_limits<uint32_t>::max(),
+ reinterpret_cast<const unsigned char *>(meta.seastore_id.bytes()),
+ sizeof(meta.seastore_id.uuid));
encode(head, bl);
header = head;
set_written_to(head.dirty_tail);
initialized = true;
DEBUG(
- "initialize header block in CircularJournalSpace length {}",
- bl.length());
+ "initialize header block in CircularJournalSpace length {}, head: {}",
+ bl.length(), header);
return write_header(
).safe_then([this]() {
return open_ret(
}
segment_nonce_t get_nonce() const final {
- return 0;
+ return header.magic;
}
bool needs_roll(std::size_t length) const final;
// start offset of CircularBoundedJournal in the device
journal_seq_t dirty_tail;
journal_seq_t alloc_tail;
+ segment_nonce_t magic;
DENC(cbj_header_t, v, p) {
DENC_START(1, 1, p);
denc(v.dirty_tail, p);
denc(v.alloc_tail, p);
+ denc(v.magic, p);
DENC_FINISH(p);
}
};
journal_seq_t last_seq;
record_t record;
rbm_abs_addr addr = 0;
+ segment_nonce_t magic = 0;
template <typename... T>
entry_validator_t(T&&... entry) : record(std::forward<T>(entry)...) {}
paddr_t paddr = convert_abs_addr_to_paddr(
addr + offset,
cbj.get_device_id());
- auto [header, buf] = *(cbj.read_record(paddr, NULL_SEG_SEQ).unsafe_get0());
+ auto [header, buf] = *(cbj.read_record(paddr, magic).unsafe_get0());
auto record = decode_record(buf);
validate(*record);
offset += header.mdlength + header.dlength;
entries.back().addr =
convert_paddr_to_abs_addr(w_result.start_seq.offset);
entries.back().entries = 1;
+ entries.back().magic = cbj->get_cjs().get_cbj_header().magic;
logger().debug("submit entry to addr {}", entries.back().addr);
return entries.back().addr;
}