From: Samuel Just Date: Wed, 3 Apr 2013 22:44:39 +0000 (-0700) Subject: FileJournal: introduce start_seq header entry X-Git-Tag: v0.62~90 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=6ef9d87a02872fc7312c1c8f48abf9b48457f6e4;p=ceph.git FileJournal: introduce start_seq header entry FileJournal::header_t::start_seq now encodes the op seq which may be written at FileJournal::header_t::start. This way, FileJournal::open() can pass a valid sequence number to read_entry for validation. Otherwise, read_entry has no way of knowing whether a failure of a read at header.start indicates that the journal was empty, or that the entry is corrupt. With start_seq, read_entry can assume corruption if start_seq <= committed_up_to. Fixes: #4527 Signed-off-by: Samuel Just Reviewed-by: Sage Weil --- diff --git a/src/os/FileJournal.cc b/src/os/FileJournal.cc index 576c965fb3b..f5e8f34137e 100644 --- a/src/os/FileJournal.cc +++ b/src/os/FileJournal.cc @@ -387,7 +387,12 @@ int FileJournal::create() header.alignment = block_size; else header.alignment = 16; // at least stay word aligned on 64bit machines... + header.start = get_top(); + /* FileStore::mkfs initializes the fs op sequence file at 1. 
Therefore, + * the first entry written must be at sequence 2.*/ + header.start_seq = 2; + print_header(); // static zeroed buffer for alignment padding @@ -519,7 +524,7 @@ int FileJournal::open(uint64_t fs_op_seq) // find next entry read_pos = header.start; - uint64_t seq = 0; + uint64_t seq = header.start_seq; while (1) { bufferlist bl; off64_t old_pos = read_pos; @@ -1522,8 +1527,10 @@ void FileJournal::committed_thru(uint64_t seq) } if (!journalq.empty()) { header.start = journalq.front().second; + header.start_seq = journalq.front().first + 1; } else { header.start = write_pos; + header.start_seq = journaled_seq + 1; } must_write_header = true; print_header(); diff --git a/src/os/FileJournal.h b/src/os/FileJournal.h index b28161f844f..7b1777928cd 100644 --- a/src/os/FileJournal.h +++ b/src/os/FileJournal.h @@ -109,10 +109,11 @@ public: int64_t max_size; // max size of journal ring buffer int64_t start; // offset of first entry uint64_t committed_up_to; // committed up to + uint64_t start_seq; // entry at header.start header_t() : flags(0), block_size(0), alignment(0), max_size(0), start(0), - committed_up_to(0) {} + committed_up_to(0), start_seq(0) {} void clear() { start = block_size; @@ -123,7 +124,7 @@ public: } void encode(bufferlist& bl) const { - __u32 v = 3; + __u32 v = 4; ::encode(v, bl); bufferlist em; { @@ -134,6 +135,7 @@ public: ::encode(max_size, em); ::encode(start, em); ::encode(committed_up_to, em); + ::encode(start_seq, em); } ::encode(em, bl); } @@ -153,6 +155,7 @@ public: ::decode(max_size, bl); ::decode(start, bl); committed_up_to = 0; + start_seq = 0; return; } bufferlist em; @@ -164,10 +167,16 @@ public: ::decode(alignment, t); ::decode(max_size, t); ::decode(start, t); + if (v > 2) ::decode(committed_up_to, t); else committed_up_to = 0; + + if (v > 3) + ::decode(start_seq, t); + else + start_seq = 0; } } header;