From 386f2d7c829422695a1b1f41bd3f17ca3eef1f61 Mon Sep 17 00:00:00 2001 From: John Spray Date: Thu, 11 Sep 2014 14:07:59 +0100 Subject: [PATCH] mds: update segment references during journal rewrite ... to avoid leaving log events that reference log segments by offsets which no longer exist. Signed-off-by: John Spray --- src/mds/LogSegment.h | 4 ++- src/mds/MDLog.cc | 59 ++++++++++++++++++++++++++++++++++++++ src/mds/MDLog.h | 2 +- src/mds/MDS.h | 2 +- src/mds/events/EMetaBlob.h | 5 +++- src/mds/journal.cc | 36 +++++++++++++++++++++++ 6 files changed, 104 insertions(+), 4 deletions(-) diff --git a/src/mds/LogSegment.h b/src/mds/LogSegment.h index 7d1dce588ffaa..035cc81a72abc 100644 --- a/src/mds/LogSegment.h +++ b/src/mds/LogSegment.h @@ -33,9 +33,11 @@ class CDentry; class MDS; struct MDSlaveUpdate; +typedef uint64_t log_segment_seq_t; + class LogSegment { public: - const uint64_t seq; + const log_segment_seq_t seq; uint64_t offset, end; int num_events; diff --git a/src/mds/MDLog.cc b/src/mds/MDLog.cc index 837a47597b315..0beb6d68f2069 100644 --- a/src/mds/MDLog.cc +++ b/src/mds/MDLog.cc @@ -850,6 +850,13 @@ void MDLog::_reformat_journal(JournalPointer const &jp_in, Journaler *old_journa // write them to the new journal. int r = 0; + // In old format journals before event_seq was introduced, the serialized + // offset of a SubtreeMap message in the log is used as the unique ID for + // a log segment. Because we change serialization, this will end up changing + // for us, so we have to explicitly update the fields that point back to that + // log segment. + std::map<log_segment_seq_t, log_segment_seq_t> segment_pos_rewrite; + // The logic in here borrowed from replay_thread expects mds_lock to be held, // e.g. between checking readable and doing wait_for_readable so that journaler // state doesn't change in between. 
@@ -879,11 +886,63 @@ void MDLog::_reformat_journal(JournalPointer const &jp_in, Journaler *old_journa // Read one serialized LogEvent assert(old_journal->is_readable()); bufferlist bl; + uint64_t le_pos = old_journal->get_read_pos(); bool r = old_journal->try_read_entry(bl); if (!r && old_journal->get_error()) continue; assert(r); + // Update segment_pos_rewrite + LogEvent *le = LogEvent::decode(bl); + if (le) { + bool modified = false; + + if (le->get_type() == EVENT_SUBTREEMAP || + le->get_type() == EVENT_RESETJOURNAL) { + ESubtreeMap *sle = dynamic_cast<ESubtreeMap*>(le); + if (sle == NULL || sle->event_seq == 0) { + // A non-explicit event seq: the effective sequence number + // of this segment is its position in the old journal and + // the new effective sequence number will be its position + // in the new journal. + segment_pos_rewrite[le_pos] = new_journal->get_write_pos(); + dout(20) << __func__ << " discovered segment seq mapping " + << le_pos << " -> " << new_journal->get_write_pos() << dendl; + } + } else { + event_seq++; + } + + // Rewrite segment references if necessary + EMetaBlob *blob = le->get_metablob(); + if (blob) { + modified = blob->rewrite_truncate_finish(mds, segment_pos_rewrite); + } + + // Zero-out expire_pos in subtreemap because offsets have changed + // (expire_pos is just an optimization so it's safe to eliminate it) + if (le->get_type() == EVENT_SUBTREEMAP) { + dout(20) << __func__ << " zeroing expire_pos in subtreemap event at " << le_pos << dendl; + ESubtreeMap *sle = dynamic_cast<ESubtreeMap*>(le); + assert(sle != NULL); + sle->expire_pos = 0; + modified = true; + } + + if (modified) { + bl.clear(); + le->encode_with_header(bl); + } + + delete le; + } else { + // Failure from LogEvent::decode, our job is to change the journal wrapper, + // not validate the contents, so pass it through. 
+ dout(1) << __func__ << " transcribing un-decodable LogEvent at old position " + << old_journal->get_read_pos() << ", new position " << new_journal->get_write_pos() + << dendl; + } + // Write (buffered, synchronous) one serialized LogEvent events_transcribed += 1; new_journal->append_entry(bl); diff --git a/src/mds/MDLog.h b/src/mds/MDLog.h index 31a17efe26aa8..2d00ed21380a7 100644 --- a/src/mds/MDLog.h +++ b/src/mds/MDLog.h @@ -220,7 +220,7 @@ public: return segments.rbegin()->second; } - LogSegment *get_segment(uint64_t seq) { + LogSegment *get_segment(log_segment_seq_t seq) { if (segments.count(seq)) return segments[seq]; return NULL; diff --git a/src/mds/MDS.h b/src/mds/MDS.h index b77f60a29fd5f..a51a5f9b032dc 100644 --- a/src/mds/MDS.h +++ b/src/mds/MDS.h @@ -345,7 +345,7 @@ private: void handle_signal(int signum); // who am i etc - int get_nodeid() { return whoami; } + int get_nodeid() const { return whoami; } uint64_t get_metadata_pool() { return mdsmap->get_metadata_pool(); } MDSMap *get_mds_map() { return mdsmap; } diff --git a/src/mds/events/EMetaBlob.h b/src/mds/events/EMetaBlob.h index 86889c58e282a..dd9c1cdc3926a 100644 --- a/src/mds/events/EMetaBlob.h +++ b/src/mds/events/EMetaBlob.h @@ -20,6 +20,7 @@ #include "../CInode.h" #include "../CDir.h" #include "../CDentry.h" +#include "../LogSegment.h" #include "include/triple.h" #include "include/interval_set.h" @@ -305,7 +306,7 @@ private: // inodes i've truncated list<inodeno_t> truncate_start; // start truncate - map<inodeno_t,uint64_t> truncate_finish; // finished truncate (started in segment blah) + map<inodeno_t,log_segment_seq_t> truncate_finish; // finished truncate (started in segment blah) public: vector<inodeno_t> destroyed_inodes; @@ -374,6 +375,8 @@ private: void add_truncate_finish(inodeno_t ino, uint64_t segoff) { truncate_finish[ino] = segoff; } + + bool rewrite_truncate_finish(MDS const *mds, std::map<uint64_t, uint64_t> const &old_to_new); void add_destroyed_inode(inodeno_t ino) { destroyed_inodes.push_back(ino); diff --git a/src/mds/journal.cc b/src/mds/journal.cc index 
f1698b0692818..c67421d5dafbe 100644 --- a/src/mds/journal.cc +++ b/src/mds/journal.cc @@ -3022,3 +3022,39 @@ void ENoOp::replay(MDS *mds) { dout(4) << "ENoOp::replay, " << pad_size << " bytes skipped in journal" << dendl; } + +/** + * If re-formatting an old journal that used absolute log position + * references as segment sequence numbers, use this function to update + * it. + * + * @param mds + * MDS instance, just used for logging + * @param old_to_new + * Map of old journal segment sequence numbers to new journal segment sequence numbers + * + * @return + * True if the event was modified. + */ +bool EMetaBlob::rewrite_truncate_finish(MDS const *mds, + std::map<log_segment_seq_t, log_segment_seq_t> const &old_to_new) +{ + bool modified = false; + map<inodeno_t, log_segment_seq_t> new_trunc_finish; + for (std::map<inodeno_t, log_segment_seq_t>::iterator i = truncate_finish.begin(); + i != truncate_finish.end(); ++i) { + if (old_to_new.count(i->second)) { + dout(20) << __func__ << " applying segment seq mapping " + << i->second << " -> " << old_to_new.find(i->second)->second << dendl; + new_trunc_finish[i->first] = old_to_new.find(i->second)->second; + modified = true; + } else { + dout(20) << __func__ << " no segment seq mapping found for " + << i->second << dendl; + new_trunc_finish[i->first] = i->second; + } + } + truncate_finish = new_trunc_finish; + + return modified; +} -- 2.39.5