From 55ed85b194659d56c715b24f4b407a4020b0361b Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Fri, 27 Jun 2014 14:08:54 +0800 Subject: [PATCH] mds: introduce sequence number for log events Use the sequence number to index log segments. A later commit will defer encoding/submitting log events to a separate thread, so log events' offsets are unknown before they are encoded. Signed-off-by: Yan, Zheng --- src/mds/LogSegment.h | 6 +++--- src/mds/MDCache.cc | 2 +- src/mds/MDLog.cc | 28 +++++++++++++++++++++++----- src/mds/MDLog.h | 15 ++++++++++----- src/mds/events/EMetaBlob.h | 6 +++--- src/mds/events/ESubtreeMap.h | 3 ++- src/mds/journal.cc | 15 ++++++++------- 7 files changed, 50 insertions(+), 25 deletions(-) diff --git a/src/mds/LogSegment.h b/src/mds/LogSegment.h index a2b8ace387c80..1df103b0514c6 100644 --- a/src/mds/LogSegment.h +++ b/src/mds/LogSegment.h @@ -35,9 +35,9 @@ struct MDSlaveUpdate; class LogSegment { public: + const uint64_t seq; uint64_t offset, end; int num_events; - uint64_t trimmable_at; // dirty items elist dirty_dirfrags, new_dirfrags; @@ -70,8 +70,8 @@ class LogSegment { void try_to_expire(MDS *mds, C_GatherBuilder &gather_bld, int op_prio); // cons - LogSegment(loff_t off) : - offset(off), end(off), num_events(0), trimmable_at(0), + LogSegment(uint64_t _seq, loff_t off=-1) : + seq(_seq), offset(off), end(off), num_events(0), dirty_dirfrags(member_offset(CDir, item_dirty)), new_dirfrags(member_offset(CDir, item_new)), dirty_inodes(member_offset(CInode, item_dirty)), diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index e3e352a80669e..80cfd8958fffc 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -6115,7 +6115,7 @@ void MDCache::truncate_inode_finish(CInode *in, LogSegment *ls) CDentry *dn = in->get_projected_parent_dn(); le->metablob.add_dir_context(dn->get_dir()); le->metablob.add_primary_dentry(dn, in, true); - le->metablob.add_truncate_finish(in->ino(), ls->offset); + le->metablob.add_truncate_finish(in->ino(), ls->seq); 
journal_dirty_inode(mut.get(), &le->metablob, in); mds->mdlog->submit_entry(le, new C_MDC_TruncateLogged(this, in, mut)); diff --git a/src/mds/MDLog.cc b/src/mds/MDLog.cc index 11019633956a2..1a7d3c99def3e 100644 --- a/src/mds/MDLog.cc +++ b/src/mds/MDLog.cc @@ -171,6 +171,14 @@ void MDLog::start_entry(LogEvent *e) assert(cur_event == NULL); cur_event = e; e->set_start_off(get_write_pos()); + + event_seq++; + + EMetaBlob *metablob = e->get_metablob(); + if (metablob) { + metablob->event_seq = event_seq; + metablob->last_subtree_map = get_last_segment_seq(); + } } void MDLog::cancel_entry(LogEvent *le) @@ -294,9 +302,10 @@ void MDLog::start_new_segment() void MDLog::prepare_new_segment() { - dout(7) << __func__ << " at " << journaler->get_write_pos() << dendl; + uint64_t seq = event_seq + 1; + dout(7) << __func__ << " seq " << seq << dendl; - segments[journaler->get_write_pos()] = new LogSegment(journaler->get_write_pos()); + segments[seq] = new LogSegment(seq, journaler->get_write_pos()); logger->inc(l_mdl_segadd); logger->set(l_mdl_seg, segments.size()); @@ -310,7 +319,9 @@ void MDLog::prepare_new_segment() void MDLog::journal_segment_subtree_map(Context *onsync) { dout(7) << __func__ << dendl; - submit_entry(mds->mdcache->create_subtree_map(), onsync); + ESubtreeMap *sle = mds->mdcache->create_subtree_map(); + sle->event_seq = get_last_segment_seq(); + submit_entry(sle, onsync); } void MDLog::trim(int m) @@ -430,7 +441,7 @@ void MDLog::_trim_expired_segments() logger->inc(l_mdl_segtrm); logger->inc(l_mdl_evtrm, ls->num_events); - segments.erase(ls->offset); + segments.erase(ls->seq); delete ls; trimmed = true; } @@ -835,8 +846,15 @@ void MDLog::_replay_thread() // new segment? 
if (le->get_type() == EVENT_SUBTREEMAP || le->get_type() == EVENT_RESETJOURNAL) { - segments[pos] = new LogSegment(pos); + ESubtreeMap *sle = dynamic_cast(le); + if (sle && sle->event_seq > 0) + event_seq = sle->event_seq; + else + event_seq = pos; + segments[event_seq] = new LogSegment(event_seq, pos); logger->set(l_mdl_seg, segments.size()); + } else { + event_seq++; } // have we seen an import map yet? diff --git a/src/mds/MDLog.h b/src/mds/MDLog.h index a04b2f8251bb5..318c6344e7dc0 100644 --- a/src/mds/MDLog.h +++ b/src/mds/MDLog.h @@ -117,6 +117,7 @@ protected: map segments; set expiring_segments; set expired_segments; + uint64_t event_seq; int expiring_events; int expired_events; @@ -126,6 +127,10 @@ protected: friend class MDCache; public: + uint64_t get_last_segment_seq() { + assert(!segments.empty()); + return segments.rbegin()->first; + } uint64_t get_last_segment_offset() { assert(!segments.empty()); return segments.rbegin()->first; @@ -168,7 +173,7 @@ public: replay_thread(this), already_replayed(false), recovery_thread(this), - expiring_events(0), expired_events(0), + event_seq(0), expiring_events(0), expired_events(0), cur_event(NULL) { } ~MDLog(); @@ -176,7 +181,7 @@ public: // -- segments -- void start_new_segment(); void prepare_new_segment(); - void journal_segment_subtree_map(Context *onsync=0); + void journal_segment_subtree_map(Context *onsync); LogSegment *peek_current_segment() { return segments.empty() ? 
NULL : segments.rbegin()->second; @@ -187,9 +192,9 @@ public: return segments.rbegin()->second; } - LogSegment *get_segment(uint64_t off) { - if (segments.count(off)) - return segments[off]; + LogSegment *get_segment(uint64_t seq) { + if (segments.count(seq)) + return segments[seq]; return NULL; } diff --git a/src/mds/events/EMetaBlob.h b/src/mds/events/EMetaBlob.h index be0021d75ac06..86889c58e282a 100644 --- a/src/mds/events/EMetaBlob.h +++ b/src/mds/events/EMetaBlob.h @@ -326,7 +326,7 @@ private: static void generate_test_instances(list& ls); // soft stateadd uint64_t last_subtree_map; - uint64_t my_offset; + uint64_t event_seq; // for replay, in certain cases //LogSegment *_segment; @@ -425,7 +425,7 @@ private: in = dn->get_projected_linkage()->get_inode(); // make note of where this inode was last journaled - in->last_journaled = my_offset; + in->last_journaled = event_seq; //cout << "journaling " << in->inode.ino << " at " << my_offset << std::endl; inode_t *pi = in->get_projected_inode(); @@ -478,7 +478,7 @@ private: void add_root(bool dirty, CInode *in, inode_t *pi=0, fragtree_t *pdft=0, bufferlist *psnapbl=0, map *px=0) { - in->last_journaled = my_offset; + in->last_journaled = event_seq; //cout << "journaling " << in->inode.ino << " at " << my_offset << std::endl; if (!pi) pi = in->get_projected_inode(); diff --git a/src/mds/events/ESubtreeMap.h b/src/mds/events/ESubtreeMap.h index 1349dfce91146..4dea7ecc7991e 100644 --- a/src/mds/events/ESubtreeMap.h +++ b/src/mds/events/ESubtreeMap.h @@ -24,8 +24,9 @@ public: map > subtrees; set ambiguous_subtrees; uint64_t expire_pos; + uint64_t event_seq; - ESubtreeMap() : LogEvent(EVENT_SUBTREEMAP), expire_pos(0) { } + ESubtreeMap() : LogEvent(EVENT_SUBTREEMAP), expire_pos(0), event_seq(0) { } void print(ostream& out) const { out << "ESubtreeMap " << subtrees.size() << " subtrees " diff --git a/src/mds/journal.cc b/src/mds/journal.cc index 5a3d1045b4973..1db55d11e6ad1 100644 --- a/src/mds/journal.cc +++ 
b/src/mds/journal.cc @@ -291,10 +291,8 @@ void LogSegment::try_to_expire(MDS *mds, C_GatherBuilder &gather_bld, int op_pri // EMetaBlob EMetaBlob::EMetaBlob(MDLog *mdlog) : opened_ino(0), renamed_dirino(0), - inotablev(0), sessionmapv(0), - allocated_ino(0), - last_subtree_map(mdlog ? mdlog->get_last_segment_offset() : 0), - my_offset(mdlog ? mdlog->get_write_pos() : 0) //, _segment(0) + inotablev(0), sessionmapv(0), allocated_ino(0), + last_subtree_map(0), event_seq(0) { } void EMetaBlob::add_dir_context(CDir *dir, int mode) @@ -339,7 +337,7 @@ void EMetaBlob::add_dir_context(CDir *dir, int mode) } // was the inode journaled in this blob? - if (my_offset && diri->last_journaled == my_offset) { + if (event_seq && diri->last_journaled == event_seq) { dout(20) << "EMetaBlob::add_dir_context(" << dir << ") already have diri this blob " << *diri << dendl; break; } @@ -2415,18 +2413,19 @@ void ESlaveUpdate::replay(MDS *mds) void ESubtreeMap::encode(bufferlist& bl) const { - ENCODE_START(5, 5, bl); + ENCODE_START(6, 5, bl); ::encode(stamp, bl); ::encode(metablob, bl); ::encode(subtrees, bl); ::encode(ambiguous_subtrees, bl); ::encode(expire_pos, bl); + ::encode(event_seq, bl); ENCODE_FINISH(bl); } void ESubtreeMap::decode(bufferlist::iterator &bl) { - DECODE_START_LEGACY_COMPAT_LEN(5, 5, 5, bl); + DECODE_START_LEGACY_COMPAT_LEN(6, 5, 5, bl); if (struct_v >= 2) ::decode(stamp, bl); ::decode(metablob, bl); @@ -2435,6 +2434,8 @@ void ESubtreeMap::decode(bufferlist::iterator &bl) ::decode(ambiguous_subtrees, bl); if (struct_v >= 3) ::decode(expire_pos, bl); + if (struct_v >= 6) + ::decode(event_seq, bl); DECODE_FINISH(bl); } -- 2.39.5