From: Yan, Zheng Date: Tue, 21 Jun 2016 09:17:56 +0000 (+0800) Subject: mds: journal snap inodes that need flush when expiring log segment X-Git-Tag: v11.0.1~721^2~14 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=306153b3d012832bdfa20402077fa60a9a5d626c;p=ceph-ci.git mds: journal snap inodes that need flush when expiring log segment Treat snap inodes that need flush in the same way as open files. When the MDS recovers, this makes sure that journal replay brings snap inodes that need flush into the cache. Signed-off-by: Yan, Zheng --- diff --git a/src/mds/CInode.h b/src/mds/CInode.h index b4d9ba0602c..b7f5125d60a 100644 --- a/src/mds/CInode.h +++ b/src/mds/CInode.h @@ -560,8 +560,8 @@ protected: compact_map mds_caps_wanted; // [auth] mds -> caps wanted int replica_caps_wanted; // [replica] what i've requested from auth - compact_map > client_snap_caps; // [auth] [snap] dirty metadata we still need from the head public: + compact_map > client_snap_caps; // [auth] [snap] dirty metadata we still need from the head compact_map > client_need_snapflush; void add_need_snapflush(CInode *snapin, snapid_t snapid, client_t client); diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc index ff423074349..ec3d499a6ae 100644 --- a/src/mds/Locker.cc +++ b/src/mds/Locker.cc @@ -1751,8 +1751,11 @@ void Locker::file_update_finish(CInode *in, MutationRef& mut, bool share, client } else ++p; } - if (gather) + if (gather) { + if (in->client_snap_caps.empty()) + in->item_open_file.remove_myself(); eval_cap_gather(in, &need_issue); + } } else { if (cap && (cap->wanted() & ~cap->pending()) && need_issue.count(in) == 0) { // if we won't issue below anyway diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index b71e1d4a699..55280be8684 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -1704,7 +1704,10 @@ void MDCache::journal_cow_dentry(MutationImpl *mut, EMetaBlob *metablob, CDentry *olddn = dn->dir->add_primary_dentry(dn->name, oldin, oldfirst, follows); 
oldin->inode.version = olddn->pre_dirty(); dout(10) << " olddn " << *olddn << dendl; - metablob->add_primary_dentry(olddn, 0, true); + bool need_snapflush = !oldin->client_snap_caps.empty(); + if (need_snapflush) + mut->ls->open_files.push_back(&oldin->item_open_file); + metablob->add_primary_dentry(olddn, 0, true, false, false, need_snapflush); mut->add_cow_dentry(olddn); } else { assert(dnl->is_remote()); @@ -5501,14 +5504,21 @@ void MDCache::clean_open_file_lists() p != mds->mdlog->segments.end(); ++p) { LogSegment *ls = p->second; - + elist::iterator q = ls->open_files.begin(member_offset(CInode, item_open_file)); while (!q.end()) { CInode *in = *q; ++q; - if (!in->is_any_caps_wanted()) { - dout(10) << " unlisting unwanted/capless inode " << *in << dendl; - in->item_open_file.remove_myself(); + if (in->last == CEPH_NOSNAP) { + if (!in->is_any_caps_wanted()) { + dout(10) << " unlisting unwanted/capless inode " << *in << dendl; + in->item_open_file.remove_myself(); + } + } else if (in->last != CEPH_NOSNAP) { + if (in->client_snap_caps.empty()) { + dout(10) << " unlisting flushed snap inode " << *in << dendl; + in->item_open_file.remove_myself(); + } } } } diff --git a/src/mds/events/EMetaBlob.h b/src/mds/events/EMetaBlob.h index 77f8c3c43be..777152e246a 100644 --- a/src/mds/events/EMetaBlob.h +++ b/src/mds/events/EMetaBlob.h @@ -62,6 +62,7 @@ public: static const int STATE_DIRTY = (1<<0); static const int STATE_DIRTYPARENT = (1<<1); static const int STATE_DIRTYPOOL = (1<<2); + static const int STATE_NEED_SNAPFLUSH = (1<<3); typedef compact_map old_inodes_t; string dn; // dentry snapid_t dnfirst, dnlast; @@ -109,6 +110,7 @@ public: bool is_dirty() const { return (state & STATE_DIRTY); } bool is_dirty_parent() const { return (state & STATE_DIRTYPARENT); } bool is_dirty_pool() const { return (state & STATE_DIRTYPOOL); } + bool need_snapflush() const { return (state & STATE_NEED_SNAPFLUSH); } void print(ostream& out) const { out << " fullbit dn " << dn << " [" << 
dnfirst << "," << dnlast << "] dnv " << dnv @@ -420,11 +422,13 @@ private: // return remote pointer to to-be-journaled inode void add_primary_dentry(CDentry *dn, CInode *in, bool dirty, - bool dirty_parent=false, bool dirty_pool=false) { + bool dirty_parent=false, bool dirty_pool=false, + bool need_snapflush=false) { __u8 state = 0; if (dirty) state |= fullbit::STATE_DIRTY; if (dirty_parent) state |= fullbit::STATE_DIRTYPARENT; if (dirty_pool) state |= fullbit::STATE_DIRTYPOOL; + if (need_snapflush) state |= fullbit::STATE_NEED_SNAPFLUSH; add_primary_dentry(add_dir(dn->get_dir(), false), dn, in, state); } void add_primary_dentry(dirlump& lump, CDentry *dn, CInode *in, __u8 state) { diff --git a/src/mds/events/EOpen.h b/src/mds/events/EOpen.h index c48d7350546..601652a5e95 100644 --- a/src/mds/events/EOpen.h +++ b/src/mds/events/EOpen.h @@ -22,6 +22,7 @@ class EOpen : public LogEvent { public: EMetaBlob metablob; vector inos; + vector snap_inos; EOpen() : LogEvent(EVENT_OPEN) { } explicit EOpen(MDLog *mdlog) : @@ -37,7 +38,10 @@ public: if (!in->is_base()) { metablob.add_dir_context(in->get_projected_parent_dn()->get_dir()); metablob.add_primary_dentry(in->get_projected_parent_dn(), 0, false); - inos.push_back(in->ino()); + if (in->last == CEPH_NOSNAP) + inos.push_back(in->ino()); + else + snap_inos.push_back(in->vino()); } } void add_ino(inodeno_t ino) { diff --git a/src/mds/journal.cc b/src/mds/journal.cc index ccd8950dedd..3954b612b08 100644 --- a/src/mds/journal.cc +++ b/src/mds/journal.cc @@ -147,7 +147,7 @@ void LogSegment::try_to_expire(MDSRank *mds, MDSGatherBuilder &gather_bld, int o assert(g_conf->mds_kill_journal_expire_at != 2); - // open files + // open files and snap inodes if (!open_files.empty()) { assert(!mds->mdlog->is_capped()); // hmm FIXME EOpen *le = 0; @@ -156,9 +156,9 @@ void LogSegment::try_to_expire(MDSRank *mds, MDSGatherBuilder &gather_bld, int o elist::iterator p = open_files.begin(member_offset(CInode, item_open_file)); while (!p.end()) 
{ CInode *in = *p; - assert(in->last == CEPH_NOSNAP); ++p; - if (in->is_auth() && !in->is_ambiguous_auth() && in->is_any_caps()) { + if (in->last == CEPH_NOSNAP && in->is_auth() && + !in->is_ambiguous_auth() && in->is_any_caps()) { if (in->is_any_caps_wanted()) { dout(20) << "try_to_expire requeueing open file " << *in << dendl; if (!le) { @@ -172,6 +172,15 @@ void LogSegment::try_to_expire(MDSRank *mds, MDSGatherBuilder &gather_bld, int o dout(20) << "try_to_expire not requeueing and delisting unwanted file " << *in << dendl; in->item_open_file.remove_myself(); } + } else if (in->last != CEPH_NOSNAP && !in->client_snap_caps.empty()) { + // journal snap inodes that need flush. This simplifies the mds failover handling + dout(20) << "try_to_expire requeueing snap needflush inode " << *in << dendl; + if (!le) { + le = new EOpen(mds->mdlog); + mds->mdlog->start_entry(le); + } + le->add_clean_inode(in); + ls->open_files.push_back(&in->item_open_file); } else { /* * we can get a capless inode here if we replay an open file, the client fails to @@ -1332,6 +1341,8 @@ void EMetaBlob::replay(MDSRank *mds, LogSegment *logseg, MDSlaveUpdate *slaveup) in->_mark_dirty(logseg); if (p->is_dirty_parent()) in->_mark_dirty_parent(logseg, p->is_dirty_pool()); + if (p->need_snapflush()) + logseg->open_files.push_back(&in->item_open_file); if (dn->is_auth()) in->state_set(CInode::STATE_AUTH); else @@ -2120,10 +2131,11 @@ void EUpdate::replay(MDSRank *mds) // EOpen void EOpen::encode(bufferlist &bl, uint64_t features) const { - ENCODE_START(3, 3, bl); + ENCODE_START(4, 3, bl); ::encode(stamp, bl); ::encode(metablob, bl, features); ::encode(inos, bl); + ::encode(snap_inos, bl); ENCODE_FINISH(bl); } @@ -2133,6 +2145,8 @@ void EOpen::decode(bufferlist::iterator &bl) { ::decode(stamp, bl); ::decode(metablob, bl); ::decode(inos, bl); + if (struct_v >= 4) + ::decode(snap_inos, bl); DECODE_FINISH(bl); } @@ -2167,12 +2181,18 @@ void EOpen::replay(MDSRank *mds) metablob.replay(mds, _segment); // 
note which segments inodes belong to, so we don't have to start rejournaling them - for (vector::iterator p = inos.begin(); - p != inos.end(); - ++p) { - CInode *in = mds->mdcache->get_inode(*p); + for (const auto &ino : inos) { + CInode *in = mds->mdcache->get_inode(ino); + if (!in) { + dout(0) << "EOpen.replay ino " << ino << " not in metablob" << dendl; + assert(in); + } + _segment->open_files.push_back(&in->item_open_file); + } + for (const auto &vino : snap_inos) { + CInode *in = mds->mdcache->get_inode(vino); if (!in) { - dout(0) << "EOpen.replay ino " << *p << " not in metablob" << dendl; + dout(0) << "EOpen.replay ino " << vino << " not in metablob" << dendl; assert(in); } _segment->open_files.push_back(&in->item_open_file);