From: Yan, Zheng Date: Wed, 19 Mar 2014 11:56:26 +0000 (+0800) Subject: mds: handle interaction between slave rollback and fragmenting X-Git-Tag: v0.79~52^2~21 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=6963a8f9cb8392a091e3bbfadc0e4c6e76d9206e;p=ceph.git mds: handle interaction between slave rollback and fragmenting For slave rename and rmdir events, the MDS needs to preserve the non-auth dirfrag where the renamed inode originally lived until the slave commit event is encountered. The current method to handle this is to use MDCache::uncommitted_slave_rename_olddir to track any non-auth dirfrags that need to be preserved. This method does not work well if any preserved dirfrag gets fragmented by a log event (such as ESubtreeMap) between the slave prepare event and the slave commit event. The fix is to track the inode of the dirfrag instead of directly tracking the dirfrags that need to be preserved. Signed-off-by: Yan, Zheng --- diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index a26633a44d21..92361ff29628 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -3163,7 +3163,7 @@ void MDCache::add_uncommitted_slave_update(metareqid_t reqid, int master, MDSlav { assert(uncommitted_slave_updates[master].count(reqid) == 0); uncommitted_slave_updates[master][reqid] = su; - for(set::iterator p = su->olddirs.begin(); p != su->olddirs.end(); ++p) + for(set::iterator p = su->olddirs.begin(); p != su->olddirs.end(); ++p) uncommitted_slave_rename_olddir[*p]++; for(set::iterator p = su->unlinked.begin(); p != su->unlinked.end(); ++p) uncommitted_slave_unlink[*p]++; @@ -3178,25 +3178,38 @@ void MDCache::finish_uncommitted_slave_update(metareqid_t reqid, int master) if (uncommitted_slave_updates[master].empty()) uncommitted_slave_updates.erase(master); // discard the non-auth subtree we renamed out of - for(set::iterator p = su->olddirs.begin(); p != su->olddirs.end(); ++p) { - CDir *dir = *p; - uncommitted_slave_rename_olddir[dir]--; - if 
(uncommitted_slave_rename_olddir[dir] == 0) { - uncommitted_slave_rename_olddir.erase(dir); - CDir *root = get_subtree_root(dir); - if (root->get_dir_auth() == CDIR_AUTH_UNDEF) - try_trim_non_auth_subtree(root); - } + for(set::iterator p = su->olddirs.begin(); p != su->olddirs.end(); ++p) { + CInode *diri = *p; + map::iterator it = uncommitted_slave_rename_olddir.find(diri); + assert(it != uncommitted_slave_rename_olddir.end()); + it->second--; + if (it->second == 0) { + uncommitted_slave_rename_olddir.erase(it); + list ls; + diri->get_dirfrags(ls); + for (list::iterator q = ls.begin(); q != ls.end(); ++q) { + CDir *root = get_subtree_root(*q); + if (root->get_dir_auth() == CDIR_AUTH_UNDEF) { + try_trim_non_auth_subtree(root); + if (*q != root) + break; + } + } + } else + assert(it->second > 0); } // removed the inodes that were unlinked by slave update for(set::iterator p = su->unlinked.begin(); p != su->unlinked.end(); ++p) { CInode *in = *p; - uncommitted_slave_unlink[in]--; - if (uncommitted_slave_unlink[in] == 0) { - uncommitted_slave_unlink.erase(in); + map::iterator it = uncommitted_slave_unlink.find(in); + assert(it != uncommitted_slave_unlink.end()); + it->second--; + if (it->second == 0) { + uncommitted_slave_unlink.erase(it); if (!in->get_projected_parent_dn()) mds->mdcache->remove_inode_recursive(in); - } + } else + assert(it->second > 0); } delete su; } @@ -6673,7 +6686,7 @@ bool MDCache::trim_non_auth_subtree(CDir *dir) { dout(10) << "trim_non_auth_subtree(" << dir << ") " << *dir << dendl; - if (uncommitted_slave_rename_olddir.count(dir) || // preserve the dir for rollback + if (uncommitted_slave_rename_olddir.count(dir->inode) || // preserve the dir for rollback my_ambiguous_imports.count(dir->dirfrag())) return true; @@ -6694,7 +6707,7 @@ bool MDCache::trim_non_auth_subtree(CDir *dir) for (list::iterator subdir = subdirs.begin(); subdir != subdirs.end(); ++subdir) { - if (uncommitted_slave_rename_olddir.count(*subdir) || // preserve the dir for 
rollback + if (uncommitted_slave_rename_olddir.count((*subdir)->inode) || // preserve the dir for rollback my_ambiguous_imports.count((*subdir)->dirfrag()) || (*subdir)->is_subtree_root()) { keep_inode = true; diff --git a/src/mds/MDCache.h b/src/mds/MDCache.h index 49c609511a98..d6b81795604c 100644 --- a/src/mds/MDCache.h +++ b/src/mds/MDCache.h @@ -317,7 +317,7 @@ protected: map > > other_ambiguous_imports; map > uncommitted_slave_updates; // slave: for replay. - map uncommitted_slave_rename_olddir; // slave: preserve the non-auth dir until seeing commit. + map uncommitted_slave_rename_olddir; // slave: preserve the non-auth dir until seeing commit. map uncommitted_slave_unlink; // slave: preserve the unlinked inode until seeing commit. // track master requests whose slaves haven't acknowledged commit @@ -617,6 +617,13 @@ public: return NULL; return in->get_dirfrag(df.frag); } + CDir* get_dirfrag(inodeno_t ino, const string& dn) { + CInode *in = get_inode(ino); + if (!in) + return NULL; + frag_t fg = in->pick_dirfrag(dn); + return in->get_dirfrag(fg); + } CDir* get_force_dirfrag(dirfrag_t df) { CInode *diri = get_inode(df.ino); if (!diri) diff --git a/src/mds/Mutation.h b/src/mds/Mutation.h index bc60a776597d..92750bef2f40 100644 --- a/src/mds/Mutation.h +++ b/src/mds/Mutation.h @@ -328,7 +328,7 @@ struct MDSlaveUpdate { bufferlist rollback; elist::item item; Context *waiter; - set olddirs; + set olddirs; set unlinked; MDSlaveUpdate(int oo, bufferlist &rbl, elist &list) : origop(oo), diff --git a/src/mds/Server.cc b/src/mds/Server.cc index 7416319be0e1..47af825736ed 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -5304,6 +5304,8 @@ void Server::do_rmdir_rollback(bufferlist &rbl, int master, MDRequest *mdr) assert(mdr || mds->is_resolve()); CDir *dir = mds->mdcache->get_dirfrag(rollback.src_dir); + if (!dir) + dir = mds->mdcache->get_dirfrag(rollback.src_dir.ino, rollback.src_dname); assert(dir); CDentry *dn = dir->lookup(rollback.src_dname); assert(dn); 
@@ -6928,6 +6930,8 @@ void Server::do_rename_rollback(bufferlist &rbl, int master, MDRequest *mdr, CDentry *srcdn = NULL; CDir *srcdir = mds->mdcache->get_dirfrag(rollback.orig_src.dirfrag); + if (!srcdir) + srcdir = mds->mdcache->get_dirfrag(rollback.orig_src.dirfrag.ino, rollback.orig_src.dname); if (srcdir) { dout(10) << " srcdir " << *srcdir << dendl; srcdn = srcdir->lookup(rollback.orig_src.dname); @@ -6941,6 +6945,8 @@ void Server::do_rename_rollback(bufferlist &rbl, int master, MDRequest *mdr, CDentry *destdn = NULL; CDir *destdir = mds->mdcache->get_dirfrag(rollback.orig_dest.dirfrag); + if (!destdir) + destdir = mds->mdcache->get_dirfrag(rollback.orig_dest.dirfrag.ino, rollback.orig_dest.dname); if (destdir) { dout(10) << " destdir " << *destdir << dendl; destdn = destdir->lookup(rollback.orig_dest.dname); diff --git a/src/mds/journal.cc b/src/mds/journal.cc index c7550e746758..0c6f6694a231 100644 --- a/src/mds/journal.cc +++ b/src/mds/journal.cc @@ -1208,7 +1208,7 @@ void EMetaBlob::replay(MDS *mds, LogSegment *logseg, MDSlaveUpdate *slaveup) assert(dir); // preserve subtree bound until slave commit if (dir->get_dir_auth() == CDIR_AUTH_UNDEF) - slaveup->olddirs.insert(dir); + slaveup->olddirs.insert(dir->inode); } } @@ -1218,7 +1218,7 @@ void EMetaBlob::replay(MDS *mds, LogSegment *logseg, MDSlaveUpdate *slaveup) CDir *root = mds->mdcache->get_subtree_root(olddir); if (root->get_dir_auth() == CDIR_AUTH_UNDEF) { if (slaveup) // preserve the old dir until slave commit - slaveup->olddirs.insert(olddir); + slaveup->olddirs.insert(olddir->inode); else mds->mdcache->try_trim_non_auth_subtree(root); }