From 0aed0d48c7b22bc1a700765529a7dcfac4f0a19d Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Wed, 6 Nov 2013 09:42:43 +0800 Subject: [PATCH] mds: handle cache rejoin corner case A recovering MDS may receive a strong cache rejoin from a survivor; the survivor then restarts, and the recovering MDS receives a weak cache rejoin from the same MDS. Before processing the weak cache rejoin, we should scour the replicas added by the obsolete strong cache rejoin. Signed-off-by: Yan, Zheng --- src/mds/MDCache.cc | 19 +++++++++++-------- src/mds/MDCache.h | 4 ++-- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index fd129fd531950..18f57910fc5bc 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -3893,6 +3893,10 @@ void MDCache::handle_cache_rejoin_weak(MMDSCacheRejoin *weak) } else { assert(mds->is_rejoin()); + // we may have already received a strong rejoin from the sender. + rejoin_scour_survivor_replicas(from, NULL, acked_inodes, gather_locks); + assert(gather_locks.empty()); + // check cap exports. for (map >::iterator p = weak->cap_exports.begin(); p != weak->cap_exports.end(); @@ -4048,7 +4052,7 @@ void MDCache::handle_cache_rejoin_weak(MMDSCacheRejoin *weak) ack->add_inode_base(in); } - rejoin_scour_survivor_replicas(from, ack, gather_locks, acked_inodes); + rejoin_scour_survivor_replicas(from, ack, acked_inodes, gather_locks); mds->send_message(ack, weak->get_connection()); for (set::iterator p = gather_locks.begin(); p != gather_locks.end(); ++p) @@ -4200,13 +4204,11 @@ bool MDCache::parallel_fetch_traverse_dir(inodeno_t ino, filepath& path, * ack, the replica dne, and we can remove it from our replica maps. */ void MDCache::rejoin_scour_survivor_replicas(int from, MMDSCacheRejoin *ack, - set& gather_locks, - set& acked_inodes) + set& acked_inodes, + set& gather_locks) { dout(10) << "rejoin_scour_survivor_replicas from mds." << from << dendl; - // FIXME: what about root and stray inodes. 
- for (hash_map::iterator p = inode_map.begin(); p != inode_map.end(); ++p) { @@ -4215,7 +4217,7 @@ void MDCache::rejoin_scour_survivor_replicas(int from, MMDSCacheRejoin *ack, // inode? if (in->is_auth() && in->is_replica(from) && - acked_inodes.count(p->second->vino()) == 0) { + (ack == NULL || acked_inodes.count(p->second->vino()) == 0)) { inode_remove_replica(in, from, gather_locks); dout(10) << " rem " << *in << dendl; } @@ -4231,7 +4233,7 @@ void MDCache::rejoin_scour_survivor_replicas(int from, MMDSCacheRejoin *ack, if (dir->is_auth() && dir->is_replica(from) && - ack->strong_dirfrags.count(dir->dirfrag()) == 0) { + (ack == NULL || ack->strong_dirfrags.count(dir->dirfrag()) == 0)) { dir->remove_replica(from); dout(10) << " rem " << *dir << dendl; } @@ -4243,7 +4245,8 @@ void MDCache::rejoin_scour_survivor_replicas(int from, MMDSCacheRejoin *ack, CDentry *dn = p->second; if (dn->is_replica(from) && - (ack->strong_dentries.count(dir->dirfrag()) == 0 || + (ack == NULL || + ack->strong_dentries.count(dir->dirfrag()) == 0 || ack->strong_dentries[dir->dirfrag()].count(string_snap_t(dn->name, dn->last)) == 0)) { dentry_remove_replica(dn, from, gather_locks); dout(10) << " rem " << *dn << dendl; diff --git a/src/mds/MDCache.h b/src/mds/MDCache.h index b1037cd6ea21f..3463efcf750ee 100644 --- a/src/mds/MDCache.h +++ b/src/mds/MDCache.h @@ -435,8 +435,8 @@ protected: CDir* rejoin_invent_dirfrag(dirfrag_t df); void handle_cache_rejoin_strong(MMDSCacheRejoin *m); void rejoin_scour_survivor_replicas(int from, MMDSCacheRejoin *ack, - set& gather_locks, - set& acked_inodes); + set& acked_inodes, + set& gather_locks); void handle_cache_rejoin_ack(MMDSCacheRejoin *m); void handle_cache_rejoin_purge(MMDSCacheRejoin *m); void handle_cache_rejoin_missing(MMDSCacheRejoin *m); -- 2.39.5