From a4ed7ea8b8e384ea3c18da06c1be6eab6f5f9966 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Tue, 12 Mar 2013 16:51:53 +0800 Subject: [PATCH] mds: send lock action message when auth MDS is in proper state. For rejoining object, don't send lock ACK message because lock states are still uncertain. The lock ACK may confuse object's auth MDS and trigger assertion. If object's auth MDS is not active, just skip sending NUDGE, REQRDLOCK and REQSCATTER messages. MDCache::handle_mds_recovery() will take care of them. Also defer caps release message until clientreplay or active Signed-off-by: Yan, Zheng Reviewed-by: Greg Farnum --- src/mds/Locker.cc | 46 ++++++++++++++++++++++++++++++---------------- src/mds/MDCache.cc | 13 +++++++++++-- 2 files changed, 41 insertions(+), 18 deletions(-) diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc index bed6d2ad63b04..eab7ed686bc1c 100644 --- a/src/mds/Locker.cc +++ b/src/mds/Locker.cc @@ -658,6 +658,13 @@ void Locker::eval_gather(SimpleLock *lock, bool first, bool *pneed_issue, listget_parent()->authority().first; + if (lock->get_parent()->is_rejoining() && + mds->mdsmap->get_state(auth) == MDSMap::STATE_REJOIN) { + dout(7) << "eval_gather finished gather, but still rejoining " + << *lock->get_parent() << dendl; + return; + } + if (mds->mdsmap->get_state(auth) >= MDSMap::STATE_REJOIN) { switch (lock->get_state()) { case LOCK_SYNC_LOCK: @@ -1050,9 +1057,11 @@ bool Locker::_rdlock_kick(SimpleLock *lock, bool as_anon) } else { // request rdlock state change from auth int auth = lock->get_parent()->authority().first; - dout(10) << "requesting rdlock from auth on " - << *lock << " on " << *lock->get_parent() << dendl; - mds->send_message_mds(new MLock(lock, LOCK_AC_REQRDLOCK, mds->get_nodeid()), auth); + if (mds->mdsmap->is_clientreplay_or_active_or_stopping(auth)) { + dout(10) << "requesting rdlock from auth on " + << *lock << " on " << *lock->get_parent() << dendl; + mds->send_message_mds(new MLock(lock, LOCK_AC_REQRDLOCK, mds->get_nodeid()), auth); + } return false; } } @@ -1272,9 +1281,11 @@ bool Locker::wrlock_start(SimpleLock *lock, MDRequest *mut, bool nowait) // replica. // auth should be auth_pinned (see acquire_locks wrlock weird mustpin case). int auth = lock->get_parent()->authority().first; - dout(10) << "requesting scatter from auth on " - << *lock << " on " << *lock->get_parent() << dendl; - mds->send_message_mds(new MLock(lock, LOCK_AC_REQSCATTER, mds->get_nodeid()), auth); + if (mds->mdsmap->is_clientreplay_or_active_or_stopping(auth)) { + dout(10) << "requesting scatter from auth on " + << *lock << " on " << *lock->get_parent() << dendl; + mds->send_message_mds(new MLock(lock, LOCK_AC_REQSCATTER, mds->get_nodeid()), auth); + } break; } } @@ -1899,13 +1910,19 @@ void Locker::request_inode_file_caps(CInode *in) } int auth = in->authority().first; + if (in->is_rejoining() && + mds->mdsmap->get_state(auth) == MDSMap::STATE_REJOIN) { + mds->wait_for_active_peer(auth, new C_MDL_RequestInodeFileCaps(this, in)); + return; + } + dout(7) << "request_inode_file_caps " << ccap_string(wanted) << " was " << ccap_string(in->replica_caps_wanted) << " on " << *in << " to mds." << auth << dendl; in->replica_caps_wanted = wanted; - if (mds->mdsmap->get_state(auth) >= MDSMap::STATE_REJOIN) + if (mds->mdsmap->is_clientreplay_or_active_or_stopping(auth)) mds->send_message_mds(new MInodeFileCaps(in->ino(), in->replica_caps_wanted), auth); } @@ -1924,14 +1941,6 @@ void Locker::handle_inode_file_caps(MInodeFileCaps *m) assert(in); assert(in->is_auth()); - if (mds->is_rejoin() && - in->is_rejoining()) { - dout(7) << "handle_inode_file_caps still rejoining " << *in << ", dropping " << *m << dendl; - m->put(); - return; - } - - dout(7) << "handle_inode_file_caps replica mds." << from << " wants caps " << ccap_string(m->get_caps()) << " on " << *in << dendl; if (m->get_caps()) @@ -2850,6 +2859,11 @@ void Locker::handle_client_cap_release(MClientCapRelease *m) client_t client = m->get_source().num(); dout(10) << "handle_client_cap_release " << *m << dendl; + if (!mds->is_clientreplay() && !mds->is_active() && !mds->is_stopping()) { + mds->wait_for_replay(new C_MDS_RetryMessage(mds, m)); + return; + } + for (vector::iterator p = m->caps.begin(); p != m->caps.end(); ++p) { inodeno_t ino((uint64_t)p->ino); CInode *in = mdcache->get_inode(ino); @@ -3859,7 +3873,7 @@ void Locker::scatter_nudge(ScatterLock *lock, Context *c, bool forcelockchange) << *lock << " on " << *p << dendl; // request unscatter? int auth = lock->get_parent()->authority().first; - if (mds->mdsmap->get_state(auth) >= MDSMap::STATE_ACTIVE) + if (mds->mdsmap->is_clientreplay_or_active_or_stopping(auth)) mds->send_message_mds(new MLock(lock, LOCK_AC_NUDGE, mds->get_nodeid()), auth); // wait... diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index 10254a4f7dfc3..9e1d80f096243 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -3325,8 +3325,10 @@ void MDCache::recalc_auth_bits() if (root) { root->inode_auth.first = mds->mdsmap->get_root(); - if (mds->whoami != root->inode_auth.first) + if (mds->whoami != root->inode_auth.first) { root->state_clear(CInode::STATE_AUTH); + root->state_set(CInode::STATE_REJOINING); + } } set subtree_inodes; @@ -3340,8 +3342,10 @@ void MDCache::recalc_auth_bits() ++p) { CInode *inode = p->first->get_inode(); - if (inode->is_mdsdir() && inode->ino() != MDS_INO_MDSDIR(mds->get_nodeid())) + if (inode->is_mdsdir() && inode->ino() != MDS_INO_MDSDIR(mds->get_nodeid())) { inode->state_clear(CInode::STATE_AUTH); + inode->state_set(CInode::STATE_REJOINING); + } list dfq; // dirfrag queue dfq.push_back(p->first); @@ -3546,6 +3550,7 @@ void MDCache::rejoin_send_rejoins() root->filelock.get_state(), root->nestlock.get_state(), root->dirfragtreelock.get_state()); + root->state_set(CInode::STATE_REJOINING); if (root->is_dirty_scattered()) { dout(10) << " sending scatterlock state on root " << *root << dendl; p->second->add_scatterlock_state(root); @@ -3559,6 +3564,7 @@ void MDCache::rejoin_send_rejoins() in->filelock.get_state(), in->nestlock.get_state(), in->dirfragtreelock.get_state()); + in->state_set(CInode::STATE_REJOINING); } } } @@ -3698,6 +3704,7 @@ void MDCache::rejoin_walk(CDir *dir, MMDSCacheRejoin *rejoin) // STRONG dout(15) << " add_strong_dirfrag " << *dir << dendl; rejoin->add_strong_dirfrag(dir->dirfrag(), dir->get_replica_nonce(), dir->get_dir_rep()); + dir->state_set(CDir::STATE_REJOINING); for (CDir::map_t::iterator p = dir->items.begin(); p != dir->items.end(); @@ -3711,6 +3718,7 @@ void MDCache::rejoin_walk(CDir *dir, MMDSCacheRejoin *rejoin) dnl->is_remote() ? dnl->get_remote_d_type():0, dn->get_replica_nonce(), dn->lock.get_state()); + dn->state_set(CDentry::STATE_REJOINING); if (dnl->is_primary()) { CInode *in = dnl->get_inode(); dout(15) << " add_strong_inode " << *in << dendl; @@ -3720,6 +3728,7 @@ void MDCache::rejoin_walk(CDir *dir, MMDSCacheRejoin *rejoin) in->filelock.get_state(), in->nestlock.get_state(), in->dirfragtreelock.get_state()); + in->state_set(CInode::STATE_REJOINING); in->get_nested_dirfrags(nested); if (in->is_dirty_scattered()) { dout(10) << " sending scatterlock state on " << *in << dendl; -- 2.39.5