From: Yan, Zheng Date: Fri, 11 Aug 2017 09:52:19 +0000 (+0800) Subject: mds: make mksnap/setlayout wait for unsafe requests on other mds X-Git-Tag: v12.2.3~153^2~3 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=ebd7a6670fbb9724bf72f7b0587e5c18ade03a48;p=ceph.git mds: make mksnap/setlayout wait for unsafe requests on other mds This guarantees replayed unsafe requests (on other mds) and mksnap/setlayout get processed in proper order. Signed-off-by: "Yan, Zheng" (cherry picked from commit dee3711bd745704fba224dc6f90e88c04cacff58) --- diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc index a0ccf96016be..42b9be6624d5 100644 --- a/src/mds/Locker.cc +++ b/src/mds/Locker.cc @@ -229,13 +229,51 @@ bool Locker::acquire_locks(MDRequestRef& mdr, // xlocks for (set::iterator p = xlocks.begin(); p != xlocks.end(); ++p) { - dout(20) << " must xlock " << **p << " " << *(*p)->get_parent() << dendl; - sorted.insert(*p); - mustpin.insert((*p)->get_parent()); + SimpleLock *lock = *p; + + if ((lock->get_type() == CEPH_LOCK_ISNAP || + lock->get_type() == CEPH_LOCK_IPOLICY) && + mds->is_cluster_degraded() && + mdr->is_master() && + !mdr->is_replay()) { + // waiting for recovering mds, to guarantee replayed requests and mksnap/setlayout + // get processed in proper order. + bool wait = false; + if (lock->get_parent()->is_auth()) { + if (!mdr->locks.count(lock)) { + set ls; + lock->get_parent()->list_replicas(ls); + for (auto m : ls) { + if (mds->mdsmap->get_state(m) < MDSMap::STATE_ACTIVE) { + wait = true; + break; + } + } + } + } else { + // if the lock is the latest locked one, it's possible that slave mds got the lock + // while there are recovering mds. + if (!mdr->locks.count(lock) || lock == *mdr->locks.rbegin()) + wait = true; + } + if (wait) { + dout(10) << " must xlock " << *lock << " " << *lock->get_parent() + << ", waiting for cluster recovered" << dendl; + mds->locker->drop_locks(mdr.get(), NULL); + mdr->drop_local_auth_pins(); + mds->wait_for_cluster_recovered(new C_MDS_RetryRequest(mdcache, mdr)); + return false; + } + } + + dout(20) << " must xlock " << *lock << " " << *lock->get_parent() << dendl; + + sorted.insert(lock); + mustpin.insert(lock->get_parent()); // augment xlock with a versionlock? if ((*p)->get_type() == CEPH_LOCK_DN) { - CDentry *dn = (CDentry*)(*p)->get_parent(); + CDentry *dn = (CDentry*)lock->get_parent(); if (!dn->is_auth()) continue; @@ -252,9 +290,9 @@ bool Locker::acquire_locks(MDRequestRef& mdr, sorted.insert(&dn->versionlock); } } - if ((*p)->get_type() > CEPH_LOCK_IVERSION) { + if (lock->get_type() > CEPH_LOCK_IVERSION) { // inode version lock? - CInode *in = (CInode*)(*p)->get_parent(); + CInode *in = (CInode*)lock->get_parent(); if (!in->is_auth()) continue; if (mdr->is_master()) { @@ -762,19 +800,26 @@ void Locker::drop_non_rdlocks(MutationImpl *mut, set *pneed_issue) issue_caps_set(*pneed_issue); } -void Locker::drop_rdlocks(MutationImpl *mut, set *pneed_issue) +void Locker::drop_rdlocks_for_early_reply(MutationImpl *mut) { - set my_need_issue; - if (!pneed_issue) - pneed_issue = &my_need_issue; + set need_issue; - _drop_rdlocks(mut, pneed_issue); + for (auto p = mut->rdlocks.begin(); p != mut->rdlocks.end(); ) { + SimpleLock *lock = *p; + ++p; + // make later mksnap/setlayout (at other mds) wait for this unsafe request + if (lock->get_type() == CEPH_LOCK_ISNAP || + lock->get_type() == CEPH_LOCK_IPOLICY) + continue; + bool ni = false; + rdlock_finish(lock, mut, &ni); + if (ni) + need_issue.insert(static_cast(lock->get_parent())); + } - if (pneed_issue == &my_need_issue) - issue_caps_set(*pneed_issue); + issue_caps_set(need_issue); } - // generics void Locker::eval_gather(SimpleLock *lock, bool first, bool *pneed_issue, list *pfinishers) diff --git a/src/mds/Locker.h b/src/mds/Locker.h index 032d5ae83e66..4f47344dbea0 100644 --- a/src/mds/Locker.h +++ b/src/mds/Locker.h @@ -85,7 +85,7 @@ public: void drop_locks(MutationImpl *mut, set *pneed_issue=0); void set_xlocks_done(MutationImpl *mut, bool skip_dentry=false); void drop_non_rdlocks(MutationImpl *mut, set *pneed_issue=0); - void drop_rdlocks(MutationImpl *mut, set *pneed_issue=0); + void drop_rdlocks_for_early_reply(MutationImpl *mut); void eval_gather(SimpleLock *lock, bool first=false, bool *need_issue=0, list *pfinishers=0); void eval(SimpleLock *lock, bool *need_issue); diff --git a/src/mds/Server.cc b/src/mds/Server.cc index 38c44523e95c..b2eb35402936 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -1172,7 +1172,7 @@ void Server::journal_and_reply(MDRequestRef& mdr, CInode *in, CDentry *dn, LogEv mdlog->flush(); } } else if (mdr->did_early_reply) - mds->locker->drop_rdlocks(mdr.get()); + mds->locker->drop_rdlocks_for_early_reply(mdr.get()); else mdlog->flush(); }