From: Yan, Zheng Date: Fri, 10 Jan 2014 02:58:41 +0000 (+0800) Subject: mds: allow acquiring wrlock and remote wrlock at the same time X-Git-Tag: v0.78~165^2~31 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=d0df8413fb8d567ecc846f7653d410c097b053cb;p=ceph.git mds: allow acquiring wrlock and remote wrlock at the same time Rename may move file from one dirfrag to another dirfrag of the same directory inode. If two dirfrags belong to different auth MDS, both MDS should hold wrlocks on filelock/nestlock of the directory inode. If a lock is in both wrlocks list and remote_wrlocks list, current Locker::acquire_locks() only acquires the local wrlock. The auth MDS of the source dirfrag doesn't have the wrlock, so slave request of the operation may modify the dirfrag after fragstat/neststat of the dirfrag have already been gathered. It corrupts the dirstat/neststat accounting. Signed-off-by: Yan, Zheng --- diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc index 4976ef3e8051..e00bddcf1e8a 100644 --- a/src/mds/Locker.cc +++ b/src/mds/Locker.cc @@ -368,6 +368,8 @@ bool Locker::acquire_locks(MDRequest *mdr, for (set::iterator p = sorted.begin(); p != sorted.end(); ++p) { + bool need_wrlock = !!wrlocks.count(*p); + bool need_remote_wrlock = !!(remote_wrlocks && remote_wrlocks->count(*p)); // already locked? if (existing != mdr->locks.end() && *existing == *p) { @@ -378,21 +380,23 @@ bool Locker::acquire_locks(MDRequest *mdr, dout(10) << " already xlocked " << *have << " " << *have->get_parent() << dendl; continue; } - if (wrlocks.count(have) && mdr->wrlocks.count(have)) { - dout(10) << " already wrlocked " << *have << " " << *have->get_parent() << dendl; - continue; + if (mdr->remote_wrlocks.count(have)) { + if (!need_remote_wrlock || + mdr->remote_wrlocks[have] != (*remote_wrlocks)[have]) { + dout(10) << " unlocking remote_wrlock on wrong mds." << mdr->remote_wrlocks[have] + << " " << *have << " " << *have->get_parent() << dendl; + remote_wrlock_finish(have, mdr->remote_wrlocks[have], mdr); + } } - if (remote_wrlocks && remote_wrlocks->count(have) && - mdr->remote_wrlocks.count(have)) { - if (mdr->remote_wrlocks[have] == (*remote_wrlocks)[have]) { - dout(10) << " already remote_wrlocked " << *have << " " << *have->get_parent() << dendl; + if (need_wrlock || need_remote_wrlock) { + if (need_wrlock == !!mdr->wrlocks.count(have) && + need_remote_wrlock == !!mdr->remote_wrlocks.count(have)) { + if (need_wrlock) + dout(10) << " already wrlocked " << *have << " " << *have->get_parent() << dendl; + if (need_remote_wrlock) + dout(10) << " already remote_wrlocked " << *have << " " << *have->get_parent() << dendl; continue; } - dout(10) << " unlocking remote_wrlock on wrong mds." << mdr->remote_wrlocks[have] - << " (want mds." << (*remote_wrlocks)[have] << ") " - << *have << " " << *have->get_parent() << dendl; - remote_wrlock_finish(have, mdr->remote_wrlocks[have], mdr); - // continue... } if (rdlocks.count(have) && mdr->rdlocks.count(have)) { dout(10) << " already rdlocked " << *have << " " << *have->get_parent() << dendl; @@ -401,19 +405,37 @@ bool Locker::acquire_locks(MDRequest *mdr, } // hose any stray locks + if (*existing == *p) { + assert(need_wrlock || need_remote_wrlock); + SimpleLock *lock = *existing; + if (mdr->wrlocks.count(lock)) { + if (!need_wrlock) + dout(10) << " unlocking extra " << *lock << " " << *lock->get_parent() << dendl; + else if (need_remote_wrlock) // acquire remote_wrlock first + dout(10) << " unlocking out-of-order " << *lock << " " << *lock->get_parent() << dendl; + bool need_issue = false; + wrlock_finish(lock, mdr, &need_issue); + if (need_issue) + issue_set.insert(static_cast(lock->get_parent())); + } + ++existing; + } while (existing != mdr->locks.end()) { SimpleLock *stray = *existing; ++existing; dout(10) << " unlocking out-of-order " << *stray << " " << *stray->get_parent() << dendl; bool need_issue = false; - if (mdr->xlocks.count(stray)) + if (mdr->xlocks.count(stray)) { xlock_finish(stray, mdr, &need_issue); - else if (mdr->wrlocks.count(stray)) - wrlock_finish(stray, mdr, &need_issue); - else if (mdr->remote_wrlocks.count(stray)) - remote_wrlock_finish(stray, mdr->remote_wrlocks[stray], mdr); - else + } else if (mdr->rdlocks.count(stray)) { rdlock_finish(stray, mdr, &need_issue); + } else { + // may have acquired both wrlock and remore wrlock + if (mdr->wrlocks.count(stray)) + wrlock_finish(stray, mdr, &need_issue); + if (mdr->remote_wrlocks.count(stray)) + remote_wrlock_finish(stray, mdr->remote_wrlocks[stray], mdr); + } if (need_issue) issue_set.insert(static_cast(stray->get_parent())); } @@ -426,13 +448,23 @@ bool Locker::acquire_locks(MDRequest *mdr, if (!xlock_start(*p, mdr)) goto out; dout(10) << " got xlock on " << **p << " " << *(*p)->get_parent() << dendl; - } else if (wrlocks.count(*p)) { - if (!wrlock_start(*p, mdr)) + } else if (need_wrlock || need_remote_wrlock) { + if (need_remote_wrlock && !mdr->remote_wrlocks.count(*p)) { + remote_wrlock_start(*p, (*remote_wrlocks)[*p], mdr); goto out; - dout(10) << " got wrlock on " << **p << " " << *(*p)->get_parent() << dendl; - } else if (remote_wrlocks && remote_wrlocks->count(*p)) { - remote_wrlock_start(*p, (*remote_wrlocks)[*p], mdr); - goto out; + } + if (need_wrlock && !mdr->wrlocks.count(*p)) { + if (need_remote_wrlock && !(*p)->can_wrlock(mdr->get_client())) { + // can't take the wrlock because the scatter lock is gathering. need to + // release the remote wrlock, so that the gathering process can finish. + remote_wrlock_finish(*p, mdr->remote_wrlocks[*p], mdr); + remote_wrlock_start(*p, (*remote_wrlocks)[*p], mdr); + goto out; + } + if (!wrlock_start(*p, mdr)) + goto out; + dout(10) << " got wrlock on " << **p << " " << *(*p)->get_parent() << dendl; + } } else { if (!rdlock_start(*p, mdr)) goto out; @@ -446,14 +478,17 @@ bool Locker::acquire_locks(MDRequest *mdr, ++existing; dout(10) << " unlocking extra " << *stray << " " << *stray->get_parent() << dendl; bool need_issue = false; - if (mdr->xlocks.count(stray)) + if (mdr->xlocks.count(stray)) { xlock_finish(stray, mdr, &need_issue); - else if (mdr->wrlocks.count(stray)) - wrlock_finish(stray, mdr, &need_issue); - else if (mdr->remote_wrlocks.count(stray)) - remote_wrlock_finish(stray, mdr->remote_wrlocks[stray], mdr); - else + } else if (mdr->rdlocks.count(stray)) { rdlock_finish(stray, mdr, &need_issue); + } else { + // may have acquired both wrlock and remore wrlock + if (mdr->wrlocks.count(stray)) + wrlock_finish(stray, mdr, &need_issue); + if (mdr->remote_wrlocks.count(stray)) + remote_wrlock_finish(stray, mdr->remote_wrlocks[stray], mdr); + } if (need_issue) issue_set.insert(static_cast(stray->get_parent())); } @@ -513,9 +548,11 @@ void Locker::_drop_non_rdlocks(Mutation *mut, set *pneed_issue) } while (!mut->remote_wrlocks.empty()) { - slaves.insert(mut->remote_wrlocks.begin()->second); - mut->locks.erase(mut->remote_wrlocks.begin()->first); - mut->remote_wrlocks.erase(mut->remote_wrlocks.begin()); + map::iterator p = mut->remote_wrlocks.begin(); + slaves.insert(p->second); + if (mut->wrlocks.count(p->first) == 0) + mut->locks.erase(p->first); + mut->remote_wrlocks.erase(p); } while (!mut->wrlocks.empty()) { @@ -1260,15 +1297,16 @@ bool Locker::wrlock_start(SimpleLock *lock, MDRequest *mut, bool nowait) dout(10) << "wrlock_start " << *lock << " on " << *lock->get_parent() << dendl; - bool want_scatter = lock->get_parent()->is_auth() && - (static_cast(lock->get_parent()))->has_subtree_root_dirfrag(); - CInode *in = static_cast(lock->get_parent()); client_t client = mut->get_client(); - + bool want_scatter = !nowait && lock->get_parent()->is_auth() && + (in->has_subtree_root_dirfrag() || + static_cast(lock)->get_scatter_wanted()); + while (1) { // wrlock? - if (lock->can_wrlock(client)) { + if (lock->can_wrlock(client) && + (!want_scatter || lock->get_state() == LOCK_MIX)) { lock->get_wrlock(); mut->wrlocks.insert(lock); mut->locks.insert(lock); @@ -1325,7 +1363,8 @@ void Locker::wrlock_finish(SimpleLock *lock, Mutation *mut, bool *pneed_issue) lock->put_wrlock(); if (mut) { mut->wrlocks.erase(lock); - mut->locks.erase(lock); + if (mut->remote_wrlocks.count(lock) == 0) + mut->locks.erase(lock); } if (!lock->is_wrlocked()) { @@ -1368,7 +1407,8 @@ void Locker::remote_wrlock_finish(SimpleLock *lock, int target, Mutation *mut) { // drop ref mut->remote_wrlocks.erase(lock); - mut->locks.erase(lock); + if (mut->wrlocks.count(lock) == 0) + mut->locks.erase(lock); dout(7) << "remote_wrlock_finish releasing remote wrlock on mds." << target << " " << *lock->get_parent() << dendl; diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index 167a85e75f8e..d1c4356cf62a 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -1916,8 +1916,7 @@ void MDCache::predirty_journal_parents(Mutation *mut, EMetaBlob *blob, pf->version = parent->pre_dirty(); if (do_parent_mtime || linkunlink) { - assert(mut->wrlocks.count(&pin->filelock) || - mut->is_slave()); // we are slave. master will have wrlocked the dir. + assert(mut->wrlocks.count(&pin->filelock)); assert(cfollows == CEPH_NOSNAP); // update stale fragstat?