From: Yan, Zheng Date: Thu, 7 Mar 2019 08:30:32 +0000 (+0800) Subject: mds: avoid revoking Fsx from loner during directory fragmentation X-Git-Tag: v15.0.0~203^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=e68f1acb695a6b94ddd799b06a472bdf93d71813;p=ceph.git mds: avoid revoking Fsx from loner during directory fragmentation The MDS wrlocks the filelock during directory fragmentation and subtree migration. The purpose of the wrlock is to prevent the scatter-gather process. The MDS does not modify directory contents after the filelock is locked. The MDS can therefore pretend that the wrlock is being acquired by the loner, so it has a higher chance of getting the wrlock. Signed-off-by: "Yan, Zheng" --- diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc index 5239dd270f0..976c21b0bfb 100644 --- a/src/mds/Locker.cc +++ b/src/mds/Locker.cc @@ -301,10 +301,11 @@ bool Locker::acquire_locks(MDRequestRef& mdr, } } else if (p.is_wrlock()) { dout(20) << " must wrlock " << *lock << " " << *object << dendl; + client_t _client = p.is_state_pin() ? lock->get_excl_client() : client; if (object->is_auth()) { mustpin.insert(object); } else if (!object->is_auth() && - !lock->can_wrlock(client) && // we might have to request a scatter + !lock->can_wrlock(_client) && // we might have to request a scatter !mdr->is_slave()) { // if we are slave (remote_wrlock), the master already authpinned dout(15) << " will also auth_pin " << *object << " in case we need to request a scatter" << dendl; @@ -547,7 +548,8 @@ bool Locker::acquire_locks(MDRequestRef& mdr, } if (need_wrlock) { marker.message = "failed to wrlock, waiting"; - if (need_remote_wrlock && !p.lock->can_wrlock(mdr->get_client())) { + client_t _client = p.is_state_pin() ? p.lock->get_excl_client() : client; + if (need_remote_wrlock && !p.lock->can_wrlock(_client)) { marker.message = "failed to wrlock, dropping remote wrlock and waiting"; // can't take the wrlock because the scatter lock is gathering. 
need to // release the remote wrlock, so that the gathering process can finish. @@ -1447,8 +1449,9 @@ void Locker::wrlock_force(SimpleLock *lock, MutationRef& mut) mut->locks.emplace(lock, MutationImpl::LockOp::WRLOCK); } -bool Locker::wrlock_start(SimpleLock *lock, MDRequestRef& mut, bool nowait) +bool Locker::wrlock_start(const MutationImpl::LockOp &op, MDRequestRef& mut, bool nowait) { + SimpleLock *lock = op.lock; if (lock->get_type() == CEPH_LOCK_IVERSION || lock->get_type() == CEPH_LOCK_DVERSION) return local_wrlock_start(static_cast(lock), mut); @@ -1456,7 +1459,7 @@ bool Locker::wrlock_start(SimpleLock *lock, MDRequestRef& mut, bool nowait) dout(10) << "wrlock_start " << *lock << " on " << *lock->get_parent() << dendl; CInode *in = static_cast(lock->get_parent()); - client_t client = mut->get_client(); + client_t client = op.is_state_pin() ? lock->get_excl_client() : mut->get_client(); bool want_scatter = !nowait && lock->get_parent()->is_auth() && (in->has_subtree_or_exporting_dirfrag() || static_cast(lock)->get_scatter_wanted()); diff --git a/src/mds/Locker.h b/src/mds/Locker.h index 47b5b779edd..de35c954a67 100644 --- a/src/mds/Locker.h +++ b/src/mds/Locker.h @@ -110,7 +110,7 @@ public: void rdlock_take_set(MutationImpl::LockOpVec& lov, MutationRef& mut); void wrlock_force(SimpleLock *lock, MutationRef& mut); - bool wrlock_start(SimpleLock *lock, MDRequestRef& mut, bool nowait=false); + bool wrlock_start(const MutationImpl::LockOp &op, MDRequestRef& mut, bool nowait=false); void wrlock_finish(const MutationImpl::lock_iterator& it, MutationImpl *mut, bool *pneed_issue); void remote_wrlock_start(SimpleLock *lock, mds_rank_t target, MDRequestRef& mut); diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index a2f9a84c42b..903e724e0bf 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -11489,8 +11489,8 @@ void MDCache::dispatch_fragment_dir(MDRequestRef& mdr) MutationImpl::LockOpVec lov; lov.add_wrlock(&diri->dirfragtreelock); // prevent a racing 
gather on any other scatterlocks too - lov.add_wrlock(&diri->nestlock); - lov.add_wrlock(&diri->filelock); + lov.lock_scatter_gather(&diri->nestlock); + lov.lock_scatter_gather(&diri->filelock); if (!mds->locker->acquire_locks(mdr, lov, NULL, true)) if (!mdr->aborted) return; diff --git a/src/mds/Migrator.cc b/src/mds/Migrator.cc index 847e2e0d47e..180fb99b2de 100644 --- a/src/mds/Migrator.cc +++ b/src/mds/Migrator.cc @@ -1072,8 +1072,8 @@ void Migrator::dispatch_export_dir(MDRequestRef& mdr, int count) // are not auth MDS of the subtree root at the time they receive the // lock messages. So the auth MDS of the subtree root inode may get no // or duplicated fragstat/neststat for the subtree root dirfrag. - lov.add_wrlock(&dir->get_inode()->filelock); - lov.add_wrlock(&dir->get_inode()->nestlock); + lov.lock_scatter_gather(&dir->get_inode()->filelock); + lov.lock_scatter_gather(&dir->get_inode()->nestlock); if (dir->get_inode()->is_auth()) { dir->get_inode()->filelock.set_scatter_wanted(); dir->get_inode()->nestlock.set_scatter_wanted(); @@ -1265,8 +1265,9 @@ void Migrator::export_frozen(CDir *dir, uint64_t tid) get_export_lock_set(dir, lov); if ((diri->is_auth() && diri->is_frozen()) || !mds->locker->can_rdlock_set(lov) || - !diri->filelock.can_wrlock(-1) || - !diri->nestlock.can_wrlock(-1)) { + // for pinning scatter gather. loner has a higher chance to get wrlock + !diri->filelock.can_wrlock(diri->get_loner()) || + !diri->nestlock.can_wrlock(diri->get_loner())) { dout(7) << "export_dir couldn't acquire all needed locks, failing. " << *dir << dendl; export_try_cancel(dir); @@ -2550,8 +2551,9 @@ void Migrator::handle_export_prep(const MExportDirPrep::const_ref &m, bool did_a dout(7) << " all ready, noting auth and freezing import region" << dendl; if (!mds->mdcache->is_readonly() && - diri->filelock.can_wrlock(-1) && - diri->nestlock.can_wrlock(-1)) { + // for pinning scatter gather. 
loner has a higher chance to get wrlock + diri->filelock.can_wrlock(diri->get_loner()) && + diri->nestlock.can_wrlock(diri->get_loner())) { it->second.mut = new MutationImpl(); // force some locks. hacky. mds->locker->wrlock_force(&dir->inode->filelock, it->second.mut); diff --git a/src/mds/Mutation.h b/src/mds/Mutation.h index fa5d3c715ec..3e12e570096 100644 --- a/src/mds/Mutation.h +++ b/src/mds/Mutation.h @@ -67,6 +67,7 @@ public: WRLOCK = 2, XLOCK = 4, REMOTE_WRLOCK = 8, + STATE_PIN = 16, // no RW after locked, just pin lock state }; SimpleLock* lock; mutable unsigned flags; @@ -85,6 +86,7 @@ public: flags &= ~REMOTE_WRLOCK; wrlock_target = MDS_RANK_NONE; } + bool is_state_pin() const { return !!(flags & STATE_PIN); } }; struct LockOpVec : public vector { @@ -102,6 +104,9 @@ public: ceph_assert(rank != MDS_RANK_NONE); emplace_back(lock, LockOp::REMOTE_WRLOCK, rank); } + void lock_scatter_gather(SimpleLock *lock) { + emplace_back(lock, LockOp::WRLOCK | LockOp::STATE_PIN); + } void sort_and_merge(); LockOpVec() {