MDS wrlocks the filelock during directory fragmentation and subtree migration.
The purpose of the wrlock is to prevent the scatter-gather process. The MDS
does not modify directory contents after the filelock is locked. The MDS can
pretend that the wrlock is being acquired by the loner, so it has a higher
chance of getting the wrlock.
Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
}
} else if (p.is_wrlock()) {
dout(20) << " must wrlock " << *lock << " " << *object << dendl;
+ client_t _client = p.is_state_pin() ? lock->get_excl_client() : client;
if (object->is_auth()) {
mustpin.insert(object);
} else if (!object->is_auth() &&
- !lock->can_wrlock(client) && // we might have to request a scatter
+ !lock->can_wrlock(_client) && // we might have to request a scatter
!mdr->is_slave()) { // if we are slave (remote_wrlock), the master already authpinned
dout(15) << " will also auth_pin " << *object
<< " in case we need to request a scatter" << dendl;
}
if (need_wrlock) {
marker.message = "failed to wrlock, waiting";
- if (need_remote_wrlock && !p.lock->can_wrlock(mdr->get_client())) {
+ client_t _client = p.is_state_pin() ? p.lock->get_excl_client() : client;
+ if (need_remote_wrlock && !p.lock->can_wrlock(_client)) {
marker.message = "failed to wrlock, dropping remote wrlock and waiting";
// can't take the wrlock because the scatter lock is gathering. need to
// release the remote wrlock, so that the gathering process can finish.
mut->locks.emplace(lock, MutationImpl::LockOp::WRLOCK);
}
-bool Locker::wrlock_start(SimpleLock *lock, MDRequestRef& mut, bool nowait)
+bool Locker::wrlock_start(const MutationImpl::LockOp &op, MDRequestRef& mut, bool nowait)
{
+ SimpleLock *lock = op.lock;
if (lock->get_type() == CEPH_LOCK_IVERSION ||
lock->get_type() == CEPH_LOCK_DVERSION)
return local_wrlock_start(static_cast<LocalLock*>(lock), mut);
dout(10) << "wrlock_start " << *lock << " on " << *lock->get_parent() << dendl;
CInode *in = static_cast<CInode *>(lock->get_parent());
- client_t client = mut->get_client();
+ client_t client = op.is_state_pin() ? lock->get_excl_client() : mut->get_client();
bool want_scatter = !nowait && lock->get_parent()->is_auth() &&
(in->has_subtree_or_exporting_dirfrag() ||
static_cast<ScatterLock*>(lock)->get_scatter_wanted());
void rdlock_take_set(MutationImpl::LockOpVec& lov, MutationRef& mut);
void wrlock_force(SimpleLock *lock, MutationRef& mut);
- bool wrlock_start(SimpleLock *lock, MDRequestRef& mut, bool nowait=false);
+ bool wrlock_start(const MutationImpl::LockOp &op, MDRequestRef& mut, bool nowait=false);
void wrlock_finish(const MutationImpl::lock_iterator& it, MutationImpl *mut, bool *pneed_issue);
void remote_wrlock_start(SimpleLock *lock, mds_rank_t target, MDRequestRef& mut);
MutationImpl::LockOpVec lov;
lov.add_wrlock(&diri->dirfragtreelock);
// prevent a racing gather on any other scatterlocks too
- lov.add_wrlock(&diri->nestlock);
- lov.add_wrlock(&diri->filelock);
+ lov.lock_scatter_gather(&diri->nestlock);
+ lov.lock_scatter_gather(&diri->filelock);
if (!mds->locker->acquire_locks(mdr, lov, NULL, true))
if (!mdr->aborted)
return;
// are not auth MDS of the subtree root at the time they receive the
// lock messages. So the auth MDS of the subtree root inode may get no
// or duplicated fragstat/neststat for the subtree root dirfrag.
- lov.add_wrlock(&dir->get_inode()->filelock);
- lov.add_wrlock(&dir->get_inode()->nestlock);
+ lov.lock_scatter_gather(&dir->get_inode()->filelock);
+ lov.lock_scatter_gather(&dir->get_inode()->nestlock);
if (dir->get_inode()->is_auth()) {
dir->get_inode()->filelock.set_scatter_wanted();
dir->get_inode()->nestlock.set_scatter_wanted();
get_export_lock_set(dir, lov);
if ((diri->is_auth() && diri->is_frozen()) ||
!mds->locker->can_rdlock_set(lov) ||
- !diri->filelock.can_wrlock(-1) ||
- !diri->nestlock.can_wrlock(-1)) {
+ // for pinning scatter gather. loner has a higher chance to get wrlock
+ !diri->filelock.can_wrlock(diri->get_loner()) ||
+ !diri->nestlock.can_wrlock(diri->get_loner())) {
dout(7) << "export_dir couldn't acquire all needed locks, failing. "
<< *dir << dendl;
export_try_cancel(dir);
dout(7) << " all ready, noting auth and freezing import region" << dendl;
if (!mds->mdcache->is_readonly() &&
- diri->filelock.can_wrlock(-1) &&
- diri->nestlock.can_wrlock(-1)) {
+ // for pinning scatter gather. loner has a higher chance to get wrlock
+ diri->filelock.can_wrlock(diri->get_loner()) &&
+ diri->nestlock.can_wrlock(diri->get_loner())) {
it->second.mut = new MutationImpl();
// force some locks. hacky.
mds->locker->wrlock_force(&dir->inode->filelock, it->second.mut);
WRLOCK = 2,
XLOCK = 4,
REMOTE_WRLOCK = 8,
+ STATE_PIN = 16, // no RW after locked, just pin lock state
};
SimpleLock* lock;
mutable unsigned flags;
flags &= ~REMOTE_WRLOCK;
wrlock_target = MDS_RANK_NONE;
}
+ bool is_state_pin() const { return !!(flags & STATE_PIN); }
};
struct LockOpVec : public vector<LockOp> {
ceph_assert(rank != MDS_RANK_NONE);
emplace_back(lock, LockOp::REMOTE_WRLOCK, rank);
}
+ void lock_scatter_gather(SimpleLock *lock) {
+ emplace_back(lock, LockOp::WRLOCK | LockOp::STATE_PIN);
+ }
void sort_and_merge();
LockOpVec() {