From 43c43a5331d848cb6ace1bd24464bb839fc7eca7 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Thu, 12 Sep 2019 11:36:57 +0800 Subject: [PATCH] mds: take snaplock and policylock during path traverse. To take locks in top-down order for a MDRequest, we need to first take snap/policy rdlocks on ancestor inodes of the request's base inode. It's not convenient to use Locker::acquire_locks() to do the job because the path to the request's base inode can change before all of these locks are rdlocked. This patch introduces Locker::try_rdlock_snap_layout(), which tries taking snap/policy rdlocks on the request's base inode and its ancestors all at the same time. MDCache::path_traverse() calls this function first, then uses Locker::acquire_locks() to take snaplock on components of the request's path. This patch also reorders inode locks, putting snaplock and policylock at the front, because some requests (such as setattr) may xlock other locks after taking snaplock/policylock. Signed-off-by: "Yan, Zheng" --- src/include/ceph_fs.h | 28 +++++---- src/mds/Locker.cc | 91 ++++++++++++++++------------ src/mds/Locker.h | 7 +-- src/mds/MDCache.cc | 47 ++++++++++----- src/mds/MDCache.h | 4 +- src/mds/Migrator.cc | 11 +++- src/mds/Mutation.h | 12 +++- src/mds/Server.cc | 137 +++++++++++++++++++++--------------------- src/mds/Server.h | 7 +-- src/mds/SimpleLock.h | 3 +- 10 files changed, 196 insertions(+), 151 deletions(-) diff --git a/src/include/ceph_fs.h b/src/include/ceph_fs.h index d5b56e0df80..c941e12d4b4 100644 --- a/src/include/ceph_fs.h +++ b/src/include/ceph_fs.h @@ -318,19 +318,21 @@ extern const char *ceph_mds_state_name(int s); * - they also define the lock ordering by the MDS * - a few of these are internal to the mds */ -#define CEPH_LOCK_DVERSION 1 -#define CEPH_LOCK_DN 2 -#define CEPH_LOCK_IVERSION 16 /* mds internal */ -#define CEPH_LOCK_ISNAP 32 -#define CEPH_LOCK_IFILE 64 -#define CEPH_LOCK_IAUTH 128 -#define CEPH_LOCK_ILINK 256 -#define CEPH_LOCK_IDFT 512 /* dir frag tree */ -#define 
CEPH_LOCK_INEST 1024 /* mds internal */ -#define CEPH_LOCK_IXATTR 2048 -#define CEPH_LOCK_IFLOCK 4096 /* advisory file locks */ -#define CEPH_LOCK_INO 8192 /* immutable inode bits; not a lock */ -#define CEPH_LOCK_IPOLICY 16384 /* policy lock on dirs. MDS internal */ +#define CEPH_LOCK_DN (1 << 0) +#define CEPH_LOCK_DVERSION (1 << 1) +#define CEPH_LOCK_ISNAP (1 << 4) /* snapshot lock. MDS internal */ +#define CEPH_LOCK_IPOLICY (1 << 5) /* policy lock on dirs. MDS internal */ +#define CEPH_LOCK_IFILE (1 << 6) +#define CEPH_LOCK_INEST (1 << 7) /* mds internal */ +#define CEPH_LOCK_IDFT (1 << 8) /* dir frag tree */ +#define CEPH_LOCK_IAUTH (1 << 9) +#define CEPH_LOCK_ILINK (1 << 10) +#define CEPH_LOCK_IXATTR (1 << 11) +#define CEPH_LOCK_IFLOCK (1 << 12) /* advisory file locks */ +#define CEPH_LOCK_IVERSION (1 << 13) /* mds internal */ + +#define CEPH_LOCK_IFIRST CEPH_LOCK_ISNAP + /* client_session ops */ enum { diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc index 6f97169d8f5..3b33e9fbcd8 100644 --- a/src/mds/Locker.cc +++ b/src/mds/Locker.cc @@ -136,42 +136,62 @@ void Locker::send_lock_message(SimpleLock *lock, int msg, const bufferlist &data } } - - - -void Locker::include_snap_rdlocks(CInode *in, MutationImpl::LockOpVec& lov) +bool Locker::try_rdlock_snap_layout(CInode *in, MDRequestRef& mdr, + int n, bool want_layout) { + dout(10) << __func__ << " " << *mdr << " " << *in << dendl; // rdlock ancestor snaps - CInode *t = in; - while (t->get_projected_parent_dn()) { - t = t->get_projected_parent_dn()->get_dir()->get_inode(); - lov.add_rdlock(&t->snaplock); - } - lov.add_rdlock(&in->snaplock); -} + bool found_locked = false; + bool found_layout = false; + + if (want_layout) + ceph_assert(n == 0); + + client_t client = mdr->get_client(); -void Locker::include_snap_rdlocks_wlayout(CInode *in, MutationImpl::LockOpVec& lov, - file_layout_t **layout) -{ - //rdlock ancestor snaps CInode *t = in; - lov.add_rdlock(&in->snaplock); - lov.add_rdlock(&in->policylock); - bool 
found_layout = false; - while (t) { - lov.add_rdlock(&t->snaplock); - if (!found_layout) { - lov.add_rdlock(&t->policylock); + while (true) { + if (!found_locked && mdr->is_rdlocked(&t->snaplock)) + found_locked = true; + + if (!found_locked) { + if (!t->snaplock.can_rdlock(client)) { + t->snaplock.add_waiter(SimpleLock::WAIT_RD, new C_MDS_RetryRequest(mdcache, mdr)); + goto failed; + } + t->snaplock.get_rdlock(); + mdr->locks.emplace(&t->snaplock, MutationImpl::LockOp::RDLOCK); + dout(20) << " got rdlock on " << t->snaplock << " " << *t << dendl; + } + if (want_layout && !found_layout) { + if (!mdr->is_rdlocked(&t->policylock)) { + if (!t->policylock.can_rdlock(client)) { + t->policylock.add_waiter(SimpleLock::WAIT_RD, new C_MDS_RetryRequest(mdcache, mdr)); + goto failed; + } + t->policylock.get_rdlock(); + mdr->locks.emplace(&t->policylock, MutationImpl::LockOp::RDLOCK); + dout(20) << " got rdlock on " << t->policylock << " " << *t << dendl; + } if (t->get_projected_inode()->has_layout()) { - *layout = &t->get_projected_inode()->layout; - found_layout = true; + mdr->dir_layout = t->get_projected_inode()->layout; + found_layout = true; } } - if (t->get_projected_parent_dn() && - t->get_projected_parent_dn()->get_dir()) - t = t->get_projected_parent_dn()->get_dir()->get_inode(); - else t = NULL; + CDentry* pdn = t->get_projected_parent_dn(); + if (!pdn) + break; + t = pdn->get_dir()->get_inode(); } + + return true; + +failed: + dout(10) << __func__ << " failed" << dendl; + + drop_locks(mdr.get(), nullptr); + mdr->drop_local_auth_pins(); + return false; } struct MarkEventOnDestruct { @@ -195,11 +215,6 @@ bool Locker::acquire_locks(MDRequestRef& mdr, CInode *auth_pin_freeze, bool auth_pin_nonblocking) { - if (mdr->done_locking && - !mdr->is_slave()) { // not on slaves! master requests locks piecemeal. - dout(10) << "acquire_locks " << *mdr << " - done locking" << dendl; - return true; // at least we had better be! 
- } dout(10) << "acquire_locks " << *mdr << dendl; MarkEventOnDestruct marker(mdr, "failed to acquire_locks"); @@ -270,7 +285,7 @@ bool Locker::acquire_locks(MDRequestRef& mdr, lov.add_xlock(&dn->versionlock, i + 1); } } - if (lock->get_type() > CEPH_LOCK_IVERSION) { + if (lock->get_type() >= CEPH_LOCK_IFIRST && lock->get_type() != CEPH_LOCK_IVERSION) { // inode version lock? CInode *in = static_cast(object); if (!in->is_auth()) @@ -527,7 +542,6 @@ bool Locker::acquire_locks(MDRequestRef& mdr, } } - mdr->done_locking = true; mdr->set_mds_stamp(ceph_clock_now()); result = true; marker.message = "acquired locks"; @@ -663,7 +677,7 @@ void Locker::drop_locks(MutationImpl *mut, set *pneed_issue) if (pneed_issue == &my_need_issue) issue_caps_set(*pneed_issue); - mut->done_locking = false; + mut->locking_state = 0; } void Locker::drop_non_rdlocks(MutationImpl *mut, set *pneed_issue) @@ -1097,6 +1111,7 @@ void Locker::try_eval(SimpleLock *lock, bool *pneed_issue) if (lock->get_type() != CEPH_LOCK_DN && lock->get_type() != CEPH_LOCK_ISNAP && + lock->get_type() != CEPH_LOCK_IPOLICY && p->is_freezing()) { dout(7) << "try_eval " << *lock << " freezing, waiting on " << *p << dendl; p->add_waiter(MDSCacheObject::WAIT_UNFREEZE, new C_Locker_Eval(this, p, lock->get_type())); @@ -1623,6 +1638,7 @@ void Locker::_finish_xlock(SimpleLock *lock, client_t xlocker, bool *pneed_issue ceph_assert(!lock->is_stable()); if (lock->get_type() != CEPH_LOCK_DN && lock->get_type() != CEPH_LOCK_ISNAP && + lock->get_type() != CEPH_LOCK_IPOLICY && lock->get_num_rdlocks() == 0 && lock->get_num_wrlocks() == 0 && !lock->is_leased() && @@ -4060,7 +4076,8 @@ void Locker::simple_eval(SimpleLock *lock, bool *need_issue) if (lock->get_parent()->is_freezing_or_frozen()) { // dentry/snap lock in unreadable state can block path traverse if ((lock->get_type() != CEPH_LOCK_DN && - lock->get_type() != CEPH_LOCK_ISNAP) || + lock->get_type() != CEPH_LOCK_ISNAP && + lock->get_type() != CEPH_LOCK_IPOLICY) || 
lock->get_state() == LOCK_SYNC || lock->get_parent()->is_frozen()) return; diff --git a/src/mds/Locker.h b/src/mds/Locker.h index 53fb4fcbb67..c824fab81f6 100644 --- a/src/mds/Locker.h +++ b/src/mds/Locker.h @@ -51,15 +51,14 @@ public: void nudge_log(SimpleLock *lock); - void include_snap_rdlocks(CInode *in, MutationImpl::LockOpVec& lov); - void include_snap_rdlocks_wlayout(CInode *in, MutationImpl::LockOpVec& lov, - file_layout_t **layout); - bool acquire_locks(MDRequestRef& mdr, MutationImpl::LockOpVec& lov, CInode *auth_pin_freeze=NULL, bool auth_pin_nonblocking=false); + bool try_rdlock_snap_layout(CInode *in, MDRequestRef& mdr, + int n=0, bool want_layout=false); + void notify_freeze_waiter(MDSCacheObject *o); void cancel_locking(MutationImpl *mut, std::set *pneed_issue); void drop_locks(MutationImpl *mut, std::set *pneed_issue=0); diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index 0e7f8a6b733..06fed62cd36 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -8069,6 +8069,7 @@ int MDCache::path_traverse(MDRequestRef& mdr, MDSContextFactory& cf, bool last_xlocked = (flags & MDS_TRAVERSE_LAST_XLOCKED); bool want_dentry = (flags & MDS_TRAVERSE_WANT_DENTRY); bool want_auth = (flags & MDS_TRAVERSE_WANT_AUTH); + bool rdlock_snap = (flags & (MDS_TRAVERSE_RDLOCK_SNAP | MDS_TRAVERSE_RDLOCK_SNAP2)); if (forward) ceph_assert(mdr); // forward requires a request @@ -8101,12 +8102,24 @@ int MDCache::path_traverse(MDRequestRef& mdr, MDSContextFactory& cf, return 1; } + if (rdlock_snap) { + int n = (flags & MDS_TRAVERSE_RDLOCK_SNAP2) ? 
1 : 0; + if ((n == 0 && !(mdr->locking_state & MutationImpl::SNAP_LOCKED)) || + (n == 1 && !(mdr->locking_state & MutationImpl::SNAP2_LOCKED))) { + bool want_layout = (flags & MDS_TRAVERSE_WANT_DIRLAYOUT); + if (!mds->locker->try_rdlock_snap_layout(cur, mdr, n, want_layout)) + return 1; + } + } + // start trace if (pdnvec) pdnvec->clear(); if (pin) *pin = cur; + MutationImpl::LockOpVec lov; + unsigned depth = 0; while (depth < path.depth()) { dout(12) << "traverse: path seg depth " << depth << " '" << path[depth] @@ -8135,18 +8148,8 @@ int MDCache::path_traverse(MDRequestRef& mdr, MDSContextFactory& cf, snapid = realm->resolve_snapname(path[depth], cur->ino()); dout(10) << "traverse: snap " << path[depth] << " -> " << snapid << dendl; if (!snapid) { - CInode *t = cur; - while (t) { - // if snaplock isn't readable, it's possible that other mds is creating - // snapshot, but snap update message hasn't been received. - if (!t->snaplock.can_read(client)) { - dout(10) << " non-readable snaplock on " << *t << dendl; - t->snaplock.add_waiter(SimpleLock::WAIT_RD, cf.build()); - return 1; - } - CDentry *pdn = t->get_projected_parent_dn(); - t = pdn ? pdn->get_dir()->get_inode() : NULL; - } + if (pdnvec) + pdnvec->clear(); // do not confuse likes of rdlock_path_pin_ref(); return -ENOENT; } mdr->snapid = snapid; @@ -8278,6 +8281,15 @@ int MDCache::path_traverse(MDRequestRef& mdr, MDSContextFactory& cf, return 1; } + if (rdlock_snap && !(want_dentry && depth == path.depth() - 1)) { + lov.clear(); + lov.add_rdlock(&cur->snaplock); + if (!mds->locker->acquire_locks(mdr, lov)) { + dout(10) << "traverse: failed to rdlock " << cur->snaplock << " " << *cur << dendl; + return 1; + } + } + // add to trace, continue. 
touch_inode(cur); if (pin) @@ -8400,6 +8412,12 @@ int MDCache::path_traverse(MDRequestRef& mdr, MDSContextFactory& cf, dout(10) << "path_traverse finish on snapid " << snapid << dendl; if (mdr) ceph_assert(mdr->snapid == snapid); + + if (flags & MDS_TRAVERSE_RDLOCK_SNAP) + mdr->locking_state |= MutationImpl::SNAP_LOCKED; + else if (flags & MDS_TRAVERSE_RDLOCK_SNAP2) + mdr->locking_state |= MutationImpl::SNAP2_LOCKED; + return 0; } @@ -9563,7 +9581,7 @@ void MDCache::request_kill(MDRequestRef& mdr) // rollback slave requests is tricky. just let the request proceed. if (mdr->has_more() && (!mdr->more()->witnessed.empty() || !mdr->more()->waiting_on_slave.empty())) { - if (!mdr->done_locking) { + if (!(mdr->locking_state & MutationImpl::ALL_LOCKED)) { ceph_assert(mdr->more()->witnessed.empty()); mdr->aborted = true; dout(10) << "request_kill " << *mdr << " -- waiting for slave reply, delaying" << dendl; @@ -12906,10 +12924,7 @@ void MDCache::upgrade_inode_snaprealm_work(MDRequestRef& mdr) } MutationImpl::LockOpVec lov; - mds->locker->include_snap_rdlocks(in, lov); - lov.erase_rdlock(&in->snaplock); lov.add_xlock(&in->snaplock); - if (!mds->locker->acquire_locks(mdr, lov)) return; diff --git a/src/mds/MDCache.h b/src/mds/MDCache.h index 5cf1e35db64..6963208a594 100644 --- a/src/mds/MDCache.h +++ b/src/mds/MDCache.h @@ -115,7 +115,9 @@ static const int MDS_TRAVERSE_DISCOVER = (1 << 0); static const int MDS_TRAVERSE_LAST_XLOCKED = (1 << 1); static const int MDS_TRAVERSE_WANT_DENTRY = (1 << 2); static const int MDS_TRAVERSE_WANT_AUTH = (1 << 3); - +static const int MDS_TRAVERSE_RDLOCK_SNAP = (1 << 4); +static const int MDS_TRAVERSE_RDLOCK_SNAP2 = (1 << 5); +static const int MDS_TRAVERSE_WANT_DIRLAYOUT = (1 << 6); // flags for predirty_journal_parents() static const int PREDIRTY_PRIMARY = 1; // primary dn, adjust nested accounting diff --git a/src/mds/Migrator.cc b/src/mds/Migrator.cc index 3b2a51dcb3b..9ab9824527e 100644 --- a/src/mds/Migrator.cc +++ b/src/mds/Migrator.cc 
@@ -3158,6 +3158,8 @@ void Migrator::decode_import_inode(CDentry *dn, bufferlist::const_iterator& blp, map >& peer_exports, list& updated_scatterlocks) { + CInode *in; + bool added = false; DECODE_START(1, blp); dout(15) << __func__ << " on " << *dn << dendl; @@ -3166,8 +3168,7 @@ void Migrator::decode_import_inode(CDentry *dn, bufferlist::const_iterator& blp, decode(ino, blp); decode(last, blp); - bool added = false; - CInode *in = cache->get_inode(ino, last); + in = cache->get_inode(ino, last); if (!in) { in = new CInode(mds->mdcache, true, 1, last); added = true; @@ -3179,6 +3180,8 @@ void Migrator::decode_import_inode(CDentry *dn, bufferlist::const_iterator& blp, // caps decode_import_inode_caps(in, true, blp, peer_exports); + DECODE_FINISH(blp); + // link before state -- or not! -sage if (dn->get_linkage()->get_inode() != in) { ceph_assert(!dn->get_linkage()->get_inode()); @@ -3221,7 +3224,9 @@ void Migrator::decode_import_inode(CDentry *dn, bufferlist::const_iterator& blp, in->snaplock.get_state() != LOCK_SYNC) mds->locker->try_eval(&in->snaplock, NULL); - DECODE_FINISH(blp); + if (in->policylock.is_stable() && + in->policylock.get_state() != LOCK_SYNC) + mds->locker->try_eval(&in->policylock, NULL); } void Migrator::decode_import_inode_caps(CInode *in, bool auth_cap, diff --git a/src/mds/Mutation.h b/src/mds/Mutation.h index 316fb445201..456779a1752 100644 --- a/src/mds/Mutation.h +++ b/src/mds/Mutation.h @@ -170,7 +170,14 @@ public: // if this flag is set, do not attempt to acquire further locks. 
// (useful for wrlock, which may be a moving auth target) - bool done_locking = false; + enum { + SNAP_LOCKED = 1, + SNAP2_LOCKED = 2, + PATH_LOCKED = 4, + ALL_LOCKED = 8, + }; + int locking_state = 0; + bool committing = false; bool aborted = false; bool killed = false; @@ -278,10 +285,11 @@ struct MDRequestImpl : public MutationImpl { // -- i am a client (master) request cref_t client_request; // client request (if any) + file_layout_t dir_layout; // store up to two sets of dn vectors, inode pointers, for request path1 and path2. vector dn[2]; - CDentry *straydn; CInode *in[2]; + CDentry *straydn; snapid_t snapid; CInode *tracei; diff --git a/src/mds/Server.cc b/src/mds/Server.cc index e05344ccf45..3c883225d4f 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -3110,7 +3110,6 @@ CDentry* Server::prepare_stray_dentry(MDRequestRef& mdr, CInode *in) if (straydn->get_name() == straydname) return straydn; - ceph_assert(!mdr->done_locking); mdr->unpin(straydn); } @@ -3306,13 +3305,13 @@ CInode* Server::rdlock_path_pin_ref(MDRequestRef& mdr, int n, bool want_auth, bool no_want_auth, /* for readdir, who doesn't want auth _even_if_ it's a snapped dir */ - file_layout_t **layout, + bool want_layout, bool no_lookup) // true if we cannot return a null dentry lease { const filepath& refpath = n ? mdr->get_filepath2() : mdr->get_filepath(); dout(10) << "rdlock_path_pin_ref " << *mdr << " " << refpath << dendl; - if (mdr->done_locking) + if (mdr->locking_state & MutationImpl::PATH_LOCKED) return mdr->in[n]; if (!no_want_auth && refpath.is_last_snap()) @@ -3320,9 +3319,11 @@ CInode* Server::rdlock_path_pin_ref(MDRequestRef& mdr, int n, // traverse CF_MDS_MDRContextFactory cf(mdcache, mdr); - int flags = 0; + int flags = n == 0 ? 
MDS_TRAVERSE_RDLOCK_SNAP : MDS_TRAVERSE_RDLOCK_SNAP2; if (want_auth) flags |= MDS_TRAVERSE_WANT_AUTH; + if (want_layout) + flags |= MDS_TRAVERSE_WANT_DIRLAYOUT; int r = mdcache->path_traverse(mdr, cf, refpath, flags, &mdr->dn[n], &mdr->in[n]); if (r > 0) return nullptr; // delayed @@ -3376,10 +3377,6 @@ CInode* Server::rdlock_path_pin_ref(MDRequestRef& mdr, int n, for (int i=0; i<(int)mdr->dn[n].size(); i++) lov.add_rdlock(&mdr->dn[n][i]->lock); - if (layout) - mds->locker->include_snap_rdlocks_wlayout(ref, lov, layout); - else - mds->locker->include_snap_rdlocks(ref, lov); // set and pin ref mdr->pin(ref); @@ -3396,16 +3393,22 @@ CInode* Server::rdlock_path_pin_ref(MDRequestRef& mdr, int n, CDentry* Server::rdlock_path_xlock_dentry(MDRequestRef& mdr, int n, MutationImpl::LockOpVec& lov, bool okexist, bool alwaysxlock, - file_layout_t **layout) + bool want_layout) { const filepath& refpath = n ? mdr->get_filepath2() : mdr->get_filepath(); dout(10) << "rdlock_path_xlock_dentry " << *mdr << " " << refpath << dendl; - if (mdr->done_locking) + if (mdr->locking_state & MutationImpl::PATH_LOCKED) return mdr->dn[n].back(); CF_MDS_MDRContextFactory cf(mdcache, mdr); int flags = MDS_TRAVERSE_WANT_DENTRY | MDS_TRAVERSE_WANT_AUTH; + if (n == 0) + flags |= MDS_TRAVERSE_RDLOCK_SNAP; + else + flags |= MDS_TRAVERSE_RDLOCK_SNAP2; + if (want_layout) + flags |= MDS_TRAVERSE_WANT_DIRLAYOUT; int r = mdcache->path_traverse(mdr, cf, refpath, flags, &mdr->dn[n]); if (r > 0) return nullptr; // delayed @@ -3466,10 +3469,6 @@ CDentry* Server::rdlock_path_xlock_dentry(MDRequestRef& mdr, int n, lov.add_rdlock(&dn->lock); // existing dn, rdlock lov.add_wrlock(&dn->get_dir()->inode->filelock); // also, wrlock on dir mtime lov.add_wrlock(&dn->get_dir()->inode->nestlock); // also, wrlock on dir mtime - if (layout) - mds->locker->include_snap_rdlocks_wlayout(dn->get_dir()->inode, lov, layout); - else - mds->locker->include_snap_rdlocks(dn->get_dir()->inode, lov); return dn; } @@ -3607,7 +3606,7 @@ 
void Server::handle_client_getattr(MDRequestRef& mdr, bool is_lookup) ref->filelock.get_num_wrlocks() > 0 || !ref->filelock.can_read(mdr->get_client())) { lov.add_rdlock(&ref->filelock); - mdr->done_locking = false; + mdr->locking_state &= ~MutationImpl::ALL_LOCKED; } } @@ -3865,7 +3864,7 @@ void Server::handle_client_open(MDRequestRef& mdr) if (cur->is_frozen() || cur->state_test(CInode::STATE_EXPORTINGCAPS)) { ceph_assert(!need_auth); - mdr->done_locking = false; + mdr->locking_state &= ~MutationImpl::PATH_LOCKED; CInode *cur = rdlock_path_pin_ref(mdr, 0, lov, true); if (!cur) return; @@ -4097,9 +4096,7 @@ void Server::handle_client_openc(MDRequestRef& mdr) } MutationImpl::LockOpVec lov; - file_layout_t *dir_layout = nullptr; - CDentry *dn = rdlock_path_xlock_dentry(mdr, 0, lov, - !excl, false, &dir_layout); + CDentry *dn = rdlock_path_xlock_dentry(mdr, 0, lov, !excl, false, true); if (!dn) return; if (mdr->snapid != CEPH_NOSNAP) { respond_to_request(mdr, -EROFS); @@ -4107,8 +4104,8 @@ void Server::handle_client_openc(MDRequestRef& mdr) } // set layout file_layout_t layout; - if (dir_layout) - layout = *dir_layout; + if (mdr->dir_layout != file_layout_t()) + layout = mdr->dir_layout; else layout = mdcache->default_file_layout; @@ -4982,8 +4979,7 @@ void Server::handle_client_setdirlayout(MDRequestRef& mdr) { const cref_t &req = mdr->client_request; MutationImpl::LockOpVec lov; - file_layout_t *dir_layout = nullptr; - CInode *cur = rdlock_path_pin_ref(mdr, 0, lov, true, false, &dir_layout); + CInode *cur = rdlock_path_pin_ref(mdr, 0, lov, true, false, true); if (!cur) return; if (mdr->snapid != CEPH_NOSNAP) { @@ -5005,8 +5001,8 @@ void Server::handle_client_setdirlayout(MDRequestRef& mdr) file_layout_t layout; if (old_pi->has_layout()) layout = old_pi->layout; - else if (dir_layout) - layout = *dir_layout; + else if (mdr->dir_layout != file_layout_t()) + layout = mdr->dir_layout; else layout = mdcache->default_file_layout; @@ -5264,7 +5260,6 @@ int 
Server::check_layout_vxattr(MDRequestRef& mdr, } void Server::handle_set_vxattr(MDRequestRef& mdr, CInode *cur, - file_layout_t *dir_layout, MutationImpl::LockOpVec& lov) { const cref_t &req = mdr->client_request; @@ -5293,8 +5288,8 @@ void Server::handle_set_vxattr(MDRequestRef& mdr, CInode *cur, file_layout_t layout; if (cur->get_projected_inode()->has_layout()) layout = cur->get_projected_inode()->layout; - else if (dir_layout) - layout = *dir_layout; + else if (mdr->dir_layout != file_layout_t()) + layout = mdr->dir_layout; else layout = mdcache->default_file_layout; @@ -5424,7 +5419,6 @@ void Server::handle_set_vxattr(MDRequestRef& mdr, CInode *cur, } void Server::handle_remove_vxattr(MDRequestRef& mdr, CInode *cur, - file_layout_t *dir_layout, MutationImpl::LockOpVec& lov) { const cref_t &req = mdr->client_request; @@ -5473,7 +5467,7 @@ void Server::handle_remove_vxattr(MDRequestRef& mdr, CInode *cur, // null/none value (empty string, means default layout). Is equivalent // to a setxattr with empty string: pass through the empty payload of // the rmxattr request to do this. - handle_set_vxattr(mdr, cur, dir_layout, lov); + handle_set_vxattr(mdr, cur, lov); return; } @@ -5507,9 +5501,8 @@ void Server::handle_client_setxattr(MDRequestRef& mdr) MutationImpl::LockOpVec lov; CInode *cur; - file_layout_t *dir_layout = NULL; if (name.compare(0, 15, "ceph.dir.layout") == 0) - cur = rdlock_path_pin_ref(mdr, 0, lov, true, false, &dir_layout); + cur = rdlock_path_pin_ref(mdr, 0, lov, true, false, true); else cur = rdlock_path_pin_ref(mdr, 0, lov, true); if (!cur) @@ -5524,7 +5517,7 @@ void Server::handle_client_setxattr(MDRequestRef& mdr) // magic ceph.* namespace? 
if (name.compare(0, 5, "ceph.") == 0) { - handle_set_vxattr(mdr, cur, dir_layout, lov); + handle_set_vxattr(mdr, cur, lov); return; } @@ -5605,10 +5598,9 @@ void Server::handle_client_removexattr(MDRequestRef& mdr) std::string name(req->get_path2()); MutationImpl::LockOpVec lov; - file_layout_t *dir_layout = nullptr; CInode *cur; if (name == "ceph.dir.layout") - cur = rdlock_path_pin_ref(mdr, 0, lov, true, false, &dir_layout); + cur = rdlock_path_pin_ref(mdr, 0, lov, true, false, true); else cur = rdlock_path_pin_ref(mdr, 0, lov, true); if (!cur) @@ -5620,7 +5612,7 @@ void Server::handle_client_removexattr(MDRequestRef& mdr) } if (name.compare(0, 5, "ceph.") == 0) { - handle_remove_vxattr(mdr, cur, dir_layout, lov); + handle_remove_vxattr(mdr, cur, lov); return; } @@ -5718,9 +5710,7 @@ void Server::handle_client_mknod(MDRequestRef& mdr) const cref_t &req = mdr->client_request; client_t client = mdr->get_client(); MutationImpl::LockOpVec lov; - file_layout_t *dir_layout = nullptr; - CDentry *dn = rdlock_path_xlock_dentry(mdr, 0, lov, false, false, - &dir_layout); + CDentry *dn = rdlock_path_xlock_dentry(mdr, 0, lov, false, false, true); if (!dn) return; if (mdr->snapid != CEPH_NOSNAP) { respond_to_request(mdr, -EROFS); @@ -5743,8 +5733,8 @@ void Server::handle_client_mknod(MDRequestRef& mdr) // set layout file_layout_t layout; - if (dir_layout && S_ISREG(mode)) - layout = *dir_layout; + if (S_ISREG(mode) && mdr->dir_layout != file_layout_t()) + layout = mdr->dir_layout; else layout = mdcache->default_file_layout; @@ -6709,7 +6699,8 @@ void Server::handle_client_unlink(MDRequestRef& mdr) lov.add_xlock(&straydn->lock); } - mds->locker->include_snap_rdlocks(diri, lov); + // FIXME + // mds->locker->include_snap_rdlocks(diri, lov); lov.add_xlock(&in->snaplock); if (in->is_dir()) lov.add_rdlock(&in->filelock); // to verify it's empty @@ -7583,7 +7574,8 @@ void Server::handle_client_rename(MDRequestRef& mdr) lov.add_wrlock(&srcdir->inode->filelock); 
lov.add_wrlock(&srcdir->inode->nestlock); } - mds->locker->include_snap_rdlocks(srcdir->inode, lov); + // FIXME + // mds->locker->include_snap_rdlocks(srcdir->inode, lov); // straydn? if (straydn) { @@ -9510,9 +9502,7 @@ void Server::handle_client_lssnap(MDRequestRef& mdr) dout(10) << "lssnap on " << *diri << dendl; // lock snap - MutationImpl::LockOpVec lov; - mds->locker->include_snap_rdlocks(diri, lov); - if (!mds->locker->acquire_locks(mdr, lov)) + if (!mds->locker->try_rdlock_snap_layout(diri, mdr)) return; if (!check_access(mdr, diri, MAY_READ)) @@ -9651,14 +9641,18 @@ void Server::handle_client_mksnap(MDRequestRef& mdr) dout(10) << "mksnap " << snapname << " on " << *diri << dendl; // lock snap - MutationImpl::LockOpVec lov; - - mds->locker->include_snap_rdlocks(diri, lov); - lov.erase_rdlock(&diri->snaplock); - lov.add_xlock(&diri->snaplock); + if (!(mdr->locking_state & MutationImpl::ALL_LOCKED)) { + MutationImpl::LockOpVec lov; + lov.add_xlock(&diri->snaplock); + if (!mds->locker->acquire_locks(mdr, lov)) + return; - if (!mds->locker->acquire_locks(mdr, lov)) - return; + if (CDentry *pdn = diri->get_projected_parent_dn(); pdn) { + if (!mds->locker->try_rdlock_snap_layout(pdn->get_dir()->get_inode(), mdr)) + return; + } + mdr->locking_state |= MutationImpl::ALL_LOCKED; + } if (!check_access(mdr, diri, MAY_WRITE|MAY_SNAPSHOT)) return; @@ -9820,13 +9814,17 @@ void Server::handle_client_rmsnap(MDRequestRef& mdr) snapid_t snapid = diri->snaprealm->resolve_snapname(snapname, diri->ino()); dout(10) << " snapname " << snapname << " is " << snapid << dendl; - MutationImpl::LockOpVec lov; - mds->locker->include_snap_rdlocks(diri, lov); - lov.erase_rdlock(&diri->snaplock); - lov.add_xlock(&diri->snaplock); - - if (!mds->locker->acquire_locks(mdr, lov)) - return; + if (!(mdr->locking_state & MutationImpl::ALL_LOCKED)) { + MutationImpl::LockOpVec lov; + lov.add_xlock(&diri->snaplock); + if (!mds->locker->acquire_locks(mdr, lov)) + return; + if (CDentry *pdn = 
diri->get_projected_parent_dn(); pdn) { + if (!mds->locker->try_rdlock_snap_layout(pdn->get_dir()->get_inode(), mdr)) + return; + } + mdr->locking_state |= MutationImpl::ALL_LOCKED; + } if (!check_access(mdr, diri, MAY_WRITE|MAY_SNAPSHOT)) return; @@ -9969,14 +9967,17 @@ void Server::handle_client_renamesnap(MDRequestRef& mdr) dout(10) << " snapname " << srcname << " is " << snapid << dendl; // lock snap - MutationImpl::LockOpVec lov; - - mds->locker->include_snap_rdlocks(diri, lov); - lov.erase_rdlock(&diri->snaplock); - lov.add_xlock(&diri->snaplock); - - if (!mds->locker->acquire_locks(mdr, lov)) - return; + if (!(mdr->locking_state & MutationImpl::ALL_LOCKED)) { + MutationImpl::LockOpVec lov; + lov.add_xlock(&diri->snaplock); + if (!mds->locker->acquire_locks(mdr, lov)) + return; + if (CDentry *pdn = diri->get_projected_parent_dn(); pdn) { + if (!mds->locker->try_rdlock_snap_layout(pdn->get_dir()->get_inode(), mdr)) + return; + } + mdr->locking_state |= MutationImpl::ALL_LOCKED; + } if (!check_access(mdr, diri, MAY_WRITE|MAY_SNAPSHOT)) return; diff --git a/src/mds/Server.h b/src/mds/Server.h index 3b49c47dda5..f46d033e72d 100644 --- a/src/mds/Server.h +++ b/src/mds/Server.h @@ -216,12 +216,11 @@ public: CInode* rdlock_path_pin_ref(MDRequestRef& mdr, int n, MutationImpl::LockOpVec& lov, bool want_auth, bool no_want_auth=false, - file_layout_t **layout=nullptr, - bool no_lookup=false); + bool want_layout=false, bool no_lookup=false); CDentry* rdlock_path_xlock_dentry(MDRequestRef& mdr, int n, MutationImpl::LockOpVec& lov, bool okexist, bool alwaysxlock, - file_layout_t **layout=nullptr); + bool want_layout=false); CDir* try_open_auth_dirfrag(CInode *diri, frag_t fg, MDRequestRef& mdr); @@ -249,10 +248,8 @@ public: string value, file_layout_t *layout); void handle_set_vxattr(MDRequestRef& mdr, CInode *cur, - file_layout_t *dir_layout, MutationImpl::LockOpVec& lov); void handle_remove_vxattr(MDRequestRef& mdr, CInode *cur, - file_layout_t *dir_layout, 
MutationImpl::LockOpVec& lov); void handle_client_setxattr(MDRequestRef& mdr); void handle_client_removexattr(MDRequestRef& mdr); diff --git a/src/mds/SimpleLock.h b/src/mds/SimpleLock.h index 27220223031..8a3de2977f1 100644 --- a/src/mds/SimpleLock.h +++ b/src/mds/SimpleLock.h @@ -142,10 +142,9 @@ public: case CEPH_LOCK_INEST: return "inest"; case CEPH_LOCK_IXATTR: return "ixattr"; case CEPH_LOCK_ISNAP: return "isnap"; - case CEPH_LOCK_INO: return "ino"; case CEPH_LOCK_IFLOCK: return "iflock"; case CEPH_LOCK_IPOLICY: return "ipolicy"; - default: ceph_abort(); return std::string_view(); + default: return "unknown"; } } -- 2.39.5