From e6ee6c4fbcaadcb6ac0bce99f6955d696459c651 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 6 Jan 2011 14:28:01 -0800 Subject: [PATCH] mds: take rdlocks on bounding dftlocks; clean up migrator lock code We need to take an rdlock on bounding dirfrags during migration for a rather irritating reason: when we export the bound inode, we need to send scatterlock state for the dirfrags as well, so that the new auth also gets the correct info. If we race with a refragment, this info is useless, as we can't redivvy it up. And it's needed for the scatterlocks to work properly: when the auth is in a sync/lock state it keeps each dirfrag's portion in the local (auth OR replica) dirfrag. So: take a rdlock on the bounding dirfrags to avoid this. Clean up the Locker bulk rdlock interface while we're at it to be more general and useful. Also, while we're here, do an rdlock_try at this point. Note that we still are going to fail more often than before, since dftlocks will frequently be scattered if there has been a recent fragmentation. There is some inevitable conflict here between refragmentation (which wants dftlock in MIX) and exports (which want it SYNC). TODO. Signed-off-by: Sage Weil --- src/mds/Locker.cc | 86 ++++++++++++++++++++------------------------- src/mds/Locker.h | 8 ++--- src/mds/Migrator.cc | 63 +++++++++++++++++++++------------ src/mds/Migrator.h | 2 ++ 4 files changed, 86 insertions(+), 73 deletions(-) diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc index 2486aafcedb21..1427040a2cbb9 100644 --- a/src/mds/Locker.cc +++ b/src/mds/Locker.cc @@ -813,9 +813,10 @@ bool Locker::rdlock_try(SimpleLock *lock, client_t client, Context *con) return true; // wait! - dout(7) << "rdlock_try waiting on " << *lock << " on " << *lock->get_parent() << dendl; - if (con) + if (con) { + dout(7) << "rdlock_try waiting on " << *lock << " on " << *lock->get_parent() << dendl; lock->add_waiter(SimpleLock::WAIT_STABLE|SimpleLock::WAIT_RD, con); + } return false; } @@ -909,6 +910,42 @@ void Locker::rdlock_finish(SimpleLock *lock, Mutation *mut) } +bool Locker::can_rdlock_set(set& locks) +{ + dout(10) << "can_rdlock_set " << locks << dendl; + for (set::iterator p = locks.begin(); p != locks.end(); ++p) + if (!(*p)->can_rdlock(-1)) { + dout(10) << "can_rdlock_set can't rdlock " << *p << " on " << *(*p)->get_parent() << dendl; + return false; + } + return true; +} + +bool Locker::rdlock_try_set(set& locks) +{ + dout(10) << "rdlock_try_set " << locks << dendl; + for (set::iterator p = locks.begin(); p != locks.end(); ++p) + if (!rdlock_try(*p, -1, NULL)) { + dout(10) << "rdlock_try_set can't rdlock " << *p << " on " << *(*p)->get_parent() << dendl; + return false; + } + return true; +} + +void Locker::rdlock_take_set(set& locks) +{ + dout(10) << "rdlock_take_set " << locks << dendl; + for (set::iterator p = locks.begin(); p != locks.end(); ++p) + (*p)->get_rdlock(); +} + +void Locker::rdlock_finish_set(set& locks) +{ + dout(10) << "rdlock_finish_set " << locks << dendl; + for (set::iterator p = locks.begin(); p != locks.end(); ++p) + rdlock_finish(*p, 0); +} + // ------------------ // wrlock @@ -3186,51 +3223,6 @@ void Locker::simple_xlock(SimpleLock *lock) -// dentry specific helpers - -/** dentry_can_rdlock_trace - * see if we can _anonymously_ rdlock an entire trace. - * if not, and req is specified, wait and retry that message. - */ -bool Locker::dentry_can_rdlock_trace(vector& trace) -{ - // verify dentries are rdlockable. - // we do this because - // - we're being less aggressive about locks acquisition, and - // - we're not acquiring the locks in order! - for (vector::iterator it = trace.begin(); - it != trace.end(); - it++) { - CDentry *dn = *it; - if (!dn->lock.can_rdlock(-1)) { - dout(10) << "can_rdlock_trace can't rdlock " << *dn << dendl; - return false; - } - } - return true; -} - -void Locker::dentry_anon_rdlock_trace_start(vector& trace) -{ - // grab dentry rdlocks - for (vector::iterator it = trace.begin(); - it != trace.end(); - it++) { - dout(10) << "dentry_anon_rdlock_trace_start rdlocking " << (*it)->lock << " " << **it << dendl; - (*it)->lock.get_rdlock(); - } -} - - -void Locker::dentry_anon_rdlock_trace_finish(vector& trace) -{ - for (vector::iterator it = trace.begin(); - it != trace.end(); - it++) - rdlock_finish(&(*it)->lock, 0); -} - - // ========================================================================== // scatter lock diff --git a/src/mds/Locker.h b/src/mds/Locker.h index af044f36192f0..67bf68588e284 100644 --- a/src/mds/Locker.h +++ b/src/mds/Locker.h @@ -124,6 +124,10 @@ public: bool rdlock_try(SimpleLock *lock, client_t client, Context *c); bool rdlock_start(SimpleLock *lock, MDRequest *mut, bool as_anon=false); void rdlock_finish(SimpleLock *lock, Mutation *mut); + bool can_rdlock_set(set& locks); + bool rdlock_try_set(set& locks); + void rdlock_take_set(set& locks); + void rdlock_finish_set(set& locks); void wrlock_force(SimpleLock *lock, Mutation *mut); bool wrlock_start(SimpleLock *lock, MDRequest *mut, bool nowait=false); @@ -147,10 +151,6 @@ protected: void simple_excl(SimpleLock *lock, bool *need_issue=0); void simple_xlock(SimpleLock *lock); -public: - bool dentry_can_rdlock_trace(vector& trace); - void dentry_anon_rdlock_trace_start(vector& trace); - void dentry_anon_rdlock_trace_finish(vector& trace); // scatter public: diff --git a/src/mds/Migrator.cc b/src/mds/Migrator.cc index f438b1ac3aa98..35b01c957b9cd 100644 --- a/src/mds/Migrator.cc +++ b/src/mds/Migrator.cc @@ -544,6 +544,32 @@ public: }; +void Migrator::get_export_lock_set(CDir *dir, set& locks) +{ + // path + vector trace; + cache->make_trace(trace, dir->inode); + for (vector::iterator it = trace.begin(); + it != trace.end(); + it++) + locks.insert(&(*it)->lock); + + // bound dftlocks: + // NOTE: We need to take an rdlock on bounding dirfrags during + // migration for a rather irritating reason: when we export the + // bound inode, we need to send scatterlock state for the dirfrags + // as well, so that the new auth also gets the correct info. If we + // race with a refragment, this info is useless, as we can't + // redivvy it up. And it's needed for the scatterlocks to work + // properly: when the auth is in a sync/lock state it keeps each + // dirfrag's portion in the local (auth OR replica) dirfrag. + set wouldbe_bounds; + cache->get_wouldbe_subtree_bounds(dir, wouldbe_bounds); + for (set::iterator p = wouldbe_bounds.begin(); p != wouldbe_bounds.end(); ++p) + locks.insert(&(*p)->get_inode()->dirfragtreelock); +} + + /** export_dir(dir, dest) * public method to initiate an export. * will fail if the directory is freezing, frozen, unpinnable, or root. @@ -580,11 +606,11 @@ void Migrator::export_dir(CDir *dir, int dest) return; } - // pin path? - vector trace; - cache->make_trace(trace, dir->inode); - if (!mds->locker->dentry_can_rdlock_trace(trace)) { - dout(7) << "export_dir couldn't pin path, failing." << dendl; + // locks? + set locks; + get_export_lock_set(dir, locks); + if (!mds->locker->rdlock_try_set(locks)) { + dout(7) << "export_dir can't rdlock needed locks, failing." << dendl; return; } @@ -645,16 +671,13 @@ void Migrator::export_frozen(CDir *dir) assert(dir->get_cum_auth_pins() == 0); int dest = export_peer[dir]; - - // ok, try to grab all my locks. - - // pin path? - vector trace; - cache->make_trace(trace, dir->inode); - CInode *diri = dir->inode; - if (!mds->locker->dentry_can_rdlock_trace(trace)) { - dout(7) << "export_dir couldn't rdlock path or rd|wrlock parent inode file+nest+dftlock, failing. " + + // ok, try to grab all my locks. + set locks; + get_export_lock_set(dir, locks); + if (!mds->locker->can_rdlock_set(locks)) { + dout(7) << "export_dir couldn't rdlock all needed locks, failing. " << *diri << dendl; // .. unwind .. @@ -669,10 +692,7 @@ void Migrator::export_frozen(CDir *dir) mds->send_message_mds(new MExportDirCancel(dir->dirfrag()), dest); return; } - - dout(10) << " taking locks on path, parent inode scatterlocks" << dendl; - // rdlock path - mds->locker->dentry_anon_rdlock_trace_start(trace); + mds->locker->rdlock_take_set(locks); cache->show_subtrees(); @@ -1364,10 +1384,9 @@ void Migrator::export_unlock(CDir *dir) { dout(10) << "export_unlock " << *dir << dendl; - // unpin the path - vector trace; - cache->make_trace(trace, dir->inode); - mds->locker->dentry_anon_rdlock_trace_finish(trace); + set locks; + get_export_lock_set(dir, locks); + mds->locker->rdlock_finish_set(locks); list ls; mds->queue_waiters(ls); diff --git a/src/mds/Migrator.h b/src/mds/Migrator.h index fa02007765bac..f3b13955aa936 100644 --- a/src/mds/Migrator.h +++ b/src/mds/Migrator.h @@ -183,6 +183,8 @@ public: export_queue.clear(); } + void get_export_lock_set(CDir *dir, set& locks); + void encode_export_inode(CInode *in, bufferlist& bl, map& exported_client_map); void encode_export_inode_caps(CInode *in, bufferlist& bl, -- 2.39.5