From 89b8283f73dbf33bb07c36b3ef53d64980312199 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 18 Dec 2008 11:31:36 -0800 Subject: [PATCH] mds: basic filelock fixes --- src/TODO | 1 + src/mds/FileLock.h | 9 +++- src/mds/Locker.cc | 102 +++++++++++++++++++++++++++++++++++++++++---- src/mds/Locker.h | 1 + src/mds/MDS.cc | 2 +- 5 files changed, 104 insertions(+), 11 deletions(-) diff --git a/src/TODO b/src/TODO index 250576cc454a..097b8ae886df 100644 --- a/src/TODO +++ b/src/TODO @@ -104,6 +104,7 @@ userspace client - fix readdir vs fragment race by keeping a separate frag pos, and ignoring dentries below it mds +- dftlock is missing from rejoin phase - file size recovery gives (wrong) 4MB-increment results? - hard link backpointers - anchor source dir diff --git a/src/mds/FileLock.h b/src/mds/FileLock.h index 9238967bc254..70c583010522 100644 --- a/src/mds/FileLock.h +++ b/src/mds/FileLock.h @@ -37,7 +37,8 @@ using namespace std; #define LOCK_SYNC_ 1 // AR R . / C R . . . L R . / C R . . . L stat() #define LOCK_LONER_SYNC -12 // A . . / C r . . . L * loner -> sync #define LOCK_MIXED_SYNC -13 // A . w / . R . . . L . w / . R . . . L -#define LOCK_LOCK_SYNC_ -14 // A . w / C . . . b L +#define LOCK_MIXED_SYNC2 -14 // A . w / . R . . . L . w / . R . . . L replica already acked +#define LOCK_LOCK_SYNC_ // A . w / C . . . b L #define LOCK_LOCK_ 2 // AR R W / C . . . B . . . / C . . . . . truncate() #define LOCK_SYNC_LOCK_ -3 // AR R . / C . . . . . r . / C . . . . . @@ -51,7 +52,7 @@ using namespace std; #define LOCK_LONER 9 // A . . / c r w a b L * (lock) #define LOCK_SYNC_LONER -10 // A r . / . R . . . L #define LOCK_MIXED_LONER -11 // A . w / . R W A . L -#define LOCK_LOCK_LONER -15 // A . . / c . . . b . * +#define LOCK_LOCK_LONER -16 // A . . / c . . . b . * // * <- varies if client is loner vs non-loner. @@ -60,6 +61,7 @@ inline const char *get_filelock_state_name(int n) { case LOCK_SYNC: return "sync"; case LOCK_LONER_SYNC: return "loner->sync"; case LOCK_MIXED_SYNC: return "mixed->sync"; + case LOCK_MIXED_SYNC2: return "mixed->sync2"; case LOCK_LOCK_SYNC: return "lock->sync"; case LOCK_LOCK: return "lock"; case LOCK_SYNC_LOCK: return "sync->lock"; @@ -200,6 +202,9 @@ class FileLock : public ScatterLock { case LOCK_LONER_LOCK: return CEPH_CAP_PIN | CEPH_CAP_RDCACHE | CEPH_CAP_WRBUFFER; + case LOCK_LOCK_SYNC: + return CEPH_CAP_PIN | CEPH_CAP_RDCACHE | CEPH_CAP_LAZYIO; + case LOCK_MIXED_LOCK: return CEPH_CAP_PIN; diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc index 6939ebe1c659..3b5461311728 100644 --- a/src/mds/Locker.cc +++ b/src/mds/Locker.cc @@ -401,8 +401,8 @@ bool Locker::wrlock_start(SimpleLock *lock, MDRequest *mut) return scatter_wrlock_start((ScatterLock*)lock, mut); case CEPH_LOCK_IVERSION: return local_wrlock_start((LocalLock*)lock, mut); - //case CEPH_LOCK_IFILE: - //return file_wrlock_start((ScatterLock*)lock, mut); + case CEPH_LOCK_IFILE: + return file_wrlock_start((FileLock*)lock, mut); default: assert(0); return false; @@ -3053,6 +3053,57 @@ bool Locker::file_wrlock_force(FileLock *lock, Mutation *mut) }*/ } + +bool Locker::file_wrlock_start(FileLock *lock, MDRequest *mut) +{ + dout(7) << "file_wrlock_start on " << *lock + << " on " << *lock->get_parent() << dendl; + + bool want_scatter = lock->get_parent()->is_auth() && + ((CInode*)lock->get_parent())->has_subtree_root_dirfrag(); + + // can wrlock? + if (lock->can_wrlock()) { + lock->get_wrlock(); + if (mut) { + mut->wrlocks.insert(lock); + mut->locks.insert(lock); + } + return true; + } + + if (lock->is_stable()) { + if (lock->get_parent()->is_auth()) { + if (want_scatter) + file_mixed(lock); + else + file_lock(lock); + + // try again? + if (lock->can_wrlock()) { + lock->get_wrlock(); + if (mut) { + mut->wrlocks.insert(lock); + mut->locks.insert(lock); + } + return true; + } + + lock->add_waiter(SimpleLock::WAIT_STABLE, new C_MDS_RetryRequest(mdcache, mut)); + + } else { + // replica. + // auth should be auth_pinned (see acquire_locks wrlock weird mustpin case). + int auth = lock->get_parent()->authority().first; + dout(10) << "requesting scatter from auth on " + << *lock << " on " << *lock->get_parent() << dendl; + mds->send_message_mds(new MLock(lock, LOCK_AC_REQSCATTER, mds->get_nodeid()), auth); + } + } + + return false; +} + void Locker::file_wrlock_finish(FileLock *lock, Mutation *mut) { dout(7) << "wrlock_finish on " << *lock << " on " << *lock->get_parent() << dendl; @@ -3248,7 +3299,7 @@ void Locker::file_eval_gather(FileLock *lock) // to mixed case LOCK_SYNC_MIXED: lock->set_state(LOCK_MIXED); - lock->finish_waiters(SimpleLock::WAIT_STABLE); + lock->finish_waiters(SimpleLock::WAIT_STABLE|SimpleLock::WAIT_WR); lock->get_parent()->auth_unpin(lock); break; @@ -3265,7 +3316,7 @@ void Locker::file_eval_gather(FileLock *lock) send_lock_message(lock, LOCK_AC_MIXED, softdata); } - lock->finish_waiters(SimpleLock::WAIT_STABLE); + lock->finish_waiters(SimpleLock::WAIT_STABLE|SimpleLock::WAIT_WR); lock->get_parent()->auth_unpin(lock); break; @@ -3331,9 +3382,12 @@ void Locker::file_eval_gather(FileLock *lock) MLock *reply = new MLock(lock, LOCK_AC_SYNCACK, mds->get_nodeid()); lock->encode_locked_state(reply->get_data()); mds->send_message_mds(reply, in->authority().first); - lock->set_state(LOCK_LOCK); // this is sort of funky :/ + lock->set_state(LOCK_MIXED_SYNC2); } break; + case LOCK_MIXED_SYNC2: + // do nothing, we already acked + break; case LOCK_SYNC_MIXED: { @@ -3470,6 +3524,7 @@ bool Locker::file_sync(FileLock *lock) gather++; if (in->is_replicated() && lock->get_state() == LOCK_MIXED_SYNC) { send_lock_message(lock, LOCK_AC_SYNC); + lock->init_gather(); gather++; } if (in->state_test(CInode::STATE_NEEDSRECOVER)) { @@ -3693,7 +3748,8 @@ void Locker::handle_file_lock(FileLock *lock, MLock *m) // -- replica -- case LOCK_AC_SYNC: assert(lock->get_state() == LOCK_LOCK || - lock->get_state() == LOCK_MIXED); + lock->get_state() == LOCK_MIXED || + lock->get_state() == LOCK_MIXED_SYNC2); if (lock->get_state() == LOCK_MIXED) { // primary needs to gather up our changes @@ -3702,8 +3758,8 @@ void Locker::handle_file_lock(FileLock *lock, MLock *m) MLock *reply = new MLock(lock, LOCK_AC_SYNCACK, mds->get_nodeid()); lock->encode_locked_state(reply->get_data()); mds->send_message_mds(reply, from); + lock->set_state(LOCK_MIXED_SYNC2); } else { - // gather lock->set_state(LOCK_MIXED_SYNC); } } else { @@ -3814,7 +3870,8 @@ void Locker::handle_file_lock(FileLock *lock, MLock *m) break; case LOCK_AC_SYNCACK: - assert(lock->get_state() == LOCK_MIXED_SYNC); + assert(lock->get_state() == LOCK_MIXED_SYNC || + lock->get_state() == LOCK_MIXED_SYNC2); assert(lock->is_gathering(from)); lock->remove_gather(from); @@ -3846,6 +3903,35 @@ void Locker::handle_file_lock(FileLock *lock, MLock *m) break; + // requests.... + case LOCK_AC_REQSCATTER: + if (lock->is_stable()) { + /* NOTE: we can do this _even_ if !can_auth_pin (i.e. freezing) + * because the replica should be holding an auth_pin if they're + * doing this (and thus, we are freezing, not frozen, and indefinite + * starvation isn't an issue). + */ + dout(7) << "handle_file_lock got scatter request on " << *lock + << " on " << *lock->get_parent() << dendl; + file_mixed(lock); + } else { + dout(7) << "handle_file_lock ignoring scatter request on " << *lock + << " on " << *lock->get_parent() << dendl; + } + break; + + case LOCK_AC_NUDGE: + if (lock->get_parent()->is_auth()) { + dout(7) << "handle_file_lock trying nudge on " << *lock + << " on " << *lock->get_parent() << dendl; + scatter_nudge(lock, 0); + } else { + dout(7) << "handle_file_lock IGNORING nudge on non-auth " << *lock + << " on " << *lock->get_parent() << dendl; + } + break; + + default: assert(0); } diff --git a/src/mds/Locker.h b/src/mds/Locker.h index 72d89e6e0af5..b82664c682e2 100644 --- a/src/mds/Locker.h +++ b/src/mds/Locker.h @@ -186,6 +186,7 @@ protected: bool file_rdlock_start(FileLock *lock, MDRequest *mut); void file_rdlock_finish(FileLock *lock, Mutation *mut); bool file_wrlock_force(FileLock *lock, Mutation *mut); + bool file_wrlock_start(FileLock *lock, MDRequest *mut); void file_wrlock_finish(FileLock *lock, Mutation *mut); bool file_xlock_start(FileLock *lock, MDRequest *mut); void file_xlock_finish(FileLock *lock, Mutation *mut); diff --git a/src/mds/MDS.cc b/src/mds/MDS.cc index 303241958999..c205ef108370 100644 --- a/src/mds/MDS.cc +++ b/src/mds/MDS.cc @@ -1317,7 +1317,7 @@ bool MDS::_dispatch(Message *m) } while (dest == whoami); mdcache->migrator->export_dir_nicely(dir,dest); } - // hack: thrash exports + // hack: thrash fragments for (int i=0; i