From a33e9e774f99ab3a9a908920a04925eee5f98d49 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 25 Mar 2010 13:17:06 -0700 Subject: [PATCH] mds: allow rdlock on replica to request sync from auth This fixes a ping-pong readdir between inode and dirfrag auth. --- src/mds/Locker.cc | 52 ++++++++++++++++++++++++++++++++++---------- src/mds/SimpleLock.h | 3 +++ src/mds/locks.c | 14 ++++++------ src/mds/locks.h | 3 +++ 4 files changed, 54 insertions(+), 18 deletions(-) diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc index f3eef4ba2f6de..179df1e2fbff8 100644 --- a/src/mds/Locker.cc +++ b/src/mds/Locker.cc @@ -200,6 +200,11 @@ bool Locker::acquire_locks(MDRequest *mdr, sorted.insert(*p); if ((*p)->get_parent()->is_auth()) mustpin.insert(*p); + else if ((*p)->get_type() == CEPH_LOCK_IFILE && + !(*p)->get_parent()->is_auth() && !(*p)->can_rdlock(client)) { // we might have to request an rdlock + dout(15) << " will also auth_pin " << *(*p)->get_parent() << " in case we need to request a rdlock" << dendl; + mustpin.insert(*p); + } } @@ -677,16 +682,24 @@ void Locker::eval(SimpleLock *lock, bool *need_issue) bool Locker::_rdlock_kick(SimpleLock *lock) { // kick the lock - if (lock->is_stable() && - lock->get_parent()->is_auth()) { - if (lock->get_sm() == &sm_scatterlock) { - if (lock->get_parent()->is_replicated()) - scatter_tempsync((ScatterLock*)lock); - else + if (lock->is_stable()) { + if (lock->get_parent()->is_auth()) { + if (lock->get_sm() == &sm_scatterlock) { + if (lock->get_parent()->is_replicated()) + scatter_tempsync((ScatterLock*)lock); + else + simple_sync(lock); + } else simple_sync(lock); - } else - simple_sync(lock); - return true; + return true; + } else { + // request rdlock state change from auth + int auth = lock->get_parent()->authority().first; + dout(10) << "requesting rdlock from auth on " + << *lock << " on " << *lock->get_parent() << dendl; + mds->send_message_mds(new MLock(lock, LOCK_AC_REQRDLOCK, mds->get_nodeid()), auth); + return false; + } } return false; } @@ -737,8 +750,13 @@ bool Locker::rdlock_start(SimpleLock *lock, MDRequest *mut) } // wait! + int wait_on; + if (lock->get_parent()->is_auth()) + wait_on = SimpleLock::WAIT_RD; + else + wait_on = SimpleLock::WAIT_STABLE; // REQRDLOCK is ignored if lock is unstable, so we need to retry. dout(7) << "rdlock_start waiting on " << *lock << " on " << *lock->get_parent() << dendl; - lock->add_waiter(SimpleLock::WAIT_RD, new C_MDS_RetryRequest(mdcache, mut)); + lock->add_waiter(wait_on, new C_MDS_RetryRequest(mdcache, mut)); nudge_log(lock); return false; } @@ -3564,6 +3582,19 @@ void Locker::handle_file_lock(ScatterLock *lock, MLock *m) } break; + case LOCK_AC_REQRDLOCK: + if (lock->is_stable()) { + dout(7) << "handle_file_lock got rdlock request on " << *lock + << " on " << *lock->get_parent() << dendl; + assert(in->is_auth()); // replica auth pinned if they're doing this! + simple_sync(lock); + } else { + dout(7) << "handle_file_lock ignoring rdlock request on " << *lock + << " on " << *lock->get_parent() << dendl; + // replica will retry. + } + break; + case LOCK_AC_NUDGE: if (lock->get_parent()->is_auth()) { dout(7) << "handle_file_lock trying nudge on " << *lock @@ -3576,7 +3607,6 @@ void Locker::handle_file_lock(ScatterLock *lock, MLock *m) } break; - default: assert(0); } diff --git a/src/mds/SimpleLock.h b/src/mds/SimpleLock.h index 3561f1857f728..43fb40d18cd48 100644 --- a/src/mds/SimpleLock.h +++ b/src/mds/SimpleLock.h @@ -315,6 +315,9 @@ public: bool fw_rdlock_to_auth() { return get_sm()->states[state].can_rdlock == FW; } + bool req_rdlock_from_auth() { + return get_sm()->states[state].can_rdlock == REQ; + } // gather set static set empty_gather_set; diff --git a/src/mds/locks.c b/src/mds/locks.c index aa46bae2d1dc5..3ffdf75884972 100644 --- a/src/mds/locks.c +++ b/src/mds/locks.c @@ -18,7 +18,7 @@ struct sm_state_t simplelock[LOCK_MAX] = { [LOCK_LOCK_SYNC] = { LOCK_SYNC, false, LOCK_SYNC, ANY, XCL, XCL, 0, XCL, 0, 0,0,0,0 }, [LOCK_EXCL_SYNC] = { LOCK_SYNC, true, LOCK_LOCK, 0, 0, 0, 0, 0, 0, 0,CEPH_CAP_GSHARED,0,0 }, - [LOCK_LOCK] = { 0, false, LOCK_LOCK, AUTH, 0, 0, 0, 0, 0, 0,0,0,0 }, + [LOCK_LOCK] = { 0, false, LOCK_LOCK, AUTH, 0, FW, 0, 0, 0, 0,0,0,0 }, [LOCK_SYNC_LOCK] = { LOCK_LOCK, false, LOCK_LOCK, ANY, 0, 0, 0, 0, 0, 0,0,0,0 }, [LOCK_EXCL_LOCK] = { LOCK_LOCK, false, LOCK_LOCK, 0, 0, 0, 0, 0, 0, 0,0,0,0 }, @@ -85,22 +85,22 @@ struct sm_state_t filelock[LOCK_MAX] = { [LOCK_MIX_SYNC] = { LOCK_SYNC, false, LOCK_MIX, 0, 0, 0, 0, 0, 0, CEPH_CAP_GRD,0,0,CEPH_CAP_GRD }, [LOCK_MIX_SYNC2] = { LOCK_SYNC, false, 0, 0, 0, 0, 0, 0, 0, CEPH_CAP_GRD,0,0,CEPH_CAP_GRD }, - [LOCK_LOCK] = { 0, false, LOCK_LOCK, AUTH, 0, FW, AUTH,0, 0, CEPH_CAP_GCACHE|CEPH_CAP_GBUFFER,0,0,0 }, - [LOCK_SYNC_LOCK] = { LOCK_LOCK, false, LOCK_LOCK, AUTH, 0, FW, 0, 0, 0, CEPH_CAP_GCACHE,0,0,CEPH_CAP_GCACHE }, + [LOCK_LOCK] = { 0, false, LOCK_LOCK, AUTH, 0, REQ, AUTH,0, 0, CEPH_CAP_GCACHE|CEPH_CAP_GBUFFER,0,0,0 }, + [LOCK_SYNC_LOCK] = { LOCK_LOCK, false, LOCK_LOCK, AUTH, 0, REQ, 0, 0, 0, CEPH_CAP_GCACHE,0,0,CEPH_CAP_GCACHE }, [LOCK_EXCL_LOCK] = { LOCK_LOCK, false, LOCK_LOCK, 0, 0, 0, 0, 0, 0, CEPH_CAP_GCACHE|CEPH_CAP_GBUFFER,0,0,CEPH_CAP_GCACHE }, - [LOCK_MIX_LOCK] = { LOCK_LOCK, false, LOCK_LOCK, AUTH, 0, FW, 0, 0, 0, 0,0,0,0 }, + [LOCK_MIX_LOCK] = { LOCK_LOCK, false, LOCK_LOCK, AUTH, 0, REQ, 0, 0, 0, 0,0,0,0 }, [LOCK_PREXLOCK] = { LOCK_SYNC, false, LOCK_LOCK, 0, XCL, 0, 0, 0, ANY, CEPH_CAP_GCACHE|CEPH_CAP_GBUFFER,0,0,0 }, [LOCK_XLOCK] = { LOCK_SYNC, false, LOCK_LOCK, 0, XCL, 0, 0, 0, 0, CEPH_CAP_GCACHE|CEPH_CAP_GBUFFER,0,0,0 }, [LOCK_XLOCKDONE] = { LOCK_SYNC, false, LOCK_LOCK, XCL, XCL, XCL, 0, XCL, 0, CEPH_CAP_GCACHE|CEPH_CAP_GBUFFER,0,CEPH_CAP_GSHARED,0 }, [LOCK_LOCK_XLOCK]= { LOCK_PREXLOCK,false,LOCK_LOCK,0, XCL, 0, 0, 0, XCL, CEPH_CAP_GCACHE|CEPH_CAP_GBUFFER,0,0,0 }, - [LOCK_MIX] = { 0, false, LOCK_MIX, 0, 0, FW, ANY, 0, 0, CEPH_CAP_GRD|CEPH_CAP_GWR,0,0,CEPH_CAP_GRD }, - [LOCK_SYNC_MIX] = { LOCK_MIX, false, LOCK_MIX, ANY, 0, FW, 0, 0, 0, CEPH_CAP_GRD,0,0,CEPH_CAP_GRD }, + [LOCK_MIX] = { 0, false, LOCK_MIX, 0, 0, REQ, ANY, 0, 0, CEPH_CAP_GRD|CEPH_CAP_GWR,0,0,CEPH_CAP_GRD }, + [LOCK_SYNC_MIX] = { LOCK_MIX, false, LOCK_MIX, ANY, 0, REQ, 0, 0, 0, CEPH_CAP_GRD,0,0,CEPH_CAP_GRD }, [LOCK_SYNC_MIX2] = { LOCK_MIX, false, 0, ANY, 0, 0, 0, 0, 0, CEPH_CAP_GRD,0,0,CEPH_CAP_GRD }, [LOCK_EXCL_MIX] = { LOCK_MIX, true, LOCK_LOCK, 0, 0, 0, XCL, 0, 0, 0,CEPH_CAP_GRD|CEPH_CAP_GWR,0,0 }, - [LOCK_EXCL] = { 0, true, LOCK_LOCK, 0, 0, FW, XCL, 0, 0, 0,CEPH_CAP_GSHARED|CEPH_CAP_GEXCL|CEPH_CAP_GCACHE|CEPH_CAP_GRD|CEPH_CAP_GWR|CEPH_CAP_GBUFFER,0,0 }, + [LOCK_EXCL] = { 0, true, LOCK_LOCK, 0, 0, AUTH,XCL, 0, 0, 0,CEPH_CAP_GSHARED|CEPH_CAP_GEXCL|CEPH_CAP_GCACHE|CEPH_CAP_GRD|CEPH_CAP_GWR|CEPH_CAP_GBUFFER,0,0 }, [LOCK_SYNC_EXCL] = { LOCK_EXCL, true, LOCK_LOCK, AUTH, 0, 0, 0, 0, 0, 0,CEPH_CAP_GSHARED|CEPH_CAP_GCACHE|CEPH_CAP_GRD,0,0 }, [LOCK_MIX_EXCL] = { LOCK_EXCL, true, LOCK_LOCK, 0, 0, 0, XCL, 0, 0, 0,CEPH_CAP_GRD|CEPH_CAP_GWR,0,0 }, [LOCK_LOCK_EXCL] = { LOCK_EXCL, true, LOCK_LOCK, AUTH, 0, 0, 0, 0, 0, 0,CEPH_CAP_GCACHE|CEPH_CAP_GBUFFER,0,0 }, diff --git a/src/mds/locks.h b/src/mds/locks.h index 39f96fab56c12..e6fba6a5468ce 100644 --- a/src/mds/locks.h +++ b/src/mds/locks.h @@ -30,6 +30,7 @@ struct sm_t { #define AUTH 2 // auth only #define XCL 3 // auth or exclusive client #define FW 4 // fw to auth, if replica +#define REQ 5 // req state change from auth, if replica extern struct sm_t sm_simplelock; extern struct sm_t sm_filelock; @@ -102,6 +103,7 @@ extern struct sm_t sm_locallock; #define LOCK_AC_REQSCATTER 7 #define LOCK_AC_REQUNSCATTER 8 #define LOCK_AC_NUDGE 9 +#define LOCK_AC_REQRDLOCK 10 #define LOCK_AC_FOR_REPLICA(a) ((a) < 0) #define LOCK_AC_FOR_AUTH(a) ((a) > 0) @@ -120,6 +122,7 @@ static inline const char *get_lock_action_name(int a) { case LOCK_AC_REQSCATTER: return "reqscatter"; case LOCK_AC_REQUNSCATTER: return "requnscatter"; case LOCK_AC_NUDGE: return "nudge"; + case LOCK_AC_REQRDLOCK: return "reqrdlock"; default: return "???"; } } -- 2.39.5