From 28c3026543582c18061e7e08b0802ff7744b5d02 Mon Sep 17 00:00:00 2001
From: Sage Weil
Date: Wed, 25 May 2011 14:54:15 -0700
Subject: [PATCH] mds: fix canceled lock attempt

If a client tries to lock a file, has to wait, and then cancels the
attempt, the client will send an unlock request to unwind its state.

- the unlock now removes the waiting lock attempt from the wait list
- when the lock request retries and finds it is no longer on the wait
  list, it will fail.

Signed-off-by: Sage Weil
---
 src/mds/MDCache.h |  4 +++
 src/mds/Server.cc | 45 ++++++++++++++++++-------------
 src/mds/flock.h   | 68 ++++++++++++++++++++++++++++++++++++++---------
 3 files changed, 87 insertions(+), 30 deletions(-)

diff --git a/src/mds/MDCache.h b/src/mds/MDCache.h
index 07dc393757020..5b3b041b3d141 100644
--- a/src/mds/MDCache.h
+++ b/src/mds/MDCache.h
@@ -306,6 +306,9 @@ struct MDRequest : public Mutation {
     map sseq_map;
     map > cap_imports;
 
+    // for lock/flock
+    bool flock_was_waiting;
+
     // for snaps
     version_t stid;
     bufferlist snapidbl;
@@ -317,6 +320,7 @@ struct MDRequest : public Mutation {
     More() :
       src_reanchor_atid(0), dst_reanchor_atid(0), inode_import_v(0),
       destdn_was_remote_inode(0), was_link_merge(false),
+      flock_was_waiting(false),
       stid(0),
       slave_commit(0) { }
   } *_more;
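The MDCache.h hunk above only adds the bookkeeping flag; the behaviour is in the Server.cc hunk below, where a retried setlock that remembers having queued itself treats "no longer on the wait list" as a cancel and answers -EINTR. What follows is a hypothetical, self-contained sketch of that pattern, not the MDS code: Request, LockTable, try_lock() and cancel() are invented stand-ins, and only the idea of the flock_was_waiting flag and the -EINTR/-EWOULDBLOCK returns are taken from the patch.

// Hypothetical sketch of the cancellation-detection pattern (not Ceph code):
// a request that had to queue itself remembers that fact, and on retry the
// absence of its wait-list entry is read as "the wait was canceled".
#include <cerrno>
#include <iostream>
#include <set>

struct Request {
  int id;
  bool was_waiting;          // plays the role of MDRequest::More::flock_was_waiting
};

struct LockTable {
  bool busy = false;         // a conflicting lock is held by someone else
  std::set<int> wait_list;   // ids of requests currently parked

  bool is_waiting(const Request& r) const { return wait_list.count(r.id) > 0; }
  void cancel(const Request& r) { wait_list.erase(r.id); }  // "unlock" of a pending attempt

  // 0 on success, -EINTR if a previously queued wait has been canceled,
  // -EWOULDBLOCK if the request was parked and should be retried later.
  int try_lock(Request& r) {
    if (r.was_waiting && !is_waiting(r))
      return -EINTR;         // we had queued ourselves, the entry is gone: canceled
    if (!busy) {
      wait_list.erase(r.id); // granted; no longer waiting
      busy = true;
      return 0;
    }
    wait_list.insert(r.id);  // park the attempt so a cancel can find it later
    r.was_waiting = true;
    return -EWOULDBLOCK;
  }
};

int main() {
  LockTable table;
  table.busy = true;                                   // conflicting lock already held
  Request r{1, false};
  std::cout << table.try_lock(r) << "\n";              // -EWOULDBLOCK: parked
  table.cancel(r);                                     // client unwinds with an unlock
  std::cout << table.try_lock(r) << "\n";              // -EINTR: the wait was canceled
}

In the real handler the retry is driven by C_MDS_RetryRequest waking the request up again rather than by the caller looping, but the flock_was_waiting/is_waiting decision has the same shape.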
diff --git a/src/mds/Server.cc b/src/mds/Server.cc
index 95f81de17770e..c1eceff53351c 100644
--- a/src/mds/Server.cc
+++ b/src/mds/Server.cc
@@ -2855,38 +2855,47 @@ void Server::handle_client_file_setlock(MDRequest *mdr)
     return;
   }
-  dout(0) << "state prior to lock change: " << *lock_state << dendl;;
+  dout(10) << " state prior to lock change: " << *lock_state << dendl;;
   if (CEPH_LOCK_UNLOCK == set_lock.type) {
-    dout(0) << "got unlock" << dendl;
+    dout(10) << " unlock attempt on " << set_lock << dendl;
     list<ceph_filelock> activated_locks;
-    lock_state->remove_lock(set_lock, activated_locks);
+    list<Context*> waiters;
+    if (lock_state->is_waiting(set_lock)) {
+      dout(10) << " unlock removing waiting lock " << set_lock << dendl;
+      lock_state->remove_waiting(set_lock);
+    } else {
+      dout(10) << " unlock attempt on " << set_lock << dendl;
+      lock_state->remove_lock(set_lock, activated_locks);
+      cur->take_waiting(CInode::WAIT_FLOCK, waiters);
+    }
     reply_request(mdr, 0);
     /* For now we're ignoring the activated locks because their responses
      * will be sent when the lock comes up again in rotation by the MDS.
      * It's a cheap hack, but it's easy to code.
      */
-    list<Context*> waiters;
-    cur->take_waiting(CInode::WAIT_FLOCK, waiters);
     mds->queue_waiters(waiters);
   } else {
-    dout(0) << "got lock" << dendl;
-    if (lock_state->add_lock(set_lock, will_wait)) {
-      // lock set successfully
-      dout(0) << "it succeeded" << dendl;
-      reply_request(mdr, 0);
-    } else {
-      dout(0) << "it failed on this attempt" << dendl;
+    dout(10) << " lock attempt on " << set_lock << dendl;
+    if (mdr->more()->flock_was_waiting &&
+        !lock_state->is_waiting(set_lock)) {
+      dout(10) << " was waiting for lock but not anymore, must have been canceled " << set_lock << dendl;
+      reply_request(mdr, -EINTR);
+    } else if (!lock_state->add_lock(set_lock, will_wait)) {
+      dout(10) << " it failed on this attempt" << dendl;
       // couldn't set lock right now
-      if (!will_wait)
-        reply_request(mdr, -1);
-      else {
-        dout(0) << "but it's a wait" << dendl;
+      if (!will_wait) {
+        reply_request(mdr, -EWOULDBLOCK);
+      } else {
+        dout(10) << " added to waiting list" << dendl;
+        assert(lock_state->is_waiting(set_lock));
+        mdr->more()->flock_was_waiting = true;
         mds->locker->drop_locks(mdr);
         mdr->drop_local_auth_pins();
         cur->add_waiter(CInode::WAIT_FLOCK, new C_MDS_RetryRequest(mdcache, mdr));
       }
-    }
+    } else
+      reply_request(mdr, 0);
   }
-  dout(0) << "state after lock change: " << *lock_state << dendl;
+  dout(10) << " state after lock change: " << *lock_state << dendl;
 }
 
 void Server::handle_client_file_readlock(MDRequest *mdr)
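The Server.cc change above can only ask is_waiting()/remove_waiting() because the flock.h change below keeps every pending attempt in waiting_locks, a multimap keyed by the lock's start offset, and identifies an attempt by its length, client, pid and pid_namespace. Here is a stripped-down, hypothetical model of that bookkeeping: filelock and wait_list are illustrative stand-ins rather than ceph_filelock and ceph_lock_state_t, and the lookup walks the multimap's equal range with lower_bound where the patch walks forward from find() until the key grows past fl.start, which is the same scan.

// Hypothetical model of the waiting_locks bookkeeping (not the Ceph types):
// pending attempts are keyed by their start offset and matched on the
// requester's identity fields plus the length.
#include <cstdint>
#include <iostream>
#include <map>

struct filelock {
  uint64_t start, length, client, pid, pid_namespace;
};

// Same extent and same owner: this waiting entry belongs to this request.
static bool same_attempt(const filelock& a, const filelock& b) {
  return a.length == b.length && a.client == b.client &&
         a.pid == b.pid && a.pid_namespace == b.pid_namespace;
}

struct wait_list {
  std::multimap<uint64_t, filelock> waiting;   // keyed by lock start offset

  void add(const filelock& fl) { waiting.insert({fl.start, fl}); }

  bool is_waiting(const filelock& fl) const {
    for (auto p = waiting.lower_bound(fl.start);
         p != waiting.end() && p->first == fl.start; ++p)
      if (same_attempt(p->second, fl))
        return true;
    return false;
  }

  void remove_waiting(const filelock& fl) {
    for (auto p = waiting.lower_bound(fl.start);
         p != waiting.end() && p->first == fl.start; ++p)
      if (same_attempt(p->second, fl)) {
        waiting.erase(p);                      // drop only the canceled attempt
        return;
      }
  }
};

int main() {
  wait_list wl;
  filelock fl = {0, 1024, 4100, 2231, 1};      // start, length, client, pid, pid_namespace
  wl.add(fl);
  std::cout << wl.is_waiting(fl) << "\n";      // 1: the attempt is parked
  wl.remove_waiting(fl);                       // the unlock of a pending attempt
  std::cout << wl.is_waiting(fl) << "\n";      // 0: a retry would now see a cancel
}

Note that the match skips the lock type on purpose, presumably because the canceling request arrives as CEPH_LOCK_UNLOCK rather than as the type that was originally requested.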
diff --git a/src/mds/flock.h b/src/mds/flock.h
index 39be226ed3a20..f6194653b595c 100644
--- a/src/mds/flock.h
+++ b/src/mds/flock.h
@@ -3,6 +3,8 @@
 #ifndef CEPH_MDS_FLOCK_H
 #define CEPH_MDS_FLOCK_H
 
+#include
+
 #include "mdstypes.h"
 
 
@@ -14,6 +16,15 @@ inline ostream& operator<<(ostream& out, ceph_filelock& l) {
   return out;
 }
 
+inline bool operator==(ceph_filelock& l, ceph_filelock& r) {
+  return
+    l.length == r.length &&
+    l.client == r.client &&
+    l.pid == r.pid &&
+    l.pid_namespace == r.pid_namespace &&
+    l.type == r.type;
+}
+
 struct ceph_lock_state_t {
   multimap<uint64_t, ceph_filelock> held_locks;    // current locks
   multimap<uint64_t, ceph_filelock> waiting_locks; // locks waiting for other locks
@@ -21,11 +32,45 @@ struct ceph_lock_state_t {
   map<client_t, int> client_held_lock_counts;
   map<client_t, int> client_waiting_lock_counts;
 
+  bool is_waiting(ceph_filelock &fl) {
+    multimap<uint64_t, ceph_filelock>::iterator p = waiting_locks.find(fl.start);
+    while (p != waiting_locks.end()) {
+      if (p->second.start > fl.start)
+        return false;
+      if (p->second.length == fl.length &&
+          p->second.client == fl.client &&
+          p->second.pid == fl.pid &&
+          p->second.pid_namespace == fl.pid_namespace)
+        return true;
+      ++p;
+    }
+    return false;
+  }
+  void remove_waiting(ceph_filelock& fl) {
+    multimap<uint64_t, ceph_filelock>::iterator p = waiting_locks.find(fl.start);
+    while (p != waiting_locks.end()) {
+      if (p->second.start > fl.start)
+        return;
+      if (p->second.length == fl.length &&
+          p->second.client == fl.client &&
+          p->second.pid == fl.pid &&
+          p->second.pid_namespace == fl.pid_namespace) {
+        waiting_locks.erase(p);
+        return;
+      }
+      ++p;
+    }
+  }
+
   /*
    * Try to set a new lock. If it's blocked and wait_on_fail is true,
    * add the lock to waiting_locks.
    * The lock needs to be of type CEPH_LOCK_EXCL or CEPH_LOCK_SHARED.
    *
+   * If we already added ourselves to waiting_locks, did_wait will be
+   * true. If did_wait==true and we're not on the list, that means we
+   * were canceled and we should return an error.
+   *
    * Returns true if set, false if not set.
    */
   bool add_lock(ceph_filelock& new_lock, bool wait_on_fail) {
@@ -33,8 +78,9 @@
     bool ret = false;
     list<multimap<uint64_t, ceph_filelock>::iterator>
       overlapping_locks, self_overlapping_locks, neighbor_locks;
+    // first, get any overlapping locks and split them into owned-by-us and not
 
-    if(get_overlapping_locks(new_lock, overlapping_locks, &neighbor_locks)) {
+    if (get_overlapping_locks(new_lock, overlapping_locks, &neighbor_locks)) {
      dout(15) << "got overlapping lock, splitting by owner" << dendl;
      split_by_owner(new_lock, overlapping_locks, self_overlapping_locks);
     }
@@ -44,24 +90,19 @@
         dout(15) << "overlapping lock, and this lock is exclusive, can't set"
                  << dendl;
         if (wait_on_fail) {
-          waiting_locks.
-            insert(pair<uint64_t, ceph_filelock>(new_lock.start, new_lock));
+          waiting_locks.insert(pair<uint64_t, ceph_filelock>(new_lock.start, new_lock));
         }
-        ret = false;
       } else { //shared lock, check for any exclusive locks blocking us
         if (contains_exclusive_lock(overlapping_locks)) { //blocked :(
           dout(15) << " blocked by exclusive lock in overlapping_locks"
                    << dendl;
           if (wait_on_fail) {
-            waiting_locks.
-              insert(pair<uint64_t, ceph_filelock>(new_lock.start, new_lock));
+            waiting_locks.insert(pair<uint64_t, ceph_filelock>(new_lock.start, new_lock));
           }
-          ret = false;
         } else { //yay, we can insert a shared lock
           dout(15) << "inserting shared lock" << dendl;
           adjust_locks(self_overlapping_locks, new_lock, neighbor_locks);
-          held_locks.
-            insert(pair<uint64_t, ceph_filelock>(new_lock.start, new_lock));
+          held_locks.insert(pair<uint64_t, ceph_filelock>(new_lock.start, new_lock));
           ret = true;
         }
       }
@@ -72,8 +113,10 @@
                         (new_lock.start, new_lock));
       ret = true;
     }
-    if (ret) ++client_held_lock_counts[(client_t)new_lock.client];
-    else if (wait_on_fail) ++client_waiting_lock_counts[(client_t)new_lock.client];
+    if (ret)
+      ++client_held_lock_counts[(client_t)new_lock.client];
+    else if (wait_on_fail)
+      ++client_waiting_lock_counts[(client_t)new_lock.client];
     return ret;
   }
 
@@ -195,7 +238,8 @@
         if (!client_waiting_lock_counts[(client_t)cur_lock.client]) {
           client_waiting_lock_counts.erase((client_t)cur_lock.client);
         }
-        if(add_lock(cur_lock, true)) activated_locks.push_back(cur_lock);
+        if (add_lock(cur_lock, true))
+          activated_locks.push_back(cur_lock);
       }
     }
   }
-- 
2.39.5
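For completeness, here is a hypothetical single-range model of the add_lock() path the later flock.h hunks tidy up: an exclusive request conflicts with any holder, a shared request only with an exclusive holder, a blocked attempt is parked and counted per client when wait_on_fail is set, and an unlock re-runs the parked attempts roughly the way the activated_locks loop in the last hunk does. All of the interval handling (get_overlapping_locks, split_by_owner, adjust_locks) is deliberately left out, and every name is illustrative rather than Ceph's.

// Hypothetical single-range lock table (not ceph_lock_state_t): shows only the
// grant/park decision and the reactivation of parked attempts on unlock.
#include <iostream>
#include <list>
#include <map>

enum lock_type { SHARED, EXCL };

struct lock_req { int client; lock_type type; };

struct one_range_lock_state {
  std::list<lock_req> held;                  // holders: all shared, or a single excl
  std::list<lock_req> waiting;               // parked attempts
  std::map<int, int> client_waiting_counts;  // client -> parked attempts, like client_waiting_lock_counts

  bool conflicts(const lock_req& r) const {
    for (const lock_req& h : held)
      if (h.type == EXCL || r.type == EXCL)
        return true;                         // an excl holder blocks everyone; any holder blocks excl
    return false;
  }

  // true if granted; false if not (parking the attempt when wait_on_fail is set).
  bool add_lock(const lock_req& r, bool wait_on_fail) {
    if (conflicts(r)) {
      if (wait_on_fail) {
        waiting.push_back(r);
        ++client_waiting_counts[r.client];
      }
      return false;
    }
    held.push_back(r);
    return true;
  }

  // Drop every held lock and retry the parked attempts, collecting the ones
  // that now succeed (loosely like remove_lock plus the activated_locks loop).
  void unlock_all(std::list<lock_req>& activated) {
    held.clear();
    std::list<lock_req> parked;
    parked.swap(waiting);
    for (const lock_req& r : parked) {
      if (--client_waiting_counts[r.client] == 0)
        client_waiting_counts.erase(r.client);
      if (add_lock(r, true))
        activated.push_back(r);
    }
  }
};

int main() {
  one_range_lock_state st;
  std::cout << st.add_lock({1, EXCL}, false) << "\n";   // 1: granted
  std::cout << st.add_lock({2, SHARED}, true) << "\n";  // 0: parked behind the excl holder

  std::list<lock_req> activated;
  st.unlock_all(activated);                             // client 1 unlocks
  std::cout << activated.size() << "\n";                // 1: client 2's lock now activates
}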