mds: fix canceled lock attempt
author     Sage Weil <sage@newdream.net>
           Wed, 25 May 2011 21:54:15 +0000 (14:54 -0700)
committer  Sage Weil <sage@newdream.net>
           Wed, 25 May 2011 21:56:28 +0000 (14:56 -0700)
If a client tries to lock a file, has to wait, and then cancels the attempt,
it sends an unlock request to unwind its state.  With this change (the
resulting flow is sketched after the file list below):

 - the unlock now removes the waiting lock attempt from the wait list
 - when the lock request retries and finds it is no longer on the wait
   list, it fails with -EINTR

Signed-off-by: Sage Weil <sage@newdream.net>
src/mds/MDCache.h
src/mds/Server.cc
src/mds/flock.h
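
For orientation, here is the control flow this commit gives
Server::handle_client_file_setlock, abridged from the Server.cc hunk below
(comments added; this is a readability sketch, not standalone code):

    if (set_lock.type == CEPH_LOCK_UNLOCK) {
      // An unlock either cancels a still-queued attempt or drops a held lock.
      if (lock_state->is_waiting(set_lock)) {
        lock_state->remove_waiting(set_lock);               // cancel the queued attempt
      } else {
        lock_state->remove_lock(set_lock, activated_locks); // drop a held lock
        cur->take_waiting(CInode::WAIT_FLOCK, waiters);     // wake blocked lockers
      }
      reply_request(mdr, 0);
      mds->queue_waiters(waiters);
    } else {
      if (mdr->more()->flock_was_waiting && !lock_state->is_waiting(set_lock)) {
        // We queued ourselves on an earlier pass but are no longer on the wait
        // list: the client canceled the attempt, so fail the retried request.
        reply_request(mdr, -EINTR);
      } else if (!lock_state->add_lock(set_lock, will_wait)) {
        if (!will_wait) {
          reply_request(mdr, -EWOULDBLOCK);                 // non-blocking attempt failed
        } else {
          mdr->more()->flock_was_waiting = true;            // remember that we queued
          mds->locker->drop_locks(mdr);
          mdr->drop_local_auth_pins();
          cur->add_waiter(CInode::WAIT_FLOCK, new C_MDS_RetryRequest(mdcache, mdr));
        }
      } else {
        reply_request(mdr, 0);                              // lock granted immediately
      }
    }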

diff --git a/src/mds/MDCache.h b/src/mds/MDCache.h
index 07dc393757020f1405c6bc8b8538eec4010ec97e..5b3b041b3d141e91dec3c6071d66c6a1b4aade62 100644
--- a/src/mds/MDCache.h
+++ b/src/mds/MDCache.h
@@ -306,6 +306,9 @@ struct MDRequest : public Mutation {
     map<client_t,uint64_t> sseq_map;
     map<CInode*, map<client_t,Capability::Export> > cap_imports;
     
+    // for lock/flock
+    bool flock_was_waiting;
+
     // for snaps
     version_t stid;
     bufferlist snapidbl;
@@ -317,6 +320,7 @@ struct MDRequest : public Mutation {
     More() : 
       src_reanchor_atid(0), dst_reanchor_atid(0), inode_import_v(0),
       destdn_was_remote_inode(0), was_link_merge(false),
+      flock_was_waiting(false),
       stid(0),
       slave_commit(0) { }
   } *_more;
diff --git a/src/mds/Server.cc b/src/mds/Server.cc
index 95f81de17770e0000a164d7b2aec7d51cb3ddc0b..c1eceff53351c19b567863da10e0521d86c4c788 100644
--- a/src/mds/Server.cc
+++ b/src/mds/Server.cc
@@ -2855,38 +2855,47 @@ void Server::handle_client_file_setlock(MDRequest *mdr)
     return;
   }
 
-  dout(0) << "state prior to lock change: " << *lock_state << dendl;;
+  dout(10) << " state prior to lock change: " << *lock_state << dendl;;
   if (CEPH_LOCK_UNLOCK == set_lock.type) {
-    dout(0) << "got unlock" << dendl;
+    dout(10) << " unlock attempt on " << set_lock << dendl;
     list<ceph_filelock> activated_locks;
-    lock_state->remove_lock(set_lock, activated_locks);
+    list<Context*> waiters;
+    if (lock_state->is_waiting(set_lock)) {
+      dout(10) << " unlock removing waiting lock " << set_lock << dendl;
+      lock_state->remove_waiting(set_lock);
+    } else {
+      dout(10) << " unlock attempt on " << set_lock << dendl;
+      lock_state->remove_lock(set_lock, activated_locks);
+      cur->take_waiting(CInode::WAIT_FLOCK, waiters);
+    }
     reply_request(mdr, 0);
     /* For now we're ignoring the activated locks because their responses
      * will be sent when the lock comes up again in rotation by the MDS.
      * It's a cheap hack, but it's easy to code. */
-    list<Context*> waiters;
-    cur->take_waiting(CInode::WAIT_FLOCK, waiters);
     mds->queue_waiters(waiters);
   } else {
-    dout(0) << "got lock" << dendl;
-    if (lock_state->add_lock(set_lock, will_wait)) {
-      // lock set successfully
-      dout(0) << "it succeeded" << dendl;
-      reply_request(mdr, 0);
-    } else {
-      dout(0) << "it failed on this attempt" << dendl;
+    dout(10) << " lock attempt on " << set_lock << dendl;
+    if (mdr->more()->flock_was_waiting &&
+       !lock_state->is_waiting(set_lock)) {
+      dout(10) << " was waiting for lock but not anymore, must have been canceled " << set_lock << dendl;
+      reply_request(mdr, -EINTR);
+    } else if (!lock_state->add_lock(set_lock, will_wait)) {
+      dout(10) << " it failed on this attempt" << dendl;
       // couldn't set lock right now
-      if (!will_wait)
-       reply_request(mdr, -1);
-      else {
-       dout(0) << "but it's a wait" << dendl;
+      if (!will_wait) {
+       reply_request(mdr, -EWOULDBLOCK);
+      } else {
+       dout(10) << " added to waiting list" << dendl;
+       assert(lock_state->is_waiting(set_lock));
+       mdr->more()->flock_was_waiting = true;
        mds->locker->drop_locks(mdr);
        mdr->drop_local_auth_pins();
        cur->add_waiter(CInode::WAIT_FLOCK, new C_MDS_RetryRequest(mdcache, mdr));
       }
-    }
+    } else
+      reply_request(mdr, 0);
   }
-  dout(0) << "state after lock change: " << *lock_state << dendl;
+  dout(10) << " state after lock change: " << *lock_state << dendl;
 }
 
 void Server::handle_client_file_readlock(MDRequest *mdr)
diff --git a/src/mds/flock.h b/src/mds/flock.h
index 39be226ed3a2036be58910ef097089f6d703e76d..f6194653b595c6a605d85c859be9bb46fd22acba 100644
--- a/src/mds/flock.h
+++ b/src/mds/flock.h
@@ -3,6 +3,8 @@
 #ifndef CEPH_MDS_FLOCK_H
 #define CEPH_MDS_FLOCK_H
 
+#include <errno.h>
+
 #include "mdstypes.h"
 
 
@@ -14,6 +16,15 @@ inline ostream& operator<<(ostream& out, ceph_filelock& l) {
   return out;
 }
 
+inline bool operator==(ceph_filelock& l, ceph_filelock& r) {
+  return
+    l.length == r.length &&
+    l.client == r.client &&
+    l.pid == r.pid &&
+    l.pid_namespace == r.pid_namespace &&
+    l.type == r.type;
+}
+
 struct ceph_lock_state_t {
   multimap<uint64_t, ceph_filelock> held_locks;    // current locks
   multimap<uint64_t, ceph_filelock> waiting_locks; // locks waiting for other locks
@@ -21,11 +32,45 @@ struct ceph_lock_state_t {
   map<client_t, int> client_held_lock_counts;
   map<client_t, int> client_waiting_lock_counts;
 
+  bool is_waiting(ceph_filelock &fl) {
+    multimap<uint64_t, ceph_filelock>::iterator p = waiting_locks.find(fl.start);
+    while (p != waiting_locks.end()) {
+      if (p->second.start > fl.start)
+       return false;
+      if (p->second.length == fl.length &&
+         p->second.client == fl.client &&
+         p->second.pid == fl.pid &&
+         p->second.pid_namespace == fl.pid_namespace)
+       return true;
+      ++p;
+    }
+    return false;
+  }
+  void remove_waiting(ceph_filelock& fl) {
+    multimap<uint64_t, ceph_filelock>::iterator p = waiting_locks.find(fl.start);
+    while (p != waiting_locks.end()) {
+      if (p->second.start > fl.start)
+       return;
+      if (p->second.length == fl.length &&
+         p->second.client == fl.client &&
+         p->second.pid == fl.pid &&
+         p->second.pid_namespace == fl.pid_namespace) {
+       waiting_locks.erase(p);
+       return;
+      }
+      ++p;
+    }
+  }
+
   /*
    * Try to set a new lock. If it's blocked and wait_on_fail is true,
    * add the lock to waiting_locks.
    * The lock needs to be of type CEPH_LOCK_EXCL or CEPH_LOCK_SHARED.
    *
+   * If we already added ourselves to waiting_locks, did_wait will be
+   * true.  If did_wait==true and we're not on the list, that means we
+   * were canceled and we should return an error.
+   *
    * Returns true if set, false if not set.
    */
   bool add_lock(ceph_filelock& new_lock, bool wait_on_fail) {
@@ -33,8 +78,9 @@ struct ceph_lock_state_t {
     bool ret = false;
     list<multimap<uint64_t, ceph_filelock>::iterator>
       overlapping_locks, self_overlapping_locks, neighbor_locks;
+
     // first, get any overlapping locks and split them into owned-by-us and not
-    if(get_overlapping_locks(new_lock, overlapping_locks, &neighbor_locks)) {
+    if (get_overlapping_locks(new_lock, overlapping_locks, &neighbor_locks)) {
       dout(15) << "got overlapping lock, splitting by owner" << dendl;
       split_by_owner(new_lock, overlapping_locks, self_overlapping_locks);
     }
@@ -44,24 +90,19 @@ struct ceph_lock_state_t {
        dout(15) << "overlapping lock, and this lock is exclusive, can't set"
                << dendl;
        if (wait_on_fail) {
-         waiting_locks.
-           insert(pair<uint64_t, ceph_filelock>(new_lock.start, new_lock));
+         waiting_locks.insert(pair<uint64_t, ceph_filelock>(new_lock.start, new_lock));
        }
-       ret = false;
       } else { //shared lock, check for any exclusive locks blocking us
        if (contains_exclusive_lock(overlapping_locks)) { //blocked :(
          dout(15) << " blocked by exclusive lock in overlapping_locks" << dendl;
          if (wait_on_fail) {
-           waiting_locks.
-             insert(pair<uint64_t, ceph_filelock>(new_lock.start, new_lock));
+           waiting_locks.insert(pair<uint64_t, ceph_filelock>(new_lock.start, new_lock));
          }
-         ret = false;
        } else {
          //yay, we can insert a shared lock
          dout(15) << "inserting shared lock" << dendl;
          adjust_locks(self_overlapping_locks, new_lock, neighbor_locks);
-         held_locks.
-           insert(pair<uint64_t, ceph_filelock>(new_lock.start, new_lock));
+         held_locks.insert(pair<uint64_t, ceph_filelock>(new_lock.start, new_lock));
          ret = true;
        }
       }
@@ -72,8 +113,10 @@ struct ceph_lock_state_t {
                        (new_lock.start, new_lock));
       ret = true;
     }
-    if (ret) ++client_held_lock_counts[(client_t)new_lock.client];
-    else if (wait_on_fail) ++client_waiting_lock_counts[(client_t)new_lock.client];
+    if (ret)
+      ++client_held_lock_counts[(client_t)new_lock.client];
+    else if (wait_on_fail)
+      ++client_waiting_lock_counts[(client_t)new_lock.client];
     return ret;
   }
 
@@ -195,7 +238,8 @@ struct ceph_lock_state_t {
        if (!client_waiting_lock_counts[(client_t)cur_lock.client]) {
          client_waiting_lock_counts.erase((client_t)cur_lock.client);
        }
-       if(add_lock(cur_lock, true)) activated_locks.push_back(cur_lock);
+       if (add_lock(cur_lock, true))
+         activated_locks.push_back(cur_lock);
       }
     }
   }
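
For completeness, a hedged usage sketch of the new ceph_lock_state_t waiting-lock
helpers.  This is not part of the commit; it assumes a Ceph tree of this vintage
(so that "mds/flock.h", ceph_filelock and the CEPH_LOCK_* file-lock constants are
available) and the usual Ceph logging/global initialization, since these methods
log via dout():

    #include <cassert>
    #include <cstring>

    #include "mds/flock.h"   // assumed include path within the MDS source tree

    void waiting_lock_cancel_demo()
    {
      ceph_lock_state_t lock_state;

      // Client 2 already holds an exclusive lock on [0, 1024).
      ceph_filelock held;
      memset(&held, 0, sizeof(held));
      held.start = 0;   held.length = 1024;
      held.client = 2;  held.pid = 100;
      held.type = CEPH_LOCK_EXCL;
      bool got = lock_state.add_lock(held, false);
      assert(got);

      // Client 1 asks for an overlapping exclusive lock and is willing to wait,
      // so the attempt is parked on waiting_locks instead of held_locks.
      ceph_filelock fl;
      memset(&fl, 0, sizeof(fl));
      fl.start = 0;   fl.length = 1024;
      fl.client = 1;  fl.pid = 42;
      fl.type = CEPH_LOCK_EXCL;
      bool granted = lock_state.add_lock(fl, true /* wait_on_fail */);
      assert(!granted && lock_state.is_waiting(fl));

      // The cancel path this commit adds: an unlock for a queued attempt just
      // removes the waiter, so a later retry of the lock request no longer
      // finds itself on the wait list and is answered with -EINTR.
      lock_state.remove_waiting(fl);
      assert(!lock_state.is_waiting(fl));
    }

Note that is_waiting() and remove_waiting() match a waiter by start, length,
client, pid and pid_namespace; the lock type is not part of the comparison,
which is what lets an unlock request (type CEPH_LOCK_UNLOCK) cancel a queued
shared or exclusive attempt.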