mds: fix canceled lock attempt
author     Sage Weil <sage@newdream.net>
           Wed, 25 May 2011 21:54:15 +0000 (14:54 -0700)
committer  Sage Weil <sage@newdream.net>
           Wed, 25 May 2011 21:56:28 +0000 (14:56 -0700)
If a client tries to lock a file, has to wait, and then cancels the attempt,
it sends an unlock request to unwind its state.  With this change (the
resulting flow is sketched after the file list below):

 - the unlock now removes the waiting lock attempt from the wait list
 - when the lock request retries and finds it is no longer on the wait
   list, it fails with -EINTR

Signed-off-by: Sage Weil <sage@newdream.net>
src/mds/MDCache.h
src/mds/Server.cc
src/mds/flock.h
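
For orientation, here is the control flow this commit gives
Server::handle_client_file_setlock, abridged from the Server.cc hunk below
(comments added; this is a readability sketch, not standalone code):

    if (set_lock.type == CEPH_LOCK_UNLOCK) {
      // An unlock either cancels a still-queued attempt or drops a held lock.
      if (lock_state->is_waiting(set_lock)) {
        lock_state->remove_waiting(set_lock);               // cancel the queued attempt
      } else {
        lock_state->remove_lock(set_lock, activated_locks); // drop a held lock
        cur->take_waiting(CInode::WAIT_FLOCK, waiters);     // wake blocked lockers
      }
      reply_request(mdr, 0);
      mds->queue_waiters(waiters);
    } else {
      if (mdr->more()->flock_was_waiting && !lock_state->is_waiting(set_lock)) {
        // We queued ourselves on an earlier pass but are no longer on the wait
        // list: the client canceled the attempt, so fail the retried request.
        reply_request(mdr, -EINTR);
      } else if (!lock_state->add_lock(set_lock, will_wait)) {
        if (!will_wait) {
          reply_request(mdr, -EWOULDBLOCK);                 // non-blocking attempt failed
        } else {
          mdr->more()->flock_was_waiting = true;            // remember that we queued
          mds->locker->drop_locks(mdr);
          mdr->drop_local_auth_pins();
          cur->add_waiter(CInode::WAIT_FLOCK, new C_MDS_RetryRequest(mdcache, mdr));
        }
      } else {
        reply_request(mdr, 0);                              // lock granted immediately
      }
    }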

diff --git a/src/mds/MDCache.h b/src/mds/MDCache.h
index 07dc393757020f1405c6bc8b8538eec4010ec97e..5b3b041b3d141e91dec3c6071d66c6a1b4aade62 100644
--- a/src/mds/MDCache.h
+++ b/src/mds/MDCache.h
@@ -306,6 +306,9 @@ struct MDRequest : public Mutation {
     map<client_t,uint64_t> sseq_map;
     map<CInode*, map<client_t,Capability::Export> > cap_imports;
     
+    // for lock/flock
+    bool flock_was_waiting;
+
     // for snaps
     version_t stid;
     bufferlist snapidbl;
@@ -317,6 +320,7 @@ struct MDRequest : public Mutation {
     More() : 
       src_reanchor_atid(0), dst_reanchor_atid(0), inode_import_v(0),
       destdn_was_remote_inode(0), was_link_merge(false),
+      flock_was_waiting(false),
       stid(0),
       slave_commit(0) { }
   } *_more;
diff --git a/src/mds/Server.cc b/src/mds/Server.cc
index 95f81de17770e0000a164d7b2aec7d51cb3ddc0b..c1eceff53351c19b567863da10e0521d86c4c788 100644
--- a/src/mds/Server.cc
+++ b/src/mds/Server.cc
@@ -2855,38 +2855,47 @@ void Server::handle_client_file_setlock(MDRequest *mdr)
     return;
   }
 
-  dout(0) << "state prior to lock change: " << *lock_state << dendl;;
+  dout(10) << " state prior to lock change: " << *lock_state << dendl;;
   if (CEPH_LOCK_UNLOCK == set_lock.type) {
-    dout(0) << "got unlock" << dendl;
+    dout(10) << " unlock attempt on " << set_lock << dendl;
     list<ceph_filelock> activated_locks;
-    lock_state->remove_lock(set_lock, activated_locks);
+    list<Context*> waiters;
+    if (lock_state->is_waiting(set_lock)) {
+      dout(10) << " unlock removing waiting lock " << set_lock << dendl;
+      lock_state->remove_waiting(set_lock);
+    } else {
+      dout(10) << " unlock attempt on " << set_lock << dendl;
+      lock_state->remove_lock(set_lock, activated_locks);
+      cur->take_waiting(CInode::WAIT_FLOCK, waiters);
+    }
     reply_request(mdr, 0);
     /* For now we're ignoring the activated locks because their responses
      * will be sent when the lock comes up again in rotation by the MDS.
      * It's a cheap hack, but it's easy to code. */
-    list<Context*> waiters;
-    cur->take_waiting(CInode::WAIT_FLOCK, waiters);
     mds->queue_waiters(waiters);
   } else {
-    dout(0) << "got lock" << dendl;
-    if (lock_state->add_lock(set_lock, will_wait)) {
-      // lock set successfully
-      dout(0) << "it succeeded" << dendl;
-      reply_request(mdr, 0);
-    } else {
-      dout(0) << "it failed on this attempt" << dendl;
+    dout(10) << " lock attempt on " << set_lock << dendl;
+    if (mdr->more()->flock_was_waiting &&
+       !lock_state->is_waiting(set_lock)) {
+      dout(10) << " was waiting for lock but not anymore, must have been canceled " << set_lock << dendl;
+      reply_request(mdr, -EINTR);
+    } else if (!lock_state->add_lock(set_lock, will_wait)) {
+      dout(10) << " it failed on this attempt" << dendl;
       // couldn't set lock right now
-      if (!will_wait)
-       reply_request(mdr, -1);
-      else {
-       dout(0) << "but it's a wait" << dendl;
+      if (!will_wait) {
+       reply_request(mdr, -EWOULDBLOCK);
+      } else {
+       dout(10) << " added to waiting list" << dendl;
+       assert(lock_state->is_waiting(set_lock));
+       mdr->more()->flock_was_waiting = true;
        mds->locker->drop_locks(mdr);
        mdr->drop_local_auth_pins();
        cur->add_waiter(CInode::WAIT_FLOCK, new C_MDS_RetryRequest(mdcache, mdr));
       }
-    }
+    } else
+      reply_request(mdr, 0);
   }
-  dout(0) << "state after lock change: " << *lock_state << dendl;
+  dout(10) << " state after lock change: " << *lock_state << dendl;
 }
 
 void Server::handle_client_file_readlock(MDRequest *mdr)
diff --git a/src/mds/flock.h b/src/mds/flock.h
index 39be226ed3a2036be58910ef097089f6d703e76d..f6194653b595c6a605d85c859be9bb46fd22acba 100644
--- a/src/mds/flock.h
+++ b/src/mds/flock.h
@@ -3,6 +3,8 @@
 #ifndef CEPH_MDS_FLOCK_H
 #define CEPH_MDS_FLOCK_H
 
+#include <errno.h>
+
 #include "mdstypes.h"
 
 
@@ -14,6 +16,15 @@ inline ostream& operator<<(ostream& out, ceph_filelock& l) {
   return out;
 }
 
+inline bool operator==(ceph_filelock& l, ceph_filelock& r) {
+  return
+    l.length == r.length &&
+    l.client == r.client &&
+    l.pid == r.pid &&
+    l.pid_namespace == r.pid_namespace &&
+    l.type == r.type;
+}
+
 struct ceph_lock_state_t {
   multimap<uint64_t, ceph_filelock> held_locks;    // current locks
   multimap<uint64_t, ceph_filelock> waiting_locks; // locks waiting for other locks
@@ -21,11 +32,45 @@ struct ceph_lock_state_t {
   map<client_t, int> client_held_lock_counts;
   map<client_t, int> client_waiting_lock_counts;
 
+  bool is_waiting(ceph_filelock &fl) {
+    multimap<uint64_t, ceph_filelock>::iterator p = waiting_locks.find(fl.start);
+    while (p != waiting_locks.end()) {
+      if (p->second.start > fl.start)
+       return false;
+      if (p->second.length == fl.length &&
+         p->second.client == fl.client &&
+         p->second.pid == fl.pid &&
+         p->second.pid_namespace == fl.pid_namespace)
+       return true;
+      ++p;
+    }
+    return false;
+  }
+  void remove_waiting(ceph_filelock& fl) {
+    multimap<uint64_t, ceph_filelock>::iterator p = waiting_locks.find(fl.start);
+    while (p != waiting_locks.end()) {
+      if (p->second.start > fl.start)
+       return;
+      if (p->second.length == fl.length &&
+         p->second.client == fl.client &&
+         p->second.pid == fl.pid &&
+         p->second.pid_namespace == fl.pid_namespace) {
+       waiting_locks.erase(p);
+       return;
+      }
+      ++p;
+    }
+  }
+
   /*
    * Try to set a new lock. If it's blocked and wait_on_fail is true,
    * add the lock to waiting_locks.
    * The lock needs to be of type CEPH_LOCK_EXCL or CEPH_LOCK_SHARED.
    *
+   * If we already added ourselves to waiting_locks, did_wait will be
+   * true.  If did_wait==true and we're not on the list, that means we
+   * were canceled and we should return an error.
+   *
    * Returns true if set, false if not set.
    */
   bool add_lock(ceph_filelock& new_lock, bool wait_on_fail) {
@@ -33,8 +78,9 @@ struct ceph_lock_state_t {
     bool ret = false;
     list<multimap<uint64_t, ceph_filelock>::iterator>
       overlapping_locks, self_overlapping_locks, neighbor_locks;
+
     // first, get any overlapping locks and split them into owned-by-us and not
-    if(get_overlapping_locks(new_lock, overlapping_locks, &neighbor_locks)) {
+    if (get_overlapping_locks(new_lock, overlapping_locks, &neighbor_locks)) {
       dout(15) << "got overlapping lock, splitting by owner" << dendl;
       split_by_owner(new_lock, overlapping_locks, self_overlapping_locks);
     }
@@ -44,24 +90,19 @@ struct ceph_lock_state_t {
        dout(15) << "overlapping lock, and this lock is exclusive, can't set"
                << dendl;
        if (wait_on_fail) {
-         waiting_locks.
-           insert(pair<uint64_t, ceph_filelock>(new_lock.start, new_lock));
+         waiting_locks.insert(pair<uint64_t, ceph_filelock>(new_lock.start, new_lock));
        }
-       ret = false;
       } else { //shared lock, check for any exclusive locks blocking us
        if (contains_exclusive_lock(overlapping_locks)) { //blocked :(
          dout(15) << " blocked by exclusive lock in overlapping_locks" << dendl;
          if (wait_on_fail) {
-           waiting_locks.
-             insert(pair<uint64_t, ceph_filelock>(new_lock.start, new_lock));
+           waiting_locks.insert(pair<uint64_t, ceph_filelock>(new_lock.start, new_lock));
          }
-         ret = false;
        } else {
          //yay, we can insert a shared lock
          dout(15) << "inserting shared lock" << dendl;
          adjust_locks(self_overlapping_locks, new_lock, neighbor_locks);
-         held_locks.
-           insert(pair<uint64_t, ceph_filelock>(new_lock.start, new_lock));
+         held_locks.insert(pair<uint64_t, ceph_filelock>(new_lock.start, new_lock));
          ret = true;
        }
       }
@@ -72,8 +113,10 @@ struct ceph_lock_state_t {
                        (new_lock.start, new_lock));
       ret = true;
     }
-    if (ret) ++client_held_lock_counts[(client_t)new_lock.client];
-    else if (wait_on_fail) ++client_waiting_lock_counts[(client_t)new_lock.client];
+    if (ret)
+      ++client_held_lock_counts[(client_t)new_lock.client];
+    else if (wait_on_fail)
+      ++client_waiting_lock_counts[(client_t)new_lock.client];
     return ret;
   }
 
@@ -195,7 +238,8 @@ struct ceph_lock_state_t {
        if (!client_waiting_lock_counts[(client_t)cur_lock.client]) {
          client_waiting_lock_counts.erase((client_t)cur_lock.client);
        }
-       if(add_lock(cur_lock, true)) activated_locks.push_back(cur_lock);
+       if (add_lock(cur_lock, true))
+         activated_locks.push_back(cur_lock);
       }
     }
   }
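
For completeness, a hedged usage sketch of the new ceph_lock_state_t waiting-lock
helpers.  This is not part of the commit; it assumes a Ceph tree of this vintage
(so that "mds/flock.h", ceph_filelock and the CEPH_LOCK_* file-lock constants are
available) and the usual Ceph logging/global initialization, since these methods
log via dout():

    #include <cassert>
    #include <cstring>

    #include "mds/flock.h"   // assumed include path within the MDS source tree

    void waiting_lock_cancel_demo()
    {
      ceph_lock_state_t lock_state;

      // Client 2 already holds an exclusive lock on [0, 1024).
      ceph_filelock held;
      memset(&held, 0, sizeof(held));
      held.start = 0;   held.length = 1024;
      held.client = 2;  held.pid = 100;
      held.type = CEPH_LOCK_EXCL;
      bool got = lock_state.add_lock(held, false);
      assert(got);

      // Client 1 asks for an overlapping exclusive lock and is willing to wait,
      // so the attempt is parked on waiting_locks instead of held_locks.
      ceph_filelock fl;
      memset(&fl, 0, sizeof(fl));
      fl.start = 0;   fl.length = 1024;
      fl.client = 1;  fl.pid = 42;
      fl.type = CEPH_LOCK_EXCL;
      bool granted = lock_state.add_lock(fl, true /* wait_on_fail */);
      assert(!granted && lock_state.is_waiting(fl));

      // The cancel path this commit adds: an unlock for a queued attempt just
      // removes the waiter, so a later retry of the lock request no longer
      // finds itself on the wait list and is answered with -EINTR.
      lock_state.remove_waiting(fl);
      assert(!lock_state.is_waiting(fl));
    }

Note that is_waiting() and remove_waiting() match a waiter by start, length,
client, pid and pid_namespace; the lock type is not part of the comparison,
which is what lets an unlock request (type CEPH_LOCK_UNLOCK) cancel a queued
shared or exclusive attempt.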