flock: move implementation into a separate c++ file
author     Greg Farnum <gregory.farnum@dreamhost.com>
           Fri, 2 Sep 2011 00:10:50 +0000 (17:10 -0700)
committer  Greg Farnum <gregory.farnum@dreamhost.com>
           Fri, 2 Sep 2011 00:10:50 +0000 (17:10 -0700)
Signed-off-by: Greg Farnum <gregory.farnum@dreamhost.com>
src/Makefile.am
src/mds/flock.cc [new file with mode: 0644]
src/mds/flock.h

diff --git a/src/Makefile.am b/src/Makefile.am
index e53b68def915076f95d69d1f6d2499999166153f..3e9fa587142cb95168fefd8be75fcf21ca1ffee6 100644 (file)
@@ -885,6 +885,7 @@ libmds_a_SOURCES = \
        mds/Dumper.cc \
        mds/Resetter.cc \
        mds/MDS.cc \
+       mds/flock.cc \
        mds/locks.c \
        mds/journal.cc \
        mds/Server.cc \
@@ -1118,7 +1119,6 @@ noinst_HEADERS = \
        include/rbd/librbd.h\
        include/rbd/librbd.hpp\
        logrotate.conf\
-       mds/flock.h\
        mds/inode_backtrace.h\
        mds/locks.c\
        mds/locks.h\
diff --git a/src/mds/flock.cc b/src/mds/flock.cc
new file mode 100644 (file)
index 0000000..c890617
--- /dev/null
@@ -0,0 +1,515 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+#include <errno.h>
+
+#include "common/debug.h"
+#include "mdstypes.h"
+#include "mds/flock.h"
+
+bool ceph_lock_state_t::is_waiting(ceph_filelock &fl)
+{
+  multimap<uint64_t, ceph_filelock>::iterator p = waiting_locks.find(fl.start);
+  while (p != waiting_locks.end()) {
+    if (p->second.start > fl.start)
+      return false;
+    if (p->second.length == fl.length &&
+        p->second.client == fl.client &&
+        p->second.pid == fl.pid &&
+        p->second.pid_namespace == fl.pid_namespace)
+      return true;
+    ++p;
+  }
+  return false;
+}
+
+void ceph_lock_state_t::remove_waiting(ceph_filelock& fl)
+{
+  multimap<uint64_t, ceph_filelock>::iterator p = waiting_locks.find(fl.start);
+  while (p != waiting_locks.end()) {
+    if (p->second.start > fl.start)
+      return;
+    if (p->second.length == fl.length &&
+        p->second.client == fl.client &&
+        p->second.pid == fl.pid &&
+        p->second.pid_namespace == fl.pid_namespace) {
+      waiting_locks.erase(p);
+      return;
+    }
+    ++p;
+  }
+}
+
+bool ceph_lock_state_t::add_lock(ceph_filelock& new_lock, bool wait_on_fail)
+{
+  dout(15) << "add_lock " << new_lock << dendl;
+  bool ret = false;
+  list<multimap<uint64_t, ceph_filelock>::iterator>
+    overlapping_locks, self_overlapping_locks, neighbor_locks;
+
+  // first, get any overlapping locks and split them into owned-by-us and not
+  if (get_overlapping_locks(new_lock, overlapping_locks, &neighbor_locks)) {
+    dout(15) << "got overlapping lock, splitting by owner" << dendl;
+    split_by_owner(new_lock, overlapping_locks, self_overlapping_locks);
+  }
+  if (!overlapping_locks.empty()) { //overlapping locks owned by others :(
+    if (CEPH_LOCK_EXCL == new_lock.type) {
+      //can't set, we want an exclusive
+      dout(15) << "overlapping lock, and this lock is exclusive, can't set"
+              << dendl;
+      if (wait_on_fail) {
+        waiting_locks.insert(pair<uint64_t, ceph_filelock>(new_lock.start, new_lock));
+      }
+    } else { //shared lock, check for any exclusive locks blocking us
+      if (contains_exclusive_lock(overlapping_locks)) { //blocked :(
+        dout(15) << " blocked by exclusive lock in overlapping_locks" << dendl;
+        if (wait_on_fail) {
+          waiting_locks.insert(pair<uint64_t, ceph_filelock>(new_lock.start, new_lock));
+        }
+      } else {
+        //yay, we can insert a shared lock
+        dout(15) << "inserting shared lock" << dendl;
+        adjust_locks(self_overlapping_locks, new_lock, neighbor_locks);
+        held_locks.insert(pair<uint64_t, ceph_filelock>(new_lock.start, new_lock));
+        ret = true;
+      }
+    }
+  } else { //no overlapping locks except our own
+    adjust_locks(self_overlapping_locks, new_lock, neighbor_locks);
+    dout(15) << "no conflicts, inserting " << new_lock << dendl;
+    held_locks.insert(pair<uint64_t, ceph_filelock>
+                      (new_lock.start, new_lock));
+    ret = true;
+  }
+  if (ret)
+    ++client_held_lock_counts[(client_t)new_lock.client];
+  else if (wait_on_fail)
+    ++client_waiting_lock_counts[(client_t)new_lock.client];
+  return ret;
+}
+
+void ceph_lock_state_t::look_for_lock(ceph_filelock& testing_lock)
+{
+  list<multimap<uint64_t, ceph_filelock>::iterator> overlapping_locks,
+    self_overlapping_locks;
+  if (get_overlapping_locks(testing_lock, overlapping_locks)) {
+    split_by_owner(testing_lock, overlapping_locks, self_overlapping_locks);
+  }
+  if (!overlapping_locks.empty()) { //somebody else owns overlapping lock
+    if (CEPH_LOCK_EXCL == testing_lock.type) { //any lock blocks it
+      testing_lock = (*overlapping_locks.begin())->second;
+    } else {
+      ceph_filelock *blocking_lock;
+      if ((blocking_lock = contains_exclusive_lock(overlapping_locks))) {
+        testing_lock = *blocking_lock;
+      } else { //nothing blocking!
+        testing_lock.type = CEPH_LOCK_UNLOCK;
+      }
+    }
+    return;
+  }
+  //if we get here, only our own locks block
+  testing_lock.type = CEPH_LOCK_UNLOCK;
+}
+
+void ceph_lock_state_t::remove_lock(ceph_filelock removal_lock,
+                 list<ceph_filelock>& activated_locks)
+{
+  list<multimap<uint64_t, ceph_filelock>::iterator> overlapping_locks,
+    self_overlapping_locks, crossed_waiting_locks;
+  if (get_overlapping_locks(removal_lock, overlapping_locks)) {
+    dout(15) << "splitting by owner" << dendl;
+    split_by_owner(removal_lock, overlapping_locks, self_overlapping_locks);
+  } else dout(15) << "attempt to remove lock at " << removal_lock.start
+                 << " but no locks there!" << dendl;
+  bool remove_to_end = (0 == removal_lock.length);
+  bool old_lock_to_end;
+  uint64_t removal_start = removal_lock.start;
+  uint64_t removal_end = removal_start + removal_lock.length - 1;
+  uint64_t old_lock_end;
+  __s64 old_lock_client = 0;
+  ceph_filelock *old_lock;
+
+  dout(15) << "examining " << self_overlapping_locks.size()
+          << " self-overlapping locks for removal" << dendl;
+  for (list<multimap<uint64_t, ceph_filelock>::iterator>::iterator
+         iter = self_overlapping_locks.begin();
+       iter != self_overlapping_locks.end();
+       ++iter) {
+    dout(15) << "self overlapping lock " << (*iter)->second << dendl;
+    old_lock = &(*iter)->second;
+    old_lock_to_end = (0 == old_lock->length);
+    old_lock_end = old_lock->start + old_lock->length - 1;
+    old_lock_client = old_lock->client;
+    if (remove_to_end) {
+      if (old_lock->start < removal_start) {
+        old_lock->length = removal_start - old_lock->start;
+      } else {
+        dout(15) << "erasing " << (*iter)->second << dendl;
+        held_locks.erase(*iter);
+        --client_held_lock_counts[old_lock_client];
+      }
+    } else if (old_lock_to_end) {
+      ceph_filelock append_lock = *old_lock;
+      append_lock.start = removal_end+1;
+      held_locks.insert(pair<uint64_t, ceph_filelock>
+                        (append_lock.start, append_lock));
+      ++client_held_lock_counts[(client_t)old_lock->client];
+      if (old_lock->start >= removal_start) {
+        dout(15) << "erasing " << (*iter)->second << dendl;
+        held_locks.erase(*iter);
+        --client_held_lock_counts[old_lock_client];
+      } else old_lock->length = removal_start - old_lock->start;
+    } else {
+      if (old_lock_end  > removal_end) {
+        ceph_filelock append_lock = *old_lock;
+        append_lock.start = removal_end + 1;
+        append_lock.length = old_lock_end - append_lock.start + 1;
+        held_locks.insert(pair<uint64_t, ceph_filelock>
+                          (append_lock.start, append_lock));
+        ++client_held_lock_counts[(client_t)old_lock->client];
+      }
+      if (old_lock->start < removal_start) {
+        old_lock->length = removal_start - old_lock->start;
+      } else {
+        dout(15) << "erasing " << (*iter)->second << dendl;
+        held_locks.erase(*iter);
+        --client_held_lock_counts[old_lock_client];
+      }
+    }
+    if (!client_held_lock_counts[old_lock_client]) {
+      client_held_lock_counts.erase(old_lock_client);
+    }
+  }
+
+  /* okay, we've removed the locks, but removing them might allow some
+   * other waiting locks to come through */
+  if (get_waiting_overlaps(removal_lock, crossed_waiting_locks)) {
+    /* Let's do this the SUPER lazy way for now. Should work out something
+      that's slightly less slow and wasteful, though:
+      1) Remove the lock from waiting_locks.
+      2) Attempt to insert it via add_lock().
+      3) Add it to the success list if add_lock() returns true.
+
+      In the future, should probably set this up to detect some
+      guaranteed blocks and do fewer map lookups.
+     */
+    for (list<multimap<uint64_t, ceph_filelock>::iterator>::iterator
+           iter = crossed_waiting_locks.begin();
+         iter != crossed_waiting_locks.end();
+         ++iter) {
+      ceph_filelock cur_lock = (*iter)->second;
+      waiting_locks.erase(*iter);
+      --client_waiting_lock_counts[(client_t)cur_lock.client];
+      if (!client_waiting_lock_counts[(client_t)cur_lock.client]) {
+        client_waiting_lock_counts.erase((client_t)cur_lock.client);
+      }
+      if (add_lock(cur_lock, true))
+        activated_locks.push_back(cur_lock);
+    }
+  }
+}
+
+bool ceph_lock_state_t::remove_all_from (client_t client)
+{
+  bool cleared_any = false;
+  if (client_held_lock_counts.count(client)) {
+    remove_all_from(client, held_locks);
+    client_held_lock_counts.erase(client);
+    cleared_any = true;
+  }
+  if (client_waiting_lock_counts.count(client)) {
+    remove_all_from(client, waiting_locks);
+    client_waiting_lock_counts.erase(client);
+  }
+  return cleared_any;
+}
+
+void ceph_lock_state_t::adjust_locks(list<multimap<uint64_t, ceph_filelock>::iterator> old_locks,
+                  ceph_filelock& new_lock,
+                  list<multimap<uint64_t, ceph_filelock>::iterator>
+                  neighbor_locks)
+{
+  dout(15) << "adjust_locks" << dendl;
+  bool new_lock_to_end = (0 == new_lock.length);
+  bool old_lock_to_end;
+  uint64_t new_lock_start = new_lock.start;
+  uint64_t new_lock_end = new_lock.start + new_lock.length - 1;
+  uint64_t old_lock_start, old_lock_end;
+  __s64 old_lock_client = 0;
+  ceph_filelock *old_lock;
+  for (list<multimap<uint64_t, ceph_filelock>::iterator>::iterator
+         iter = old_locks.begin();
+       iter != old_locks.end();
+       ++iter) {
+    old_lock = &(*iter)->second;
+    dout(15) << "adjusting lock: " << *old_lock << dendl;
+    old_lock_to_end = (0 == old_lock->length);
+    old_lock_start = old_lock->start;
+    old_lock_end = old_lock->start + old_lock->length - 1;
+    new_lock_start = new_lock.start;
+    new_lock_end = new_lock.start + new_lock.length - 1;
+    old_lock_client = old_lock->client;
+    if (new_lock_to_end || old_lock_to_end) {
+      //special code path to deal with a length set at 0
+      dout(15) << "one lock extends forever" << dendl;
+      if (old_lock->type == new_lock.type) {
+        //just unify them in new lock, remove old lock
+        dout(15) << "same lock type, unifying" << dendl;
+        new_lock.start = (new_lock_start < old_lock_start) ? new_lock_start :
+          old_lock_start;
+        new_lock.length = 0;
+        held_locks.erase(*iter);
+        --client_held_lock_counts[old_lock_client];
+      } else { //not same type, have to keep any remains of old lock around
+        dout(15) << "shrinking old lock" << dendl;
+        if (new_lock_to_end) {
+          if (old_lock_start < new_lock_start) {
+            old_lock->length = new_lock_start - old_lock_start;
+          } else {
+            held_locks.erase(*iter);
+            --client_held_lock_counts[old_lock_client];
+          }
+        } else { //old lock extends past end of new lock
+          ceph_filelock appended_lock = *old_lock;
+          appended_lock.start = new_lock_end + 1;
+          held_locks.insert(pair<uint64_t, ceph_filelock>
+                            (appended_lock.start, appended_lock));
+          ++client_held_lock_counts[(client_t)old_lock->client];
+          if (old_lock_start < new_lock_start) {
+            old_lock->length = new_lock_start - old_lock_start;
+          } else {
+            held_locks.erase(*iter);
+            --client_held_lock_counts[old_lock_client];
+          }
+        }
+      }
+    } else {
+      if (old_lock->type == new_lock.type) { //just merge them!
+        dout(15) << "merging locks, they're the same type" << dendl;
+        new_lock.start = (old_lock_start < new_lock_start ) ? old_lock_start :
+          new_lock_start;
+        uint64_t new_end = (new_lock_end > old_lock_end) ? new_lock_end :
+          old_lock_end;
+        new_lock.length = new_end - new_lock.start + 1;
+        dout(15) << "erasing lock " << (*iter)->second << dendl;
+        held_locks.erase(*iter);
+        --client_held_lock_counts[old_lock_client];
+      } else { //we'll have to update sizes and maybe make new locks
+        dout(15) << "locks aren't same type, changing sizes" << dendl;
+        if (old_lock_end > new_lock_end) { //add extra lock after new_lock
+          ceph_filelock appended_lock = *old_lock;
+          appended_lock.start = new_lock_end + 1;
+          appended_lock.length = old_lock_end - appended_lock.start + 1;
+          held_locks.insert(pair<uint64_t, ceph_filelock>
+                            (appended_lock.start, appended_lock));
+          ++client_held_lock_counts[(client_t)old_lock->client];
+        }
+        if (old_lock_start < new_lock_start) {
+          old_lock->length = new_lock_start - old_lock_start;
+        } else { //old_lock starts inside new_lock, so remove it
+          //if it extended past new_lock_end it's been replaced
+          held_locks.erase(*iter);
+          --client_held_lock_counts[old_lock_client];
+        }
+      }
+    }
+    if (!client_held_lock_counts[old_lock_client]) {
+      client_held_lock_counts.erase(old_lock_client);
+    }
+  }
+
+  //make sure to coalesce neighboring locks
+  for (list<multimap<uint64_t, ceph_filelock>::iterator>::iterator
+         iter = neighbor_locks.begin();
+       iter != neighbor_locks.end();
+       ++iter) {
+    old_lock = &(*iter)->second;
+    old_lock_client = old_lock->client;
+    dout(15) << "lock to coalesce: " << *old_lock << dendl;
+    /* because if it's a neighboring lock there can't be any self-overlapping
+       locks that covered it */
+    if (old_lock->type == new_lock.type) { //merge them
+      if (0 == new_lock.length) {
+        if (old_lock->start + old_lock->length == new_lock.start) {
+          new_lock.start = old_lock->start;
+        } else assert(0); /* if there's no end to new_lock, the neighbor
+                             HAS TO be to left side */
+      } else if (0 == old_lock->length) {
+        if (new_lock.start + new_lock.length == old_lock->start) {
+          new_lock.length = 0;
+        } else assert(0); //same as before, but reversed
+      } else {
+        if (old_lock->start + old_lock->length == new_lock.start) {
+          new_lock.start = old_lock->start;
+          new_lock.length = old_lock->length + new_lock.length;
+        } else if (new_lock.start + new_lock.length == old_lock->start) {
+          new_lock.length = old_lock->length + new_lock.length;
+        }
+      }
+      held_locks.erase(*iter);
+      --client_held_lock_counts[old_lock_client];
+    }
+    if (!client_held_lock_counts[old_lock_client]) {
+      client_held_lock_counts.erase(old_lock_client);
+    }
+  }
+}
+
+void ceph_lock_state_t::remove_all_from(client_t client,
+                                        multimap<uint64_t,
+                                          ceph_filelock>& locks)
+{
+  multimap<uint64_t, ceph_filelock>::iterator iter = locks.begin();
+  while (iter != locks.end()) {
+    if ((client_t)iter->second.client == client) {
+      locks.erase(iter++);
+    } else ++iter;
+  }
+}
+
+multimap<uint64_t, ceph_filelock>::iterator
+ceph_lock_state_t::get_lower_bound(uint64_t start,
+                                   multimap<uint64_t, ceph_filelock>& lock_map)
+{
+  multimap<uint64_t, ceph_filelock>::iterator lower_bound =
+    lock_map.lower_bound(start);
+  if ((lower_bound == lock_map.end() || lower_bound->first != start)
+      && (start != 0)
+      && (lower_bound != lock_map.begin())) --lower_bound;
+  if (lock_map.end() == lower_bound)
+    dout(15) << "get_lower_bound returning end()" << dendl;
+  else dout(15) << "get_lower_bound returning iterator pointing to "
+                << lower_bound->second << dendl;
+  return lower_bound;
+}
+
+multimap<uint64_t, ceph_filelock>::iterator
+ceph_lock_state_t::get_last_before(uint64_t end,
+                                   multimap<uint64_t, ceph_filelock>& lock_map)
+{
+  multimap<uint64_t, ceph_filelock>::iterator last =
+    lock_map.upper_bound(end);
+  if (last != lock_map.begin()) --last;
+  if (lock_map.end() == last)
+    dout(15) << "get_last_before returning end()" << dendl;
+  else dout(15) << "get_last_before returning iterator pointing to "
+               << last->second << dendl;
+  return last;
+}
+
+bool ceph_lock_state_t::share_space(
+    multimap<uint64_t, ceph_filelock>::iterator& iter,
+    uint64_t start, uint64_t end)
+{
+  bool ret = ((iter->first >= start && iter->first <= end) ||
+              ((iter->first < start) &&
+               (((iter->first + iter->second.length - 1) >= start) ||
+                (0 == iter->second.length))));
+  dout(15) << "share_space got start: " << start << ", end: " << end
+          << ", lock: " << iter->second << ", returning " << ret << dendl;
+  return ret;
+}
+
+bool ceph_lock_state_t::get_overlapping_locks(ceph_filelock& lock,
+                           list<multimap<uint64_t,
+                               ceph_filelock>::iterator> & overlaps,
+                           list<multimap<uint64_t,
+                               ceph_filelock>::iterator> *self_neighbors)
+{
+  dout(15) << "get_overlapping_locks" << dendl;
+  // create a lock starting one earlier and ending one later
+  // to check for neighbors
+  ceph_filelock neighbor_check_lock = lock;
+  if (neighbor_check_lock.start != 0) {
+    neighbor_check_lock.start = neighbor_check_lock.start - 1;
+    if (neighbor_check_lock.length)
+      neighbor_check_lock.length = neighbor_check_lock.length + 2;
+  } else {
+    if (neighbor_check_lock.length)
+      neighbor_check_lock.length = neighbor_check_lock.length + 1;
+  }
+  //find the last held lock starting at the point after lock
+  uint64_t endpoint = lock.start;
+  if (lock.length) {
+    endpoint += lock.length;
+  } else {
+    endpoint = uint64_t(-1); // max offset
+  }
+  multimap<uint64_t, ceph_filelock>::iterator iter =
+    get_last_before(endpoint, held_locks);
+  bool cont = iter != held_locks.end();
+  while (cont) {
+    if (share_space(iter, lock)) {
+      overlaps.push_front(iter);
+    } else if (self_neighbors &&
+               (neighbor_check_lock.client == iter->second.client) &&
+               (neighbor_check_lock.pid == iter->second.pid) &&
+               share_space(iter, neighbor_check_lock)) {
+      self_neighbors->push_front(iter);
+    }
+    if ((iter->first < lock.start) && (CEPH_LOCK_EXCL == iter->second.type)) {
+      //can't be any more overlapping locks or they'd interfere with this one
+      cont = false;
+    } else if (held_locks.begin() == iter) cont = false;
+    else --iter;
+  }
+  return !overlaps.empty();
+}
+
+bool ceph_lock_state_t::get_waiting_overlaps(ceph_filelock& lock,
+                                             list<multimap<uint64_t,
+                                               ceph_filelock>::iterator>&
+                                               overlaps)
+{
+  dout(15) << "get_waiting_overlaps" << dendl;
+  uint64_t end = lock.start;
+  if (lock.length)
+    end += lock.length - 1;
+  else // zero length means to end-of-file
+    end = uint64_t(-1);
+  multimap<uint64_t, ceph_filelock>::iterator iter =
+    get_last_before(end, waiting_locks);
+  bool cont = iter != waiting_locks.end();
+  while (cont) {
+    if (share_space(iter, lock)) overlaps.push_front(iter);
+    if (waiting_locks.begin() == iter) cont = false;
+    else --iter;
+  }
+  return !overlaps.empty();
+}
+
+void ceph_lock_state_t::split_by_owner(ceph_filelock& owner,
+                                       list<multimap<uint64_t,
+                                           ceph_filelock>::iterator>& locks,
+                                       list<multimap<uint64_t,
+                                           ceph_filelock>::iterator>&
+                                           owned_locks)
+{
+  list<multimap<uint64_t, ceph_filelock>::iterator>::iterator
+    iter = locks.begin();
+  dout(15) << "owner lock: " << owner << dendl;
+  while (iter != locks.end()) {
+    dout(15) << "comparing to " << (*iter)->second << dendl;
+    if ((*iter)->second.client == owner.client &&
+        (*iter)->second.pid_namespace == owner.pid_namespace &&
+        (*iter)->second.pid == owner.pid) {
+      dout(15) << "success, pushing to owned_locks" << dendl;
+      owned_locks.push_back(*iter);
+      iter = locks.erase(iter);
+    } else {
+      dout(15) << "failure, something not equal in this group "
+              << (*iter)->second.client << ":" << owner.client << ","
+              << (*iter)->second.pid_namespace << ":" << owner.pid_namespace
+              << "," << (*iter)->second.pid << ":" << owner.pid << dendl;
+      ++iter;
+    }
+  }
+}
+
+ceph_filelock *
+ceph_lock_state_t::contains_exclusive_lock(list<multimap<uint64_t,
+                                               ceph_filelock>::iterator>& locks)
+{
+  for (list<multimap<uint64_t, ceph_filelock>::iterator>::iterator
+         iter = locks.begin();
+       iter != locks.end();
+       ++iter) {
+    if (CEPH_LOCK_EXCL == (*iter)->second.type) return &(*iter)->second;
+  }
+  return NULL;
+}
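
For orientation, a minimal usage sketch of the ceph_lock_state_t interface implemented above. This is not part of the commit: it assumes the ceph_filelock layout and CEPH_LOCK_* constants from include/ceph_fs.h, assumes a dout() logging context already exists, and make_lock() and example() are hypothetical helpers.

// A minimal usage sketch, not part of the commit: driving the
// ceph_lock_state_t interface that flock.cc now implements.
#include <string.h>
#include <list>
#include "mds/flock.h"

static ceph_filelock make_lock(uint64_t start, uint64_t length,
                               int client, int type)
{
  ceph_filelock fl;
  memset(&fl, 0, sizeof(fl));
  fl.start = start;
  fl.length = length;   // 0 means "from start to end of file"
  fl.client = client;
  fl.type = type;       // CEPH_LOCK_SHARED or CEPH_LOCK_EXCL
  return fl;
}

void example(ceph_lock_state_t& state)
{
  // Client 1 takes a shared lock on bytes [0, 99]; nothing conflicts.
  ceph_filelock shared = make_lock(0, 100, 1, CEPH_LOCK_SHARED);
  state.add_lock(shared, false);   // returns true

  // Client 2 wants an exclusive lock on [50, 59].  It conflicts, and
  // with wait_on_fail it is parked in waiting_locks instead.
  ceph_filelock excl = make_lock(50, 10, 2, CEPH_LOCK_EXCL);
  if (!state.add_lock(excl, true)) {
    // blocked: excl now sits in state.waiting_locks
  }

  // Dropping the shared lock re-runs overlapping waiters; the exclusive
  // lock can now be granted and shows up in activated_locks.
  std::list<ceph_filelock> activated;
  state.remove_lock(shared, activated);
}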
diff --git a/src/mds/flock.h b/src/mds/flock.h
index 69228b46c90a450d07cae122c30bc48178f72903..21dc3318fb0af2a170bf6d8b31dbcfb6397d3f5d 100644 (file)
@@ -26,42 +26,16 @@ inline bool operator==(ceph_filelock& l, ceph_filelock& r) {
     l.type == r.type;
 }
 
-struct ceph_lock_state_t {
+class ceph_lock_state_t {
+public:
   multimap<uint64_t, ceph_filelock> held_locks;    // current locks
   multimap<uint64_t, ceph_filelock> waiting_locks; // locks waiting for other locks
   // both of the above are keyed by starting offset
   map<client_t, int> client_held_lock_counts;
   map<client_t, int> client_waiting_lock_counts;
 
-  bool is_waiting(ceph_filelock &fl) {
-    multimap<uint64_t, ceph_filelock>::iterator p = waiting_locks.find(fl.start);
-    while (p != waiting_locks.end()) {
-      if (p->second.start > fl.start)
-       return false;
-      if (p->second.length == fl.length &&
-         p->second.client == fl.client &&
-         p->second.pid == fl.pid &&
-         p->second.pid_namespace == fl.pid_namespace)
-       return true;
-      ++p;
-    }
-    return false;
-  }
-  void remove_waiting(ceph_filelock& fl) {
-    multimap<uint64_t, ceph_filelock>::iterator p = waiting_locks.find(fl.start);
-    while (p != waiting_locks.end()) {
-      if (p->second.start > fl.start)
-       return;
-      if (p->second.length == fl.length &&
-         p->second.client == fl.client &&
-         p->second.pid == fl.pid &&
-         p->second.pid_namespace == fl.pid_namespace) {
-       waiting_locks.erase(p);
-       return;
-      }
-      ++p;
-    }
-  }
+  bool is_waiting(ceph_filelock &fl);
+  void remove_waiting(ceph_filelock& fl);
 
   /*
    * Try to set a new lock. If it's blocked and wait_on_fail is true,
@@ -74,191 +48,17 @@ struct ceph_lock_state_t {
    *
    * Returns true if set, false if not set.
    */
-  bool add_lock(ceph_filelock& new_lock, bool wait_on_fail) {
-    dout(15) << "add_lock " << new_lock << dendl;
-    bool ret = false;
-    list<multimap<uint64_t, ceph_filelock>::iterator>
-      overlapping_locks, self_overlapping_locks, neighbor_locks;
-
-    // first, get any overlapping locks and split them into owned-by-us and not
-    if (get_overlapping_locks(new_lock, overlapping_locks, &neighbor_locks)) {
-      dout(15) << "got overlapping lock, splitting by owner" << dendl;
-      split_by_owner(new_lock, overlapping_locks, self_overlapping_locks);
-    }
-    if (!overlapping_locks.empty()) { //overlapping locks owned by others :(
-      if (CEPH_LOCK_EXCL == new_lock.type) {
-       //can't set, we want an exclusive
-       dout(15) << "overlapping lock, and this lock is exclusive, can't set"
-               << dendl;
-       if (wait_on_fail) {
-         waiting_locks.insert(pair<uint64_t, ceph_filelock>(new_lock.start, new_lock));
-       }
-      } else { //shared lock, check for any exclusive locks blocking us
-       if (contains_exclusive_lock(overlapping_locks)) { //blocked :(
-         dout(15) << " blocked by exclusive lock in overlapping_locks" << dendl;
-         if (wait_on_fail) {
-           waiting_locks.insert(pair<uint64_t, ceph_filelock>(new_lock.start, new_lock));
-         }
-       } else {
-         //yay, we can insert a shared lock
-         dout(15) << "inserting shared lock" << dendl;
-         adjust_locks(self_overlapping_locks, new_lock, neighbor_locks);
-         held_locks.insert(pair<uint64_t, ceph_filelock>(new_lock.start, new_lock));
-         ret = true;
-       }
-      }
-    } else { //no overlapping locks except our own
-      adjust_locks(self_overlapping_locks, new_lock, neighbor_locks);
-      dout(15) << "no conflicts, inserting " << new_lock << dendl;
-      held_locks.insert(pair<uint64_t, ceph_filelock>
-                       (new_lock.start, new_lock));
-      ret = true;
-    }
-    if (ret)
-      ++client_held_lock_counts[(client_t)new_lock.client];
-    else if (wait_on_fail)
-      ++client_waiting_lock_counts[(client_t)new_lock.client];
-    return ret;
-  }
-
-  void look_for_lock(ceph_filelock& testing_lock) {
-    list<multimap<uint64_t, ceph_filelock>::iterator> overlapping_locks,
-      self_overlapping_locks;
-    if (get_overlapping_locks(testing_lock, overlapping_locks)) {
-      split_by_owner(testing_lock, overlapping_locks, self_overlapping_locks);
-    }
-    if (!overlapping_locks.empty()) { //somebody else owns overlapping lock
-      if (CEPH_LOCK_EXCL == testing_lock.type) { //any lock blocks it
-       testing_lock = (*overlapping_locks.begin())->second;
-      } else {
-       ceph_filelock *blocking_lock;
-       if ((blocking_lock = contains_exclusive_lock(overlapping_locks))) {
-         testing_lock = *blocking_lock;
-       } else { //nothing blocking!
-         testing_lock.type = CEPH_LOCK_UNLOCK;
-       }
-      }
-      return;
-    }
-    //if we get here, only our own locks block
-    testing_lock.type = CEPH_LOCK_UNLOCK;
-  }
+  bool add_lock(ceph_filelock& new_lock, bool wait_on_fail);
+  void look_for_lock(ceph_filelock& testing_lock);
 
   /*
    * Remove lock(s) described in old_lock. This may involve splitting a
    * previous lock or making a previous lock smaller.
    */
   void remove_lock(ceph_filelock removal_lock,
-                  list<ceph_filelock>& activated_locks) {
-    list<multimap<uint64_t, ceph_filelock>::iterator> overlapping_locks,
-      self_overlapping_locks, crossed_waiting_locks;
-    if (get_overlapping_locks(removal_lock, overlapping_locks)) {
-      dout(15) << "splitting by owner" << dendl;
-      split_by_owner(removal_lock, overlapping_locks, self_overlapping_locks);
-    } else dout(15) << "attempt to remove lock at " << removal_lock.start
-                  << " but no locks there!" << dendl;
-    bool remove_to_end = (0 == removal_lock.length);
-    bool old_lock_to_end;
-    uint64_t removal_start = removal_lock.start;
-    uint64_t removal_end = removal_start + removal_lock.length - 1;
-    uint64_t old_lock_end;
-    __s64 old_lock_client = 0;
-    ceph_filelock *old_lock;
-
-    dout(15) << "examining " << self_overlapping_locks.size()
-           << " self-overlapping locks for removal" << dendl;
-    for (list<multimap<uint64_t, ceph_filelock>::iterator>::iterator
-          iter = self_overlapping_locks.begin();
-        iter != self_overlapping_locks.end();
-        ++iter) {
-      dout(15) << "self overlapping lock " << (*iter)->second << dendl;
-      old_lock = &(*iter)->second;
-      old_lock_to_end = (0 == old_lock->length);
-      old_lock_end = old_lock->start + old_lock->length - 1;
-      old_lock_client = old_lock->client;
-      if (remove_to_end) {
-       if (old_lock->start < removal_start) {
-         old_lock->length = removal_start - old_lock->start;
-       } else {
-         dout(15) << "erasing " << (*iter)->second << dendl;
-         held_locks.erase(*iter);
-         --client_held_lock_counts[old_lock_client];
-       }
-      } else if (old_lock_to_end) {
-       ceph_filelock append_lock = *old_lock;
-       append_lock.start = removal_end+1;
-       held_locks.insert(pair<uint64_t, ceph_filelock>
-                         (append_lock.start, append_lock));
-       ++client_held_lock_counts[(client_t)old_lock->client];
-       if (old_lock->start >= removal_start) {
-         dout(15) << "erasing " << (*iter)->second << dendl;
-         held_locks.erase(*iter);
-         --client_held_lock_counts[old_lock_client];
-       } else old_lock->length = removal_start - old_lock->start;
-      } else {
-       if (old_lock_end  > removal_end) {
-         ceph_filelock append_lock = *old_lock;
-         append_lock.start = removal_end + 1;
-         append_lock.length = old_lock_end - append_lock.start + 1;
-         held_locks.insert(pair<uint64_t, ceph_filelock>
-                           (append_lock.start, append_lock));
-         ++client_held_lock_counts[(client_t)old_lock->client];
-       }
-       if (old_lock->start < removal_start) {
-         old_lock->length = removal_start - old_lock->start;
-       } else {
-         dout(15) << "erasing " << (*iter)->second << dendl;
-         held_locks.erase(*iter);
-         --client_held_lock_counts[old_lock_client];
-       }
-      }
-      if (!client_held_lock_counts[old_lock_client]) {
-       client_held_lock_counts.erase(old_lock_client);
-      }
-    }
-
-    /* okay, we've removed the locks, but removing them might allow some
-     * other waiting locks to come through */
-    if (get_waiting_overlaps(removal_lock, crossed_waiting_locks)) {
-      /*let's do this the SUPER lazy way for now. Should work out something
-       that's slightly less slow and wasteful, though.
-       1) Remove lock from waiting_locks.
-       2) attempt to insert lock via add_lock
-       3) Add to success list if we get back "true"
-
-       In the future, should probably set this up to detect some
-       guaranteed blocks and do fewer map lookups.
-       */
-      for (list<multimap<uint64_t, ceph_filelock>::iterator>::iterator
-            iter = crossed_waiting_locks.begin();
-          iter != crossed_waiting_locks.end();
-          ++iter) {
-       ceph_filelock cur_lock = (*iter)->second;
-       waiting_locks.erase(*iter);
-       --client_waiting_lock_counts[(client_t)cur_lock.client];
-       if (!client_waiting_lock_counts[(client_t)cur_lock.client]) {
-         client_waiting_lock_counts.erase((client_t)cur_lock.client);
-       }
-       if (add_lock(cur_lock, true))
-         activated_locks.push_back(cur_lock);
-      }
-    }
-  }
-
-  bool remove_all_from (client_t client) {
-    bool cleared_any = false;
-    if (client_held_lock_counts.count(client)) {
-      remove_all_from(client, held_locks);
-      client_held_lock_counts.erase(client);
-      cleared_any = true;
-    }
-    if (client_waiting_lock_counts.count(client)) {
-      remove_all_from(client, waiting_locks);
-      client_waiting_lock_counts.erase(client);
-    }
-    return cleared_any;
-  }
+                   list<ceph_filelock>& activated_locks);
 
+  bool remove_all_from(client_t client);
 private:
   /**
    * Adjust old locks owned by a single process so that process can set
@@ -280,172 +80,22 @@ private:
    *    left or right side.
    */
   void adjust_locks(list<multimap<uint64_t, ceph_filelock>::iterator> old_locks,
-                   ceph_filelock& new_lock,
-                   list<multimap<uint64_t, ceph_filelock>::iterator>
-                   neighbor_locks) {
-    dout(15) << "adjust_locks" << dendl;
-    bool new_lock_to_end = (0 == new_lock.length);
-    bool old_lock_to_end;
-    uint64_t new_lock_start = new_lock.start;
-    uint64_t new_lock_end = new_lock.start + new_lock.length - 1;
-    uint64_t old_lock_start, old_lock_end;
-    __s64 old_lock_client = 0;
-    ceph_filelock *old_lock;
-    for (list<multimap<uint64_t, ceph_filelock>::iterator>::iterator
-          iter = old_locks.begin();
-        iter != old_locks.end();
-        ++iter) {
-      old_lock = &(*iter)->second;
-      dout(15) << "adjusting lock: " << *old_lock << dendl;
-      old_lock_to_end = (0 == old_lock->length);
-      old_lock_start = old_lock->start;
-      old_lock_end = old_lock->start + old_lock->length - 1;
-      new_lock_start = new_lock.start;
-      new_lock_end = new_lock.start + new_lock.length - 1;
-      old_lock_client = old_lock->client;
-      if (new_lock_to_end || old_lock_to_end) {
-       //special code path to deal with a length set at 0
-       dout(15) << "one lock extends forever" << dendl;
-       if (old_lock->type == new_lock.type) {
-         //just unify them in new lock, remove old lock
-         dout(15) << "same lock type, unifying" << dendl;
-         new_lock.start = (new_lock_start < old_lock_start) ? new_lock_start :
-           old_lock_start;
-         new_lock.length = 0;
-         held_locks.erase(*iter);
-         --client_held_lock_counts[old_lock_client];
-       } else { //not same type, have to keep any remains of old lock around
-         dout(15) << "shrinking old lock" << dendl;
-         if (new_lock_to_end) {
-           if (old_lock_start < new_lock_start) {
-             old_lock->length = new_lock_start - old_lock_start;
-           } else {
-             held_locks.erase(*iter);
-             --client_held_lock_counts[old_lock_client];
-           }
-         } else { //old lock extends past end of new lock
-           ceph_filelock appended_lock = *old_lock;
-           appended_lock.start = new_lock_end + 1;
-           held_locks.insert(pair<uint64_t, ceph_filelock>
-                             (appended_lock.start, appended_lock));
-           ++client_held_lock_counts[(client_t)old_lock->client];
-           if (old_lock_start < new_lock_start) {
-             old_lock->length = new_lock_start - old_lock_start;
-           } else {
-             held_locks.erase(*iter);
-             --client_held_lock_counts[old_lock_client];
-           }
-         }
-       }
-      } else {
-       if (old_lock->type == new_lock.type) { //just merge them!
-         dout(15) << "merging locks, they're the same type" << dendl;
-         new_lock.start = (old_lock_start < new_lock_start ) ? old_lock_start :
-           new_lock_start;
-         int new_end = (new_lock_end > old_lock_end) ? new_lock_end :
-           old_lock_end;
-         new_lock.length = new_end - new_lock.start + 1;
-         dout(15) << "erasing lock " << (*iter)->second << dendl;
-         held_locks.erase(*iter);
-         --client_held_lock_counts[old_lock_client];
-       } else { //we'll have to update sizes and maybe make new locks
-         dout(15) << "locks aren't same type, changing sizes" << dendl;
-         if (old_lock_end > new_lock_end) { //add extra lock after new_lock
-           ceph_filelock appended_lock = *old_lock;
-           appended_lock.start = new_lock_end + 1;
-           appended_lock.length = old_lock_end - appended_lock.start + 1;
-           held_locks.insert(pair<uint64_t, ceph_filelock>
-                             (appended_lock.start, appended_lock));
-           ++client_held_lock_counts[(client_t)old_lock->client];
-         }
-         if (old_lock_start < new_lock_start) {
-           old_lock->length = new_lock_start - old_lock_start;
-         } else { //old_lock starts inside new_lock, so remove it
-           //if it extended past new_lock_end it's been replaced
-           held_locks.erase(*iter);
-           --client_held_lock_counts[old_lock_client];
-         }
-       }
-      }
-      if (!client_held_lock_counts[old_lock_client]) {
-       client_held_lock_counts.erase(old_lock_client);
-      }
-    }
-
-    //make sure to coalesce neighboring locks
-    for (list<multimap<uint64_t, ceph_filelock>::iterator>::iterator
-          iter = neighbor_locks.begin();
-        iter != neighbor_locks.end();
-        ++iter) {
-      old_lock = &(*iter)->second;
-      old_lock_client = old_lock->client;
-      dout(15) << "lock to coalesce: " << *old_lock << dendl;
-      /* because if it's a neibhoring lock there can't be any self-overlapping
-        locks that covered it */
-      if (old_lock->type == new_lock.type) { //merge them
-       if (0 == new_lock.length) {
-         if (old_lock->start + old_lock->length == new_lock.start) {
-           new_lock.start = old_lock->start;
-         } else assert(0); /* if there's no end to new_lock, the neighbor
-                              HAS TO be to left side */
-       } else if (0 == old_lock->length) {
-         if (new_lock.start + new_lock.length == old_lock->start) {
-           new_lock.length = 0;
-         } else assert(0); //same as before, but reversed
-       } else {
-         if (old_lock->start + old_lock->length == new_lock.start) {
-           new_lock.start = old_lock->start;
-           new_lock.length = old_lock->length + new_lock.length;
-         } else if (new_lock.start + new_lock.length == old_lock->start) {
-           new_lock.length = old_lock->length + new_lock.length;
-         }
-       }
-       held_locks.erase(*iter);
-       --client_held_lock_counts[old_lock_client];
-      }
-      if (!client_held_lock_counts[old_lock_client]) {
-       client_held_lock_counts.erase(old_lock_client);
-      }
-    }
-  }
+                    ceph_filelock& new_lock,
+                    list<multimap<uint64_t, ceph_filelock>::iterator>
+                      neighbor_locks);
 
   //this won't reset the counter map value, do that yourself
-  void remove_all_from(client_t client, multimap<uint64_t, ceph_filelock>& locks) {
-    multimap<uint64_t, ceph_filelock>::iterator iter = locks.begin();
-    while (iter != locks.end()) {
-      if ((client_t)iter->second.client == client) {
-       locks.erase(iter++);
-      } else ++iter;
-    }
-  }
+  void remove_all_from(client_t client,
+                       multimap<uint64_t, ceph_filelock>& locks);
 
   //get last lock prior to start position
   multimap<uint64_t, ceph_filelock>::iterator
-  get_lower_bound(uint64_t start, multimap<uint64_t, ceph_filelock>& lock_map) {
-    multimap<uint64_t, ceph_filelock>::iterator lower_bound =
-      lock_map.lower_bound(start);
-    if ((lower_bound->first != start)
-       && (start != 0)
-       && (lower_bound != lock_map.begin())) --lower_bound;
-    if (lock_map.end() == lower_bound)
-      dout(15) << "get_lower_dout(15)eturning end()" << dendl;
-    else dout(15) << "get_lower_bound returning iterator pointing to "
-                << lower_bound->second << dendl;
-    return lower_bound;
-  }
-
+  get_lower_bound(uint64_t start,
+                  multimap<uint64_t, ceph_filelock>& lock_map);
   //get latest-starting lock that goes over the byte "end"
   multimap<uint64_t, ceph_filelock>::iterator
-  get_last_before(uint64_t end, multimap<uint64_t, ceph_filelock>& lock_map) {
-    multimap<uint64_t, ceph_filelock>::iterator last =
-      lock_map.upper_bound(end);
-    if (last != lock_map.begin()) --last;
-    if (lock_map.end() == last)
-      dout(15) << "get_last_before returning end()" << dendl;
-    else dout(15) << "get_last_before returning iterator pointing to "
-                << last->second << dendl;
-    return last;
-  }
+  get_last_before(uint64_t end,
+                  multimap<uint64_t, ceph_filelock>& lock_map);
 
   /*
    * See if an iterator's lock covers any of the same bounds as a given range
@@ -454,25 +104,18 @@ private:
    * If the length is 0, the lock covers from "start" to the end of the file.
    */
   bool share_space(multimap<uint64_t, ceph_filelock>::iterator& iter,
-                  uint64_t start, uint64_t end) {
-    bool ret = ((iter->first >= start && iter->first <= end) ||
-               ((iter->first < start) &&
-                (((iter->first + iter->second.length - 1) >= start) ||
-                 (0 == iter->second.length))));
-    dout(15) << "share_space got start: " << start << ", end: " << end
-           << ", lock: " << iter->second << ", returning " << ret << dendl;
-    return ret;
-  }
+                  uint64_t start, uint64_t end);
+  
   bool share_space(multimap<uint64_t, ceph_filelock>::iterator& iter,
-                  ceph_filelock& lock) {
+                   ceph_filelock &lock) {
     uint64_t end = lock.start;
-    if (lock.length)
+    if (lock.length) {
       end += lock.length - 1;
-    else // zero length means to end-of-file
+    } else { // zero length means end of file
       end = uint64_t(-1);
+    }
     return share_space(iter, lock.start, end);
   }
-  
   /*
    *get a list of all locks overlapping with the given lock's range
    * lock: the lock to compare with.
@@ -480,47 +123,11 @@ private:
    * Returns: true if at least one lock overlaps.
    */
   bool get_overlapping_locks(ceph_filelock& lock,
-                            list<multimap<uint64_t, ceph_filelock>::iterator> & overlaps,
-                            list<multimap<uint64_t, ceph_filelock>::iterator> *self_neighbors) {
-    dout(15) << "get_overlapping_locks" << dendl;
-    // create a lock starting one earlier and ending one later
-    // to check for neighbors
-    ceph_filelock neighbor_check_lock = lock;
-    if (neighbor_check_lock.start != 0) {
-      neighbor_check_lock.start = neighbor_check_lock.start - 1;
-      if (neighbor_check_lock.length)
-        neighbor_check_lock.length = neighbor_check_lock.length + 2;
-    } else {
-      if (neighbor_check_lock.length)
-        neighbor_check_lock.length = neighbor_check_lock.length + 1;
-    }
-    //find the last held lock starting at the point after lock
-    uint64_t endpoint = lock.start;
-    if (lock.length) {
-      endpoint += lock.length;
-    } else {
-      endpoint = uint64_t(-1); // max offset
-    }
-    multimap<uint64_t, ceph_filelock>::iterator iter =
-      get_last_before(endpoint, held_locks);
-    bool cont = iter != held_locks.end();
-    while(cont) {
-      if (share_space(iter, lock)) {
-       overlaps.push_front(iter);
-      } else if (self_neighbors &&
-                (neighbor_check_lock.client == iter->second.client) &&
-                (neighbor_check_lock.pid == iter->second.pid) &&
-                share_space(iter, neighbor_check_lock)) {
-       self_neighbors->push_front(iter);
-      }
-      if ((iter->first < lock.start) && (CEPH_LOCK_EXCL == iter->second.type)) {
-       //can't be any more overlapping locks or they'd interfere with this one
-       cont = false;
-      } else if (held_locks.begin() == iter) cont = false;
-      else --iter;
-    }
-    return !overlaps.empty();
-  }
+                             list<multimap<uint64_t,
+                                 ceph_filelock>::iterator> & overlaps,
+                             list<multimap<uint64_t,
+                                 ceph_filelock>::iterator> *self_neighbors);
+
   
   bool get_overlapping_locks(ceph_filelock& lock,
                             list<multimap<uint64_t, ceph_filelock>::iterator>& overlaps) {
@@ -534,20 +141,8 @@ private:
    * Returns: true if at least one waiting_lock overlaps
    */
   bool get_waiting_overlaps(ceph_filelock& lock,
-                           list<multimap<uint64_t, ceph_filelock>::iterator>&
-                           overlaps) {
-    dout(15) << "get_waiting_overlaps" << dendl;
-    multimap<uint64_t, ceph_filelock>::iterator iter =
-      get_last_before(lock.start + lock.length - 1, waiting_locks);
-    bool cont = iter != waiting_locks.end();
-    while(cont) {
-      if (share_space(iter, lock)) overlaps.push_front(iter);
-      if (waiting_locks.begin() == iter) cont = false;
-      --iter;
-    }
-    return !overlaps.empty();
-  }
-
+                            list<multimap<uint64_t,
+                                ceph_filelock>::iterator>& overlaps);
   /*
    * split a list of locks up by whether they're owned by same
    * process as given lock
@@ -557,38 +152,13 @@ private:
    * owned_locks: an empty list, to be filled with the locks owned by owner
    */
   void split_by_owner(ceph_filelock& owner,
-                     list<multimap<uint64_t, ceph_filelock>::iterator> & locks,
-                     list<multimap<uint64_t, ceph_filelock>::iterator> & owned_locks) {
-    list<multimap<uint64_t, ceph_filelock>::iterator>::iterator
-      iter = locks.begin();
-    dout(15) << "owner lock: " << owner << dendl;
-    while (iter != locks.end()) {
-      dout(15) << "comparing to " << (*iter)->second << dendl;
-      if ((*iter)->second.client == owner.client &&
-         (*iter)->second.pid_namespace == owner.pid_namespace &&
-         (*iter)->second.pid == owner.pid) {
-       dout(15) << "success, pushing to owned_locks" << dendl;
-       owned_locks.push_back(*iter);
-       iter = locks.erase(iter);
-      } else {
-       dout(15) << "failure, something not equal in this group "
-               << (*iter)->second.client << ":" << owner.client << ","
-               << (*iter)->second.pid_namespace << ":" << owner.pid_namespace
-               << "," << (*iter)->second.pid << ":" << owner.pid << dendl;
-       ++iter;
-      }
-    }
-  }
+                     list<multimap<uint64_t,
+                         ceph_filelock>::iterator> & locks,
+                     list<multimap<uint64_t,
+                         ceph_filelock>::iterator> & owned_locks);
 
-  ceph_filelock *contains_exclusive_lock(list<multimap<uint64_t, ceph_filelock>::iterator>& locks) {
-    for (list<multimap<uint64_t, ceph_filelock>::iterator>::iterator
-          iter = locks.begin();
-        iter != locks.end();
-        ++iter) {
-      if (CEPH_LOCK_EXCL == (*iter)->second.type) return &(*iter)->second;
-    }
-    return NULL;
-  }
+  ceph_filelock *contains_exclusive_lock(list<multimap<uint64_t,
+                                         ceph_filelock>::iterator>& locks);
 
 public:
   void encode(bufferlist& bl) const {
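
share_space() above is the single overlap predicate behind every range query in flock.cc. As a worked illustration, not from the commit, the same rule restated over plain integers, including the zero-length "covers through end of file" convention; ranges_share_space() is a hypothetical standalone mirror of it.

#include <assert.h>
#include <stdint.h>

// Hypothetical standalone mirror of ceph_lock_state_t::share_space():
// does a lock starting at lock_start with lock_length bytes (0 = lock
// extends to end of file) cover any byte of the range [start, end]?
static bool ranges_share_space(uint64_t lock_start, uint64_t lock_length,
                               uint64_t start, uint64_t end)
{
  return (lock_start >= start && lock_start <= end) ||
         ((lock_start < start) &&
          ((lock_start + lock_length - 1 >= start) ||
           (0 == lock_length)));
}

int main()
{
  assert(ranges_share_space(0, 100, 50, 60));   // [0,99] covers [50,60]
  assert(!ranges_share_space(0, 10, 50, 60));   // [0,9] ends before 50
  assert(ranges_share_space(20, 0, 50, 60));    // to-EOF lock from 20
  assert(ranges_share_space(60, 5, 50, 60));    // starts inside the range
  return 0;
}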