ceph_lock_state_t *lock_state;
if (lock_type == CEPH_LOCK_FCNTL) {
if (!in->fcntl_locks)
- in->fcntl_locks = new ceph_lock_state_t(cct);
+ in->fcntl_locks = new ceph_lock_state_t(cct, CEPH_LOCK_FCNTL);
lock_state = in->fcntl_locks;
} else if (lock_type == CEPH_LOCK_FLOCK) {
if (!in->flock_locks)
- in->flock_locks = new ceph_lock_state_t(cct);
+ in->flock_locks = new ceph_lock_state_t(cct, CEPH_LOCK_FLOCK);
lock_state = in->flock_locks;
} else {
assert(0);
if (fh) {
if (lock_type == CEPH_LOCK_FCNTL) {
if (!fh->fcntl_locks)
- fh->fcntl_locks = new ceph_lock_state_t(cct);
+ fh->fcntl_locks = new ceph_lock_state_t(cct, CEPH_LOCK_FCNTL);
lock_state = fh->fcntl_locks;
} else {
if (!fh->flock_locks)
- fh->flock_locks = new ceph_lock_state_t(cct);
+ fh->flock_locks = new ceph_lock_state_t(cct, CEPH_LOCK_FLOCK);
lock_state = fh->flock_locks;
}
_update_lock_state(fl, owner, lock_state);
list<ceph_filelock> activated_locks;
lock_state->remove_lock(filelock, activated_locks);
} else {
- bool r = lock_state->add_lock(filelock, false, false);
+ bool r = lock_state->add_lock(filelock, false, false, NULL);
assert(r);
}
}
ceph_lock_state_t *get_fcntl_lock_state() {
if (!fcntl_locks)
- fcntl_locks = new ceph_lock_state_t(g_ceph_context);
+ fcntl_locks = new ceph_lock_state_t(g_ceph_context, CEPH_LOCK_FCNTL);
return fcntl_locks;
}
void clear_fcntl_lock_state() {
}
ceph_lock_state_t *get_flock_lock_state() {
if (!flock_locks)
- flock_locks = new ceph_lock_state_t(g_ceph_context);
+ flock_locks = new ceph_lock_state_t(g_ceph_context, CEPH_LOCK_FLOCK);
return flock_locks;
}
void clear_flock_lock_state() {
respond_to_request(mdr, 0);
} else {
dout(10) << " lock attempt on " << set_lock << dendl;
+ bool deadlock = false;
if (mdr->more()->flock_was_waiting &&
!lock_state->is_waiting(set_lock)) {
dout(10) << " was waiting for lock but not anymore, must have been canceled " << set_lock << dendl;
respond_to_request(mdr, -EINTR);
- } else if (!lock_state->add_lock(set_lock, will_wait, mdr->more()->flock_was_waiting)) {
+ } else if (!lock_state->add_lock(set_lock, will_wait, mdr->more()->flock_was_waiting, &deadlock)) {
dout(10) << " it failed on this attempt" << dendl;
// couldn't set lock right now
- if (!will_wait) {
+ if (deadlock) {
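+      // add_lock() detected a wait-for cycle; fail with EDEADLK rather than blocking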
+ respond_to_request(mdr, -EDEADLK);
+ } else if (!will_wait) {
respond_to_request(mdr, -EWOULDBLOCK);
} else {
dout(10) << " added to waiting list" << dendl;
#define dout_subsys ceph_subsys_mds
-bool ceph_lock_state_t::is_waiting(ceph_filelock &fl)
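+// Process-wide index of waiting fcntl locks: maps each waiting ceph_filelock to
+// the ceph_lock_state_t it is queued in, so deadlock detection can follow
+// wait-for edges across files.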
+static multimap<ceph_filelock, ceph_lock_state_t*> global_waiting_locks;
+
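+// On destruction, remove any entries this lock state contributed to the global
+// wait map so it is never left holding a dangling pointer.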
+ceph_lock_state_t::~ceph_lock_state_t()
+{
+ if (type == CEPH_LOCK_FCNTL) {
+ for (auto p = waiting_locks.begin(); p != waiting_locks.end(); ++p) {
+ for (auto q = global_waiting_locks.find(p->second);
+ q != global_waiting_locks.end(); ) {
+ if (q->first != p->second)
+ break;
+ if (q->second == this) {
+ global_waiting_locks.erase(q);
+ break;
+ }
+ ++q;
+ }
+ }
+ }
+}
+
+bool ceph_lock_state_t::is_waiting(const ceph_filelock &fl)
{
multimap<uint64_t, ceph_filelock>::iterator p = waiting_locks.find(fl.start);
while (p != waiting_locks.end()) {
return false;
}
-void ceph_lock_state_t::remove_waiting(ceph_filelock& fl)
+void ceph_lock_state_t::remove_waiting(const ceph_filelock& fl)
{
- multimap<uint64_t, ceph_filelock>::iterator p = waiting_locks.find(fl.start);
- while (p != waiting_locks.end()) {
+ for (auto p = waiting_locks.find(fl.start);
+ p != waiting_locks.end(); ) {
if (p->second.start > fl.start)
- return;
+ break;
if (p->second.length == fl.length &&
ceph_filelock_owner_equal(p->second, fl)) {
waiting_locks.erase(p);
if (!client_waiting_lock_counts[(client_t)fl.client]) {
client_waiting_lock_counts.erase((client_t)fl.client);
}
- return;
+ break;
}
++p;
}
+
+ if (type == CEPH_LOCK_FCNTL) {
+ for (auto q = global_waiting_locks.find(fl);
+ q != global_waiting_locks.end(); ) {
+ if (q->first != fl)
+ break;
+ if (q->second == this) {
+ global_waiting_locks.erase(q);
+ break;
+ }
+ ++q;
+ }
+ }
+}
+
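+// Depth-bounded search of the wait-for graph: collect the owners of held locks
+// that conflict with fl, then recursively check whether any of those owners is
+// itself waiting (via global_waiting_locks) on a lock held by the owner that
+// started the search.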
+bool ceph_lock_state_t::is_deadlock(const ceph_filelock& fl,
+ list<multimap<uint64_t, ceph_filelock>::iterator>&
+ overlapping_locks,
+ const ceph_filelock *first_fl, unsigned depth)
+{
+ ldout(cct,15) << "is_deadlock " << fl << dendl;
+
+  // deadlock detection only applies to POSIX (fcntl) locks
+ if (type != CEPH_LOCK_FCNTL)
+ return false;
+
+  // collect the owners of held locks that conflict with fl
+ set<ceph_filelock> lock_owners;
+ for (auto p = overlapping_locks.begin();
+ p != overlapping_locks.end();
+ ++p) {
+
+ if (fl.type == CEPH_LOCK_SHARED &&
+ (*p)->second.type == CEPH_LOCK_SHARED)
+ continue;
+
+    // cycle detected: the conflicting lock is held by the owner that started this search
+ if (first_fl && ceph_filelock_owner_equal(*first_fl, (*p)->second)) {
+ ldout(cct,15) << " detect deadlock" << dendl;
+ return true;
+ }
+
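+    // strip range and type so the set de-duplicates purely on owner identity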
+ ceph_filelock tmp = (*p)->second;
+ tmp.start = 0;
+ tmp.length = 0;
+ tmp.type = 0;
+ lock_owners.insert(tmp);
+ }
+
+ if (depth >= MAX_DEADLK_DEPTH)
+ return false;
+
+ first_fl = first_fl ? first_fl : &fl;
+ for (auto p = lock_owners.begin();
+ p != lock_owners.end();
+ ++p) {
+ ldout(cct,15) << " conflict lock owner " << *p << dendl;
+    // is this conflicting lock's owner itself waiting for another lock?
+ for (auto q = global_waiting_locks.lower_bound(*p);
+ q != global_waiting_locks.end();
+ ++q) {
+ if (!ceph_filelock_owner_equal(q->first, *p))
+ break;
+
+ list<multimap<uint64_t, ceph_filelock>::iterator>
+ _overlapping_locks, _self_overlapping_locks;
+ ceph_lock_state_t& state = *(q->second);
+ if (state.get_overlapping_locks(q->first, _overlapping_locks)) {
+ state.split_by_owner(q->first, _overlapping_locks, _self_overlapping_locks);
+ }
+ if (!_overlapping_locks.empty()) {
+ if (is_deadlock(q->first, _overlapping_locks, first_fl, depth + 1))
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
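+// Queue fl on the local waiting list; fcntl waiters are also published in the
+// global wait map used for deadlock detection.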
+void ceph_lock_state_t::add_waiting(const ceph_filelock& fl)
+{
+ waiting_locks.insert(pair<uint64_t, ceph_filelock>(fl.start, fl));
+ if (type == CEPH_LOCK_FCNTL) {
+ global_waiting_locks.insert(pair<ceph_filelock,ceph_lock_state_t*>(fl, this));
+ }
}
bool ceph_lock_state_t::add_lock(ceph_filelock& new_lock,
- bool wait_on_fail, bool replay)
+ bool wait_on_fail, bool replay,
+ bool *deadlock)
{
ldout(cct,15) << "add_lock " << new_lock << dendl;
bool ret = false;
ldout(cct,15) << "overlapping lock, and this lock is exclusive, can't set"
<< dendl;
if (wait_on_fail && !replay) {
- waiting_locks.insert(pair<uint64_t, ceph_filelock>(new_lock.start, new_lock));
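+      // only queue as a waiter if doing so would not close a wait-for cycle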
+ if (is_deadlock(new_lock, overlapping_locks))
+ *deadlock = true;
+ else
+ add_waiting(new_lock);
}
} else { //shared lock, check for any exclusive locks blocking us
if (contains_exclusive_lock(overlapping_locks)) { //blocked :(
ldout(cct,15) << " blocked by exclusive lock in overlapping_locks" << dendl;
- if (wait_on_fail && !replay) {
- waiting_locks.insert(pair<uint64_t, ceph_filelock>(new_lock.start, new_lock));
- }
+ if (wait_on_fail && !replay) {
+ if (is_deadlock(new_lock, overlapping_locks))
+ *deadlock = true;
+ else
+ add_waiting(new_lock);
+ }
} else {
//yay, we can insert a shared lock
ldout(cct,15) << "inserting shared lock" << dendl;
{
bool cleared_any = false;
if (client_held_lock_counts.count(client)) {
- remove_all_from(client, held_locks);
+ multimap<uint64_t, ceph_filelock>::iterator iter = held_locks.begin();
+ while (iter != held_locks.end()) {
+ if ((client_t)iter->second.client == client) {
+ held_locks.erase(iter++);
+ } else
+ ++iter;
+ }
client_held_lock_counts.erase(client);
cleared_any = true;
}
+
if (client_waiting_lock_counts.count(client)) {
- remove_all_from(client, waiting_locks);
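+      // remove this client's waiters both here and from the global wait map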
+ multimap<uint64_t, ceph_filelock>::iterator iter = waiting_locks.begin();
+ while (iter != waiting_locks.end()) {
+ if ((client_t)iter->second.client != client) {
+ ++iter;
+ continue;
+ }
+
+ for (auto p = global_waiting_locks.find(iter->second);
+ p != global_waiting_locks.end(); ) {
+ if (p->first != iter->second)
+ break;
+ if (p->second == this) {
+ global_waiting_locks.erase(p);
+ break;
+ }
+ ++p;
+ }
+ waiting_locks.erase(iter++);
+ }
client_waiting_lock_counts.erase(client);
}
return cleared_any;
}
}
-void ceph_lock_state_t::remove_all_from(client_t client,
- multimap<uint64_t,
- ceph_filelock>& locks)
-{
- multimap<uint64_t, ceph_filelock>::iterator iter = locks.begin();
- while (iter != locks.end()) {
- if ((client_t)iter->second.client == client) {
- locks.erase(iter++);
- } else ++iter;
- }
-}
-
multimap<uint64_t, ceph_filelock>::iterator
ceph_lock_state_t::get_lower_bound(uint64_t start,
multimap<uint64_t, ceph_filelock>& lock_map)
return ret;
}
-bool ceph_lock_state_t::get_overlapping_locks(ceph_filelock& lock,
+bool ceph_lock_state_t::get_overlapping_locks(const ceph_filelock& lock,
list<multimap<uint64_t,
ceph_filelock>::iterator> & overlaps,
list<multimap<uint64_t,
return !overlaps.empty();
}
-bool ceph_lock_state_t::get_waiting_overlaps(ceph_filelock& lock,
+bool ceph_lock_state_t::get_waiting_overlaps(const ceph_filelock& lock,
list<multimap<uint64_t,
ceph_filelock>::iterator>&
overlaps)
return !overlaps.empty();
}
-void ceph_lock_state_t::split_by_owner(ceph_filelock& owner,
+void ceph_lock_state_t::split_by_owner(const ceph_filelock& owner,
list<multimap<uint64_t,
ceph_filelock>::iterator>& locks,
list<multimap<uint64_t,
#include "mdstypes.h"
-inline ostream& operator<<(ostream& out, ceph_filelock& l) {
+inline ostream& operator<<(ostream& out, const ceph_filelock& l) {
out << "start: " << l.start << ", length: " << l.length
<< ", client: " << l.client << ", owner: " << l.owner
<< ", pid: " << l.pid << ", type: " << (int)l.type
return out;
}
-inline bool ceph_filelock_owner_equal(ceph_filelock& l, ceph_filelock& r)
+inline bool ceph_filelock_owner_equal(const ceph_filelock& l, const ceph_filelock& r)
{
if (l.client != r.client || l.owner != r.owner)
return false;
return l.pid == r.pid;
}
-inline bool operator==(ceph_filelock& l, ceph_filelock& r) {
- return
- l.length == r.length &&
- l.type == r.type &&
- ceph_filelock_owner_equal(l, r);
+inline int ceph_filelock_owner_compare(const ceph_filelock& l, const ceph_filelock& r)
+{
+ if (l.client != r.client)
+ return l.client > r.client ? 1 : -1;
+ if (l.owner != r.owner)
+ return l.owner > r.owner ? 1 : -1;
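+  // when the top bit of 'owner' is set, the owner value alone identifies the
+  // lock holder, so the pid is not compared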
+ if (l.owner & (1ULL << 63))
+ return 0;
+ if (l.pid != r.pid)
+ return l.pid > r.pid ? 1 : -1;
+ return 0;
+}
+
+inline int ceph_filelock_compare(const ceph_filelock& l, const ceph_filelock& r)
+{
+ int ret = ceph_filelock_owner_compare(l, r);
+ if (ret)
+ return ret;
+ if (l.start != r.start)
+ return l.start > r.start ? 1 : -1;
+ if (l.length != r.length)
+ return l.length > r.length ? 1 : -1;
+ if (l.type != r.type)
+ return l.type > r.type ? 1 : -1;
+ return 0;
+}
+
+inline bool operator<(const ceph_filelock& l, const ceph_filelock& r)
+{
+ return ceph_filelock_compare(l, r) < 0;
+}
+
+inline bool operator==(const ceph_filelock& l, const ceph_filelock& r) {
+ return ceph_filelock_compare(l, r) == 0;
+}
+
+inline bool operator!=(const ceph_filelock& l, const ceph_filelock& r) {
+ return ceph_filelock_compare(l, r) != 0;
}
class ceph_lock_state_t {
CephContext *cct;
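+  // lock type this state tracks: CEPH_LOCK_FCNTL or CEPH_LOCK_FLOCK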
+ int type;
public:
- explicit ceph_lock_state_t(CephContext *cct_) : cct(cct_) {}
+ explicit ceph_lock_state_t(CephContext *cct_, int type_) : cct(cct_), type(type_) {}
+ ~ceph_lock_state_t();
multimap<uint64_t, ceph_filelock> held_locks; // current locks
multimap<uint64_t, ceph_filelock> waiting_locks; // locks waiting for other locks
// both of the above are keyed by starting offset
* @param fl The filelock to check for
* @returns True if the lock is waiting, false otherwise
*/
- bool is_waiting(ceph_filelock &fl);
+ bool is_waiting(const ceph_filelock &fl);
/**
* Remove a lock from the waiting_locks list
*
* @param fl The filelock to remove
*/
- void remove_waiting(ceph_filelock& fl);
-
+ void remove_waiting(const ceph_filelock& fl);
/*
* Try to set a new lock. If it's blocked and wait_on_fail is true,
* add the lock to waiting_locks.
*
* @returns true if set, false if not set.
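+   * If blocking would create a deadlock, *deadlock is set to true and the lock
+   * is not added to waiting_locks.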
*/
- bool add_lock(ceph_filelock& new_lock, bool wait_on_fail, bool replay);
+ bool add_lock(ceph_filelock& new_lock, bool wait_on_fail, bool replay,
+ bool *deadlock);
/**
* See if a lock is blocked by existing locks. If the lock is blocked,
* it will be set to the value of the first blocking lock. Otherwise,
* @param removal_lock The lock to remove
* @param activated_locks A return parameter, holding activated wait locks.
*/
- void remove_lock(ceph_filelock removal_lock,
+ void remove_lock(const ceph_filelock removal_lock,
list<ceph_filelock>& activated_locks);
bool remove_all_from(client_t client);
private:
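+  // recursion bound for is_deadlock(); dependency chains deeper than this are
+  // not searched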
+ static const unsigned MAX_DEADLK_DEPTH = 5;
+
+ /**
+   * Check whether waiting for the given lock would create a deadlock
+   *
+   * @param fl the blocked filelock (the request that cannot be granted)
+   * @param overlapping_locks held locks that overlap and conflict with fl
+   * @param first_fl the lock that started the dependency walk (NULL on the initial call)
+   * @param depth current recursion depth, bounded by MAX_DEADLK_DEPTH
+ */
+ bool is_deadlock(const ceph_filelock& fl,
+ list<multimap<uint64_t, ceph_filelock>::iterator>&
+ overlapping_locks,
+ const ceph_filelock *first_fl=NULL, unsigned depth=0);
+
+ /**
+ * Add a lock to the waiting_locks list
+ *
+ * @param fl The filelock to add
+ */
+ void add_waiting(const ceph_filelock& fl);
+
/**
* Adjust old locks owned by a single process so that process can set
* a new lock of different type. Handle any changes needed to the old locks
list<multimap<uint64_t, ceph_filelock>::iterator>
neighbor_locks);
- //this won't reset the counter map value, do that yourself
- void remove_all_from(client_t client,
- multimap<uint64_t, ceph_filelock>& locks);
-
//get last lock prior to start position
multimap<uint64_t, ceph_filelock>::iterator
get_lower_bound(uint64_t start,
uint64_t start, uint64_t end);
bool share_space(multimap<uint64_t, ceph_filelock>::iterator& iter,
- ceph_filelock &lock) {
+ const ceph_filelock &lock) {
uint64_t end = lock.start;
if (lock.length) {
end += lock.length - 1;
* overlaps: an empty list, to be filled.
* Returns: true if at least one lock overlaps.
*/
- bool get_overlapping_locks(ceph_filelock& lock,
+ bool get_overlapping_locks(const ceph_filelock& lock,
list<multimap<uint64_t,
ceph_filelock>::iterator> & overlaps,
list<multimap<uint64_t,
ceph_filelock>::iterator> *self_neighbors);
- bool get_overlapping_locks(ceph_filelock& lock,
+ bool get_overlapping_locks(const ceph_filelock& lock,
list<multimap<uint64_t, ceph_filelock>::iterator>& overlaps) {
return get_overlapping_locks(lock, overlaps, NULL);
}
* overlaps: an empty list, to be filled
* Returns: true if at least one waiting_lock overlaps
*/
- bool get_waiting_overlaps(ceph_filelock& lock,
+ bool get_waiting_overlaps(const ceph_filelock& lock,
list<multimap<uint64_t,
ceph_filelock>::iterator>& overlaps);
/*
* Will have all locks owned by owner removed
* owned_locks: an empty list, to be filled with the locks owned by owner
*/
- void split_by_owner(ceph_filelock& owner,
+ void split_by_owner(const ceph_filelock& owner,
list<multimap<uint64_t,
ceph_filelock>::iterator> & locks,
list<multimap<uint64_t,