From: Yan, Zheng
Date: Fri, 19 Feb 2016 10:39:49 +0000 (+0800)
Subject: mds: Add posix file lock deadlock detection
X-Git-Tag: v10.1.0~75^2~1
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=56dd09fc2a35733e0d4e4f0557fe314d3a0cf521;p=ceph.git

mds: Add posix file lock deadlock detection

Signed-off-by: Yan, Zheng
---

diff --git a/src/client/Client.cc b/src/client/Client.cc
index c506c4550b6..a998be32cb2 100644
--- a/src/client/Client.cc
+++ b/src/client/Client.cc
@@ -8766,11 +8766,11 @@ int Client::_do_filelock(Inode *in, Fh *fh, int lock_type, int op, int sleep,
   ceph_lock_state_t *lock_state;
   if (lock_type == CEPH_LOCK_FCNTL) {
     if (!in->fcntl_locks)
-      in->fcntl_locks = new ceph_lock_state_t(cct);
+      in->fcntl_locks = new ceph_lock_state_t(cct, CEPH_LOCK_FCNTL);
     lock_state = in->fcntl_locks;
   } else if (lock_type == CEPH_LOCK_FLOCK) {
     if (!in->flock_locks)
-      in->flock_locks = new ceph_lock_state_t(cct);
+      in->flock_locks = new ceph_lock_state_t(cct, CEPH_LOCK_FLOCK);
     lock_state = in->flock_locks;
   } else {
     assert(0);
@@ -8781,11 +8781,11 @@
     if (fh) {
       if (lock_type == CEPH_LOCK_FCNTL) {
         if (!fh->fcntl_locks)
-          fh->fcntl_locks = new ceph_lock_state_t(cct);
+          fh->fcntl_locks = new ceph_lock_state_t(cct, CEPH_LOCK_FCNTL);
         lock_state = fh->fcntl_locks;
       } else {
         if (!fh->flock_locks)
-          fh->flock_locks = new ceph_lock_state_t(cct);
+          fh->flock_locks = new ceph_lock_state_t(cct, CEPH_LOCK_FLOCK);
         lock_state = fh->flock_locks;
       }
       _update_lock_state(fl, owner, lock_state);
@@ -8920,7 +8920,7 @@ void Client::_update_lock_state(struct flock *fl, uint64_t owner,
     list<ceph_filelock> activated_locks;
     lock_state->remove_lock(filelock, activated_locks);
   } else {
-    bool r = lock_state->add_lock(filelock, false, false);
+    bool r = lock_state->add_lock(filelock, false, false, NULL);
     assert(r);
   }
 }
diff --git a/src/mds/CInode.h b/src/mds/CInode.h
index 15fef42e99b..8f0b0a12449 100644
--- a/src/mds/CInode.h
+++ b/src/mds/CInode.h
@@ -542,7 +542,7 @@ protected:
   ceph_lock_state_t *get_fcntl_lock_state() {
     if (!fcntl_locks)
-      fcntl_locks = new ceph_lock_state_t(g_ceph_context);
+      fcntl_locks = new ceph_lock_state_t(g_ceph_context, CEPH_LOCK_FCNTL);
     return fcntl_locks;
   }
   void clear_fcntl_lock_state() {
@@ -551,7 +551,7 @@
   }
   ceph_lock_state_t *get_flock_lock_state() {
     if (!flock_locks)
-      flock_locks = new ceph_lock_state_t(g_ceph_context);
+      flock_locks = new ceph_lock_state_t(g_ceph_context, CEPH_LOCK_FLOCK);
     return flock_locks;
   }
   void clear_flock_lock_state() {
diff --git a/src/mds/Server.cc b/src/mds/Server.cc
index b9f5b0c361a..596de2bc4cc 100644
--- a/src/mds/Server.cc
+++ b/src/mds/Server.cc
@@ -3531,14 +3531,17 @@ void Server::handle_client_file_setlock(MDRequestRef& mdr)
       respond_to_request(mdr, 0);
     } else {
       dout(10) << " lock attempt on " << set_lock << dendl;
+      bool deadlock = false;
       if (mdr->more()->flock_was_waiting &&
           !lock_state->is_waiting(set_lock)) {
         dout(10) << " was waiting for lock but not anymore, must have been canceled " << set_lock << dendl;
         respond_to_request(mdr, -EINTR);
-      } else if (!lock_state->add_lock(set_lock, will_wait, mdr->more()->flock_was_waiting)) {
+      } else if (!lock_state->add_lock(set_lock, will_wait, mdr->more()->flock_was_waiting, &deadlock)) {
         dout(10) << " it failed on this attempt" << dendl;
         // couldn't set lock right now
-        if (!will_wait) {
+        if (deadlock) {
+          respond_to_request(mdr, -EDEADLK);
+        } else if (!will_wait) {
           respond_to_request(mdr, -EWOULDBLOCK);
         } else {
dout(10) << " added to waiting list" << dendl; diff --git a/src/mds/flock.cc b/src/mds/flock.cc index e99435e5d19..09557032e07 100644 --- a/src/mds/flock.cc +++ b/src/mds/flock.cc @@ -8,7 +8,27 @@ #define dout_subsys ceph_subsys_mds -bool ceph_lock_state_t::is_waiting(ceph_filelock &fl) +static multimap global_waiting_locks; + +ceph_lock_state_t::~ceph_lock_state_t() +{ + if (type == CEPH_LOCK_FCNTL) { + for (auto p = waiting_locks.begin(); p != waiting_locks.end(); ++p) { + for (auto q = global_waiting_locks.find(p->second); + q != global_waiting_locks.end(); ) { + if (q->first != p->second) + break; + if (q->second == this) { + global_waiting_locks.erase(q); + break; + } + ++q; + } + } + } +} + +bool ceph_lock_state_t::is_waiting(const ceph_filelock &fl) { multimap::iterator p = waiting_locks.find(fl.start); while (p != waiting_locks.end()) { @@ -22,12 +42,12 @@ bool ceph_lock_state_t::is_waiting(ceph_filelock &fl) return false; } -void ceph_lock_state_t::remove_waiting(ceph_filelock& fl) +void ceph_lock_state_t::remove_waiting(const ceph_filelock& fl) { - multimap::iterator p = waiting_locks.find(fl.start); - while (p != waiting_locks.end()) { + for (auto p = waiting_locks.find(fl.start); + p != waiting_locks.end(); ) { if (p->second.start > fl.start) - return; + break; if (p->second.length == fl.length && ceph_filelock_owner_equal(p->second, fl)) { waiting_locks.erase(p); @@ -35,14 +55,100 @@ void ceph_lock_state_t::remove_waiting(ceph_filelock& fl) if (!client_waiting_lock_counts[(client_t)fl.client]) { client_waiting_lock_counts.erase((client_t)fl.client); } - return; + break; } ++p; } + + if (type == CEPH_LOCK_FCNTL) { + for (auto q = global_waiting_locks.find(fl); + q != global_waiting_locks.end(); ) { + if (q->first != fl) + break; + if (q->second == this) { + global_waiting_locks.erase(q); + break; + } + ++q; + } + } +} + +bool ceph_lock_state_t::is_deadlock(const ceph_filelock& fl, + list::iterator>& + overlapping_locks, + const ceph_filelock *first_fl, unsigned depth) +{ + ldout(cct,15) << "is_deadlock " << fl << dendl; + + // only for posix lock + if (type != CEPH_LOCK_FCNTL) + return false; + + // find conflict locks' owners + set lock_owners; + for (auto p = overlapping_locks.begin(); + p != overlapping_locks.end(); + ++p) { + + if (fl.type == CEPH_LOCK_SHARED && + (*p)->second.type == CEPH_LOCK_SHARED) + continue; + + // circle detected + if (first_fl && ceph_filelock_owner_equal(*first_fl, (*p)->second)) { + ldout(cct,15) << " detect deadlock" << dendl; + return true; + } + + ceph_filelock tmp = (*p)->second; + tmp.start = 0; + tmp.length = 0; + tmp.type = 0; + lock_owners.insert(tmp); + } + + if (depth >= MAX_DEADLK_DEPTH) + return false; + + first_fl = first_fl ? first_fl : &fl; + for (auto p = lock_owners.begin(); + p != lock_owners.end(); + ++p) { + ldout(cct,15) << " conflict lock owner " << *p << dendl; + // if conflict lock' owner is waiting for other lock? 
+    for (auto q = global_waiting_locks.lower_bound(*p);
+         q != global_waiting_locks.end();
+         ++q) {
+      if (!ceph_filelock_owner_equal(q->first, *p))
+        break;
+
+      list<multimap<uint64_t, ceph_filelock>::iterator>
+        _overlapping_locks, _self_overlapping_locks;
+      ceph_lock_state_t& state = *(q->second);
+      if (state.get_overlapping_locks(q->first, _overlapping_locks)) {
+        state.split_by_owner(q->first, _overlapping_locks, _self_overlapping_locks);
+      }
+      if (!_overlapping_locks.empty()) {
+        if (is_deadlock(q->first, _overlapping_locks, first_fl, depth + 1))
+          return true;
+      }
+    }
+  }
+  return false;
+}
+
+void ceph_lock_state_t::add_waiting(const ceph_filelock& fl)
+{
+  waiting_locks.insert(pair<uint64_t, ceph_filelock>(fl.start, fl));
+  if (type == CEPH_LOCK_FCNTL) {
+    global_waiting_locks.insert(pair<ceph_filelock, ceph_lock_state_t*>(fl, this));
+  }
 }
 
 bool ceph_lock_state_t::add_lock(ceph_filelock& new_lock,
-                                 bool wait_on_fail, bool replay)
+                                 bool wait_on_fail, bool replay,
+                                 bool *deadlock)
 {
   ldout(cct,15) << "add_lock " << new_lock << dendl;
   bool ret = false;
@@ -60,14 +166,20 @@
       ldout(cct,15) << "overlapping lock, and this lock is exclusive, can't set" << dendl;
       if (wait_on_fail && !replay) {
-        waiting_locks.insert(pair<uint64_t, ceph_filelock>(new_lock.start, new_lock));
+        if (is_deadlock(new_lock, overlapping_locks))
+          *deadlock = true;
+        else
+          add_waiting(new_lock);
       }
     } else { //shared lock, check for any exclusive locks blocking us
       if (contains_exclusive_lock(overlapping_locks)) { //blocked :(
         ldout(cct,15) << " blocked by exclusive lock in overlapping_locks" << dendl;
-        if (wait_on_fail && !replay) {
-          waiting_locks.insert(pair<uint64_t, ceph_filelock>(new_lock.start, new_lock));
-        }
+        if (wait_on_fail && !replay) {
+          if (is_deadlock(new_lock, overlapping_locks))
+            *deadlock = true;
+          else
+            add_waiting(new_lock);
+        }
       } else { //yay, we can insert a shared lock
         ldout(cct,15) << "inserting shared lock" << dendl;
@@ -190,12 +302,37 @@ bool ceph_lock_state_t::remove_all_from (client_t client)
 {
   bool cleared_any = false;
   if (client_held_lock_counts.count(client)) {
-    remove_all_from(client, held_locks);
+    multimap<uint64_t, ceph_filelock>::iterator iter = held_locks.begin();
+    while (iter != held_locks.end()) {
+      if ((client_t)iter->second.client == client) {
+        held_locks.erase(iter++);
+      } else
+        ++iter;
+    }
     client_held_lock_counts.erase(client);
     cleared_any = true;
   }
+
   if (client_waiting_lock_counts.count(client)) {
-    remove_all_from(client, waiting_locks);
+    multimap<uint64_t, ceph_filelock>::iterator iter = waiting_locks.begin();
+    while (iter != waiting_locks.end()) {
+      if ((client_t)iter->second.client != client) {
+        ++iter;
+        continue;
+      }
+
+      for (auto p = global_waiting_locks.find(iter->second);
+           p != global_waiting_locks.end(); ) {
+        if (p->first != iter->second)
+          break;
+        if (p->second == this) {
+          global_waiting_locks.erase(p);
+          break;
+        }
+        ++p;
+      }
+      waiting_locks.erase(iter++);
+    }
     client_waiting_lock_counts.erase(client);
   }
   return cleared_any;
@@ -328,18 +465,6 @@ void ceph_lock_state_t::adjust_locks(list<multimap<uint64_t, ceph_filelock>::ite
   }
 }
 
-void ceph_lock_state_t::remove_all_from(client_t client,
-                                        multimap<uint64_t, ceph_filelock>& locks)
-{
-  multimap<uint64_t, ceph_filelock>::iterator iter = locks.begin();
-  while (iter != locks.end()) {
-    if ((client_t)iter->second.client == client) {
-      locks.erase(iter++);
-    } else ++iter;
-  }
-}
-
 multimap<uint64_t, ceph_filelock>::iterator
 ceph_lock_state_t::get_lower_bound(uint64_t start,
                                    multimap<uint64_t, ceph_filelock>& lock_map)
@@ -383,7 +508,7 @@ bool ceph_lock_state_t::share_space(
   return ret;
 }
 
-bool ceph_lock_state_t::get_overlapping_locks(ceph_filelock& lock,
+bool ceph_lock_state_t::get_overlapping_locks(const ceph_filelock& lock,
                                               list<multimap<uint64_t,
                                                   ceph_filelock>::iterator> & overlaps,
                                               list<multimap<uint64_t,
@@ ... @@ bool ceph_lock_state_t::get_overlapping_locks(ceph_filelock& lock,
-bool ceph_lock_state_t::get_waiting_overlaps(ceph_filelock& lock,
+bool ceph_lock_state_t::get_waiting_overlaps(const ceph_filelock& lock,
                                              list<multimap<uint64_t,
                                                  ceph_filelock>::iterator>&
                                                 overlaps)
@@ -445,7 +570,7 @@ bool ceph_lock_state_t::get_waiting_overlaps(ceph_filelock& lock,
   return !overlaps.empty();
 }
 
-void ceph_lock_state_t::split_by_owner(ceph_filelock& owner,
+void ceph_lock_state_t::split_by_owner(const ceph_filelock& owner,
                                        list<multimap<uint64_t,
                                            ceph_filelock>::iterator>& locks,
                                        list<multimap<uint64_t,
                                            ceph_filelock>::iterator>& owned_locks)
diff --git a/src/mds/flock.h b/src/mds/flock.h
--- a/src/mds/flock.h
+++ b/src/mds/flock.h
@@ ... @@
+inline int ceph_filelock_owner_compare(const ceph_filelock& l, const ceph_filelock& r)
+{
+  if (l.client != r.client)
+    return l.client > r.client ? 1 : -1;
+  if (l.owner != r.owner)
+    return l.owner > r.owner ? 1 : -1;
+  if (l.owner & (1ULL << 63))
+    return 0;
+  if (l.pid != r.pid)
+    return l.pid > r.pid ? 1 : -1;
+  return 0;
+}
+
+inline int ceph_filelock_compare(const ceph_filelock& l, const ceph_filelock& r)
+{
+  int ret = ceph_filelock_owner_compare(l, r);
+  if (ret)
+    return ret;
+  if (l.start != r.start)
+    return l.start > r.start ? 1 : -1;
+  if (l.length != r.length)
+    return l.length > r.length ? 1 : -1;
+  if (l.type != r.type)
+    return l.type > r.type ? 1 : -1;
+  return 0;
+}
+
+inline bool operator<(const ceph_filelock& l, const ceph_filelock& r)
+{
+  return ceph_filelock_compare(l, r) < 0;
+}
+
+inline bool operator==(const ceph_filelock& l, const ceph_filelock& r) {
+  return ceph_filelock_compare(l, r) == 0;
+}
+
+inline bool operator!=(const ceph_filelock& l, const ceph_filelock& r) {
+  return ceph_filelock_compare(l, r) != 0;
 }
 
 class ceph_lock_state_t {
   CephContext *cct;
+  int type;
 public:
-  explicit ceph_lock_state_t(CephContext *cct_) : cct(cct_) {}
+  explicit ceph_lock_state_t(CephContext *cct_, int type_) : cct(cct_), type(type_) {}
+  ~ceph_lock_state_t();
   multimap<uint64_t, ceph_filelock> held_locks;    // current locks
   multimap<uint64_t, ceph_filelock> waiting_locks; // locks waiting for other locks
                                                    // both of the above are keyed by starting offset
@@ -52,14 +87,13 @@ public:
   * @param fl The filelock to check for
   * @returns True if the lock is waiting, false otherwise
   */
-  bool is_waiting(ceph_filelock &fl);
+  bool is_waiting(const ceph_filelock &fl);
  /**
   * Remove a lock from the waiting_locks list
   *
   * @param fl The filelock to remove
   */
-  void remove_waiting(ceph_filelock& fl);
-
+  void remove_waiting(const ceph_filelock& fl);
  /*
   * Try to set a new lock. If it's blocked and wait_on_fail is true,
   * add the lock to waiting_locks.
@@ -73,7 +107,8 @@ public:
   *
   * @returns true if set, false if not set.
   */
-  bool add_lock(ceph_filelock& new_lock, bool wait_on_fail, bool replay);
+  bool add_lock(ceph_filelock& new_lock, bool wait_on_fail, bool replay,
+                bool *deadlock);
  /**
   * See if a lock is blocked by existing locks. If the lock is blocked,
   * it will be set to the value of the first blocking lock. Otherwise,
@@ -91,11 +126,33 @@ public:
   * @param removal_lock The lock to remove
   * @param activated_locks A return parameter, holding activated wait locks.
   */
-  void remove_lock(ceph_filelock removal_lock,
+  void remove_lock(const ceph_filelock removal_lock,
                    list<ceph_filelock>& activated_locks);
 
  bool remove_all_from(client_t client);
private:
+  static const unsigned MAX_DEADLK_DEPTH = 5;
+
+  /**
+   * Check if adding the lock causes deadlock
+   *
+   * @param fl The blocking filelock
+   * @param overlapping_locks list of all overlapping locks
+   * @param first_fl
+   * @depth recursion call depth
+   */
+  bool is_deadlock(const ceph_filelock& fl,
+                   list<multimap<uint64_t, ceph_filelock>::iterator>&
+                     overlapping_locks,
+                   const ceph_filelock *first_fl=NULL, unsigned depth=0);
+
+  /**
+   * Add a lock to the waiting_locks list
+   *
+   * @param fl The filelock to add
+   */
+  void add_waiting(const ceph_filelock& fl);
+
  /**
   * Adjust old locks owned by a single process so that process can set
   * a new lock of different type. Handle any changes needed to the old locks
@@ -120,10 +177,6 @@ private:
                     list<multimap<uint64_t, ceph_filelock>::iterator>
                     neighbor_locks);
 
-  //this won't reset the counter map value, do that yourself
-  void remove_all_from(client_t client,
-                       multimap<uint64_t, ceph_filelock>& locks);
-
   //get last lock prior to start position
   multimap<uint64_t, ceph_filelock>::iterator
   get_lower_bound(uint64_t start,
@@ -143,7 +196,7 @@ private:
                   uint64_t start, uint64_t end);
 
   bool share_space(multimap<uint64_t, ceph_filelock>::iterator& iter,
-                   ceph_filelock &lock) {
+                   const ceph_filelock &lock) {
     uint64_t end = lock.start;
     if (lock.length) {
       end += lock.length - 1;
@@ -158,14 +211,14 @@ private:
   * overlaps: an empty list, to be filled.
   * Returns: true if at least one lock overlaps.
   */
-  bool get_overlapping_locks(ceph_filelock& lock,
+  bool get_overlapping_locks(const ceph_filelock& lock,
                              list<multimap<uint64_t,
                                       ceph_filelock>::iterator> & overlaps,
                              list<multimap<uint64_t,
                                       ceph_filelock>::iterator> *self_neighbors);
-  bool get_overlapping_locks(ceph_filelock& lock,
+  bool get_overlapping_locks(const ceph_filelock& lock,
                              list<multimap<uint64_t,
                                       ceph_filelock>::iterator>& overlaps) {
    return get_overlapping_locks(lock, overlaps, NULL);
  }
@@ -176,7 +229,7 @@ private:
   * overlaps: an empty list, to be filled
   * Returns: true if at least one waiting_lock overlaps
   */
-  bool get_waiting_overlaps(ceph_filelock& lock,
+  bool get_waiting_overlaps(const ceph_filelock& lock,
                             list<multimap<uint64_t,
                                      ceph_filelock>::iterator>& overlaps);
  /*
@@ -187,7 +240,7 @@
   * Will have all locks owned by owner removed
   * owned_locks: an empty list, to be filled with the locks owned by owner
   */
-  void split_by_owner(ceph_filelock& owner,
+  void split_by_owner(const ceph_filelock& owner,
                       list<multimap<uint64_t,
                                ceph_filelock>::iterator> & locks,
                       list<multimap<uint64_t,
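
For context, the user-visible effect of the MDS-side check above is that a blocking fcntl(2) request which would close a wait cycle now fails with EDEADLK instead of hanging forever: Server::handle_client_file_setlock() returns -EDEADLK when add_lock() reports a deadlock. The following is a minimal sketch (not part of the patch) of how that behaviour could be exercised from a client. It only uses the standard POSIX locking API; the CephFS mount path is hypothetical.

// Two processes take byte-range locks on the same file in opposite order,
// so the second blocking F_SETLKW closes a wait cycle.  With deadlock
// detection in the MDS, the blocked request is expected to fail with
// EDEADLK instead of hanging.  The mount path below is hypothetical.
#include <sys/types.h>
#include <sys/wait.h>
#include <fcntl.h>
#include <unistd.h>
#include <cerrno>
#include <cstdio>
#include <cstring>

static int set_lock(int fd, off_t off, bool wait)
{
  struct flock fl;
  memset(&fl, 0, sizeof(fl));
  fl.l_type = F_WRLCK;
  fl.l_whence = SEEK_SET;
  fl.l_start = off;
  fl.l_len = 1;                                   // lock a single byte
  return fcntl(fd, wait ? F_SETLKW : F_SETLK, &fl);
}

int main()
{
  const char *path = "/mnt/cephfs/deadlock_test"; // hypothetical CephFS mount
  int fd = open(path, O_CREAT | O_RDWR, 0644);
  if (fd < 0) { perror("open"); return 1; }

  pid_t pid = fork();
  if (pid == 0) {                                 // child: byte 1, then byte 0
    set_lock(fd, 1, false);
    sleep(1);                                     // let the parent take byte 0
    if (set_lock(fd, 0, true) < 0 && errno == EDEADLK)
      printf("child: EDEADLK\n");
    _exit(0);                                     // exiting releases the child's locks
  }

  set_lock(fd, 0, false);                         // parent: byte 0, then byte 1
  sleep(1);
  if (set_lock(fd, 1, true) < 0 && errno == EDEADLK)
    printf("parent: EDEADLK\n");
  close(fd);                                      // drop the parent's locks either way
  waitpid(pid, NULL, 0);
  return 0;
}

Whichever of the two blocking requests reaches the MDS second should be the one that receives EDEADLK. Note that the cycle search in is_deadlock() is bounded by MAX_DEADLK_DEPTH, so a cycle through a very long chain of conflicting owners may still be left waiting rather than reported as a deadlock.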