From 5692f7f55ee44e0ab5a576909845549201d6c986 Mon Sep 17 00:00:00 2001 From: Leonid Usov Date: Thu, 16 May 2024 17:11:19 +0300 Subject: [PATCH] mds/quiesce: drop remote authpins before waiting for the quiesce lock Signed-off-by: Leonid Usov Fixes: https://tracker.ceph.com/issues/65802 --- src/mds/Locker.cc | 72 +++++++++++++++++++++++++++++++++++++++++++++- src/mds/Locker.h | 4 +++ src/mds/MDCache.cc | 70 +------------------------------------------- src/mds/MDCache.h | 3 -- src/mds/Server.cc | 10 +++++-- 5 files changed, 84 insertions(+), 75 deletions(-) diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc index ef57eaf583064..02115453027d6 100644 --- a/src/mds/Locker.cc +++ b/src/mds/Locker.cc @@ -659,10 +659,80 @@ void Locker::handle_quiesce_failure(const MDRequestRef& mdr, std::string_view& m { dout(10) << " failed to acquire quiesce lock; dropping all locks" << dendl; marker = "failed to acquire quiesce lock"sv; - drop_locks(mdr.get(), NULL); + request_drop_locks(mdr); mdr->drop_local_auth_pins(); } +void Locker::request_drop_remote_locks(const MDRequestRef& mdr) +{ + if (!mdr->has_more()) + return; + + // clean up peers + // (will implicitly drop remote dn pins) + for (set::iterator p = mdr->more()->peers.begin(); + p != mdr->more()->peers.end(); + ++p) { + auto r = make_message(mdr->reqid, mdr->attempt, + MMDSPeerRequest::OP_FINISH); + + if (mdr->killed && !mdr->committing) { + r->mark_abort(); + } else if (mdr->more()->srcdn_auth_mds == *p && + mdr->more()->inode_import.length() > 0) { + // information about rename imported caps + r->inode_export = std::move(mdr->more()->inode_import); + } + + mds->send_message_mds(r, *p); + } + + /* strip foreign xlocks out of lock lists, since the OP_FINISH drops them + * implicitly. Note that we don't call the finishers -- there shouldn't + * be any on a remote lock and the request finish wakes up all + * the waiters anyway! */ + + for (auto it = mdr->locks.begin(); it != mdr->locks.end(); ) { + SimpleLock *lock = it->lock; + if (!lock->is_locallock() && it->is_xlock() && !lock->get_parent()->is_auth()) { + dout(10) << "request_drop_remote_locks forgetting lock " << *lock + << " on " << lock->get_parent() << dendl; + lock->put_xlock(); + mdr->locks.erase(it++); + } else if (it->is_remote_wrlock()) { + dout(10) << "request_drop_remote_locks forgetting remote_wrlock " << *lock + << " on mds." << it->wrlock_target << " on " << *lock->get_parent() << dendl; + if (it->is_wrlock()) { + it->clear_remote_wrlock(); + ++it; + } else { + mdr->locks.erase(it++); + } + } else { + ++it; + } + } + + mdr->more()->peers.clear(); /* we no longer have requests out to them, and + * leaving them in can cause double-notifies as + * this function can get called more than once */ +} + +void Locker::request_drop_non_rdlocks(const MDRequestRef& mdr) +{ + // NB: request_drop_remote_locks must be called before the local drop locks + // Otherwise, OP_FINISH won't be called and the authpins will stay on the remote objects + request_drop_remote_locks(mdr); + drop_non_rdlocks(mdr.get()); +} + +void Locker::request_drop_locks(const MDRequestRef& mdr) +{ + // NB: request_drop_remote_locks must be called before the local drop locks + // Otherwise, OP_FINISH won't be called and the authpins will stay on the remote objects + request_drop_remote_locks(mdr); + drop_locks(mdr.get()); +} void Locker::notify_freeze_waiter(MDSCacheObject *o) { diff --git a/src/mds/Locker.h b/src/mds/Locker.h index 0a500f09be15b..a1ea4a260989c 100644 --- a/src/mds/Locker.h +++ b/src/mds/Locker.h @@ -70,6 +70,10 @@ public: void drop_lock(MutationImpl* mut, SimpleLock* what); void drop_locks_for_fragment_unfreeze(MutationImpl *mut); + void request_drop_remote_locks(const MDRequestRef& mdr); + void request_drop_non_rdlocks(const MDRequestRef& r); + void request_drop_locks(const MDRequestRef& r); + int get_cap_bit_for_lock_cache(int op); void create_lock_cache(const MDRequestRef& mdr, CInode *diri, file_layout_t *dir_layout=nullptr); bool find_and_attach_lock_cache(const MDRequestRef& mdr, CInode *diri); diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index 11f7d7ab347fc..554616c333799 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -9893,74 +9893,6 @@ void MDCache::dispatch_request(const MDRequestRef& mdr) } } - -void MDCache::request_drop_foreign_locks(const MDRequestRef& mdr) -{ - if (!mdr->has_more()) - return; - - // clean up peers - // (will implicitly drop remote dn pins) - for (set::iterator p = mdr->more()->peers.begin(); - p != mdr->more()->peers.end(); - ++p) { - auto r = make_message(mdr->reqid, mdr->attempt, - MMDSPeerRequest::OP_FINISH); - - if (mdr->killed && !mdr->committing) { - r->mark_abort(); - } else if (mdr->more()->srcdn_auth_mds == *p && - mdr->more()->inode_import.length() > 0) { - // information about rename imported caps - r->inode_export = std::move(mdr->more()->inode_import); - } - - mds->send_message_mds(r, *p); - } - - /* strip foreign xlocks out of lock lists, since the OP_FINISH drops them - * implicitly. Note that we don't call the finishers -- there shouldn't - * be any on a remote lock and the request finish wakes up all - * the waiters anyway! */ - - for (auto it = mdr->locks.begin(); it != mdr->locks.end(); ) { - SimpleLock *lock = it->lock; - if (!lock->is_locallock() && it->is_xlock() && !lock->get_parent()->is_auth()) { - dout(10) << "request_drop_foreign_locks forgetting lock " << *lock - << " on " << lock->get_parent() << dendl; - lock->put_xlock(); - mdr->locks.erase(it++); - } else if (it->is_remote_wrlock()) { - dout(10) << "request_drop_foreign_locks forgetting remote_wrlock " << *lock - << " on mds." << it->wrlock_target << " on " << *lock->get_parent() << dendl; - if (it->is_wrlock()) { - it->clear_remote_wrlock(); - ++it; - } else { - mdr->locks.erase(it++); - } - } else { - ++it; - } - } - - mdr->more()->peers.clear(); /* we no longer have requests out to them, and - * leaving them in can cause double-notifies as - * this function can get called more than once */ -} - -void MDCache::request_drop_non_rdlocks(const MDRequestRef& mdr) -{ - request_drop_foreign_locks(mdr); - mds->locker->drop_non_rdlocks(mdr.get()); -} - -void MDCache::request_drop_locks(const MDRequestRef& mdr) -{ - request_drop_foreign_locks(mdr); - mds->locker->drop_locks(mdr.get()); -} - void MDCache::request_cleanup(const MDRequestRef& mdr) { dout(15) << "request_cleanup " << *mdr << dendl; @@ -10002,7 +9934,7 @@ void MDCache::request_cleanup(const MDRequestRef& mdr) break; } - request_drop_locks(mdr); + mds->locker->request_drop_locks(mdr); // drop (local) auth pins mdr->drop_local_auth_pins(); diff --git a/src/mds/MDCache.h b/src/mds/MDCache.h index 87702a72b8d72..feda66729970d 100644 --- a/src/mds/MDCache.h +++ b/src/mds/MDCache.h @@ -426,9 +426,6 @@ class MDCache { void request_finish(const MDRequestRef& mdr); void request_forward(const MDRequestRef& mdr, mds_rank_t mds, int port=0); void dispatch_request(const MDRequestRef& mdr); - void request_drop_foreign_locks(const MDRequestRef& mdr); - void request_drop_non_rdlocks(const MDRequestRef& r); - void request_drop_locks(const MDRequestRef& r); void request_cleanup(const MDRequestRef& r); void request_kill(const MDRequestRef& r); // called when session closes diff --git a/src/mds/Server.cc b/src/mds/Server.cc index 8961e9b128af7..681663cb65276 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -2337,7 +2337,7 @@ void Server::reply_client_request(const MDRequestRef& mdr, const ref_trequest_drop_non_rdlocks(mdr); + mds->locker->request_drop_non_rdlocks(mdr); // reply at all? if (session && !client_inst.name.is_mds()) { @@ -3098,7 +3098,13 @@ void Server::dispatch_peer_request(const MDRequestRef& mdr) break; } - // don't add quiescelock, let the peer acquire that lock themselves + // avoid taking the quiesce lock, as we can't communicate a failure to lock it + // Without communicating the failure which would make the peer request drop all locks, + // blocking on quiesce here will create an opportunity for a deadlock + // The current quiesce design shouldn't suffer from this though. The reason quiesce + // will want to take other locks is to prevent issuing unwanted client capabilities, + // but since replicas can't issue capabilities, it should be fine allowing remote locks + // without taking the quiesce lock. if (!mds->locker->acquire_locks(mdr, lov, nullptr, {}, false, true)) return; -- 2.39.5