From 5a7d4cbc53935bc56c6d7b22816268d2e82246a2 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Thu, 22 Jun 2017 08:00:35 +0800 Subject: [PATCH] mds: improve freeze deadlock detection If a request waits for unfreeze while holding remote auth pins, a deadlock may happen. Signed-off-by: "Yan, Zheng" --- src/mds/Locker.cc | 26 ++++++++++++++++++++++++++ src/mds/Locker.h | 1 + src/mds/MDCache.cc | 5 ++++- src/mds/Server.cc | 23 ++++------------------- 4 files changed, 35 insertions(+), 20 deletions(-) diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc index 6cb9a73511130..cf795ed225d36 100644 --- a/src/mds/Locker.cc +++ b/src/mds/Locker.cc @@ -365,6 +365,10 @@ bool Locker::acquire_locks(MDRequestRef& mdr, } dout(10) << " can't auth_pin (freezing?), waiting to authpin " << *object << dendl; object->add_waiter(MDSCacheObject::WAIT_UNFREEZE, new C_MDS_RetryRequest(mdcache, mdr)); + + if (!mdr->remote_auth_pins.empty()) + notify_freeze_waiter(object); + return false; } } @@ -615,6 +619,28 @@ bool Locker::acquire_locks(MDRequestRef& mdr, return result; } +void Locker::notify_freeze_waiter(MDSCacheObject *o) +{ + CDir *dir = NULL; + if (CInode *in = dynamic_cast<CInode*>(o)) { + if (!in->is_root()) + dir = in->get_parent_dir(); + } else if (CDentry *dn = dynamic_cast<CDentry*>(o)) { + dir = dn->get_dir(); + } else { + dir = dynamic_cast<CDir*>(o); + assert(dir); + } + if (dir) { + if (dir->is_freezing_dir()) + mdcache->fragment_freeze_inc_num_waiters(dir); + if (dir->is_freezing_tree()) { + while (!dir->is_freezing_tree_root()) + dir = dir->get_parent_dir(); + mdcache->migrator->export_freeze_inc_num_waiters(dir); + } + } +} void Locker::set_xlocks_done(MutationImpl *mut, bool skip_dentry) { diff --git a/src/mds/Locker.h b/src/mds/Locker.h index 32b7d63016e4c..8cff4ec3e57b3 100644 --- a/src/mds/Locker.h +++ b/src/mds/Locker.h @@ -80,6 +80,7 @@ public: CInode *auth_pin_freeze=NULL, bool auth_pin_nonblock=false); + void notify_freeze_waiter(MDSCacheObject *o); void cancel_locking(MutationImpl *mut, set 
*pneed_issue); void drop_locks(MutationImpl *mut, set *pneed_issue=0); void set_xlocks_done(MutationImpl *mut, bool skip_dentry=false); diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index 1c6936823bcb0..1944e43bd00d1 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -12084,9 +12084,12 @@ void MDCache::repair_dirfrag_stats_work(MDRequestRef& mdr) } if (!mdr->is_auth_pinned(dir) && !dir->can_auth_pin()) { + dir->add_waiter(CDir::WAIT_UNFREEZE, new C_MDS_RetryRequest(this, mdr)); + mds->locker->drop_locks(mdr.get()); mdr->drop_local_auth_pins(); - dir->add_waiter(CDir::WAIT_UNFREEZE, new C_MDS_RetryRequest(this, mdr)); + if (!mdr->remote_auth_pins.empty()) + mds->locker->notify_freeze_waiter(dir); return; } diff --git a/src/mds/Server.cc b/src/mds/Server.cc index d65b3c9d9aa1f..54b8ed2de471d 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -2256,24 +2256,7 @@ void Server::handle_slave_auth_pin(MDRequestRef& mdr) (*p)->add_waiter(CDir::WAIT_UNFREEZE, new C_MDS_RetryRequest(mdcache, mdr)); mdr->drop_local_auth_pins(); - CDir *dir = NULL; - if (CInode *in = dynamic_cast<CInode*>(*p)) { - if (!in->is_root()) - dir = in->get_parent_dir(); - } else if (CDentry *dn = dynamic_cast<CDentry*>(*p)) { - dir = dn->get_dir(); - } else { - ceph_abort(); - } - if (dir) { - if (dir->is_freezing_dir()) - mdcache->fragment_freeze_inc_num_waiters(dir); - if (dir->is_freezing_tree()) { - while (!dir->is_freezing_tree_root()) - dir = dir->get_parent_dir(); - mdcache->migrator->export_freeze_inc_num_waiters(dir); - } - } + mds->locker->notify_freeze_waiter(*p); return; } } @@ -2825,6 +2808,8 @@ CInode* Server::rdlock_path_pin_ref(MDRequestRef& mdr, int n, */ mds->locker->drop_locks(mdr.get(), NULL); mdr->drop_local_auth_pins(); + if (!mdr->remote_auth_pins.empty()) + mds->locker->notify_freeze_waiter(ref); return 0; } @@ -2970,7 +2955,7 @@ CDir* Server::try_open_auth_dirfrag(CInode *diri, frag_t fg, MDRequestRef& mdr) if (!dir && diri->is_frozen()) { dout(10) << 
"try_open_auth_dirfrag: dir inode is frozen, waiting " << *diri << dendl; assert(diri->get_parent_dir()); - diri->get_parent_dir()->add_waiter(CDir::WAIT_UNFREEZE, new C_MDS_RetryRequest(mdcache, mdr)); + diri->add_waiter(CInode::WAIT_UNFREEZE, new C_MDS_RetryRequest(mdcache, mdr)); return 0; } -- 2.39.5