From b225ca97e8d0220035693e5a355479e9778ea231 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Wed, 5 Mar 2014 09:14:56 +0800 Subject: [PATCH] mds: fix open remote dirfrag deadlock During subtree migration, the importer may need to open subtree bound dirfrags. Opening subtree bound dirfrags happens after the exporter freezes the exporting subtree. So the discover message for opening subtree bound dirfrags should not wait for any freezing tree/directory, otherwise a deadlock can happen. In MDCache::handle_discover(), there are two cases that can cause discover messages to wait for a freezing tree/directory. One case is fetching bare-bone dirfrags. The other case is when, while merging dirfrags, some of the dirfrags are frozen and others are still freezing. Signed-off-by: Yan, Zheng --- src/mds/MDCache.cc | 37 +++++++++++++++++++++++++++++-------- src/mds/MDCache.h | 1 + 2 files changed, 30 insertions(+), 8 deletions(-) diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index e92513d6bfc4e..c8afa2ff692b1 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -10214,14 +10214,20 @@ void MDCache::handle_discover(MDiscover *dis) break; } - // open dir? - if (!curdir) + if (!curdir) { // open dir? + if (cur->is_frozen()) { + if (!reply->is_empty()) { + dout(7) << *cur << " is frozen, non-empty reply, stopping" << dendl; + break; + } + dout(7) << *cur << " is frozen, empty reply, waiting" << dendl; + cur->add_waiter(CInode::WAIT_UNFREEZE, new C_MDS_RetryMessage(mds, dis)); + reply->put(); + return; + } curdir = cur->get_or_open_dirfrag(this, fg); - assert(curdir); - assert(curdir->is_auth()); - - // is dir frozen? 
- if (curdir->is_frozen()) { + } else if (curdir->is_frozen_tree() || + (curdir->is_frozen_dir() && fragment_are_all_frozen(curdir))) { if (dis->wants_base_dir() && dis->get_base_dir_frag() != curdir->get_frag()) { dout(7) << *curdir << " is frozen, dirfrag mismatch, stopping" << dendl; reply->set_flag_error_dir(); @@ -10282,7 +10288,8 @@ void MDCache::handle_discover(MDiscover *dis) dout(7) << "incomplete dir contents for " << *curdir << ", fetching" << dendl; if (reply->is_empty()) { // fetch and wait - curdir->fetch(new C_MDS_RetryMessage(mds, dis)); + curdir->fetch(new C_MDS_RetryMessage(mds, dis), + dis->wants_base_dir() && curdir->get_version() == 0); reply->put(); return; } else { @@ -11352,6 +11359,20 @@ void MDCache::fragment_unmark_unfreeze_dirs(list& dirs) } } +bool MDCache::fragment_are_all_frozen(CDir *dir) +{ + assert(dir->is_frozen_dir()); + map::iterator p; + for (p = fragments.lower_bound(dirfrag_t(dir->ino(), 0)); + p != fragments.end() && p->first.ino == dir->ino(); + ++p) { + if (p->first.frag.contains(dir->get_frag())) + return p->second.has_frozen; + } + assert(0); + return false; +} + void MDCache::fragment_freeze_inc_num_waiters(CDir *dir) { map::iterator p; diff --git a/src/mds/MDCache.h b/src/mds/MDCache.h index c4a89ded8153c..25819d30145b7 100644 --- a/src/mds/MDCache.h +++ b/src/mds/MDCache.h @@ -1016,6 +1016,7 @@ public: void find_stale_fragment_freeze(); void fragment_freeze_inc_num_waiters(CDir *dir); + bool fragment_are_all_frozen(CDir *dir); int get_num_fragmenting_dirs() { return fragments.size(); } // -- updates -- -- 2.39.5