From: Yan, Zheng Date: Tue, 16 May 2017 01:16:46 +0000 (+0800) Subject: ceph: simplify CInode::maybe_export_pin() X-Git-Tag: v12.1.1~65^2~18^2~1 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=9d2d032e2f2282e2a249254b4ae8f63129d8648b;p=ceph.git ceph: simplify CInode::maybe_export_pin() Move most of the work into MDBalancer::handle_export_pins(); this simplifies the code a lot. Another reason for this change is that creating aux subtrees directly in CInode::maybe_export_pin() confuses journal replay (the subtree map check when replaying ESubtreeMap). Signed-off-by: "Yan, Zheng" --- diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc index 7af16b5641e5..ba2ebaab60a8 100644 --- a/src/mds/CInode.cc +++ b/src/mds/CInode.cc @@ -1809,8 +1809,9 @@ void CInode::decode_lock_state(int type, bufferlist& bl) if (inode.ctime < tm) inode.ctime = tm; ::decode(inode.layout, p); ::decode(inode.quota, p); + mds_rank_t old_pin = inode.export_pin; ::decode(inode.export_pin, p); - maybe_export_pin(); + maybe_export_pin(old_pin != inode.export_pin); } break; @@ -4407,70 +4408,41 @@ int64_t CInode::get_backtrace_pool() const } } -class C_CInode_ExportPin : public MDSInternalContext { -public: - explicit C_CInode_ExportPin(CInode *in) : MDSInternalContext(in->mdcache->mds), in(in) { - in->get(MDSCacheObject::PIN_PTRWAITER); - } - ~C_CInode_ExportPin() { - in->put(MDSCacheObject::PIN_PTRWAITER); - } +void CInode::maybe_export_pin(bool update) +{ + if (!g_conf->mds_bal_export_pin) + return; + if (!is_dir() || !is_normal()) + return; - void finish(int r) override { - in->maybe_export_pin(); - } -private: - CInode *in; -}; + mds_rank_t export_pin = get_export_pin(false); + if (export_pin == MDS_RANK_NONE && !update) + return; -void CInode::maybe_export_pin() -{ - if (g_conf->mds_bal_export_pin && is_dir() && is_normal()) { - mds_rank_t pin = get_export_pin(false); - dout(20) << "maybe_export_pin export_pin=" << pin << " on " << *this << dendl; - if (pin == 
mdcache->mds->get_nodeid()) { - for (auto it = dirfrags.begin(); it != dirfrags.end(); it++) { - CDir *cd = it->second; - dout(20) << "dirfrag: " << *cd << dendl; - if (cd->state_test(CDir::STATE_CREATING)) { - /* inode is not journaled yet */ - cd->add_waiter(CDir::WAIT_CREATED, new C_CInode_ExportPin(this)); - dout(15) << "aux subtree pin of " << *cd << " delayed for finished creation" << dendl; - continue; - } - if (cd->state_test(CDir::STATE_AUXSUBTREE)) continue; - CDir *subtree = mdcache->get_subtree_root(cd); - assert(subtree); - if (subtree->is_ambiguous_auth()) { - subtree->add_waiter(MDSCacheObject::WAIT_SINGLEAUTH, new C_CInode_ExportPin(this)); - dout(15) << "aux subtree pin of " << *cd << " delayed for single auth on subtree " << *subtree << dendl; - } else if (subtree->is_auth()) { - assert(cd->is_auth()); - if (subtree->is_frozen() || subtree->is_freezing()) { - subtree->add_waiter(MDSCacheObject::WAIT_UNFREEZE, new C_CInode_ExportPin(this)); - dout(15) << "aux subtree pin of " << *cd << " delayed for unfreeze on subtree " << *subtree << dendl; - } else { - cd->state_set(CDir::STATE_AUXSUBTREE); - mdcache->adjust_subtree_auth(cd, mdcache->mds->get_nodeid()); - dout(15) << "aux subtree pinned " << *cd << dendl; - } - } else { - assert(!cd->is_auth()); - dout(15) << "not setting aux subtree pin for " << *cd << " because not auth" << dendl; - } - } - } else if (pin != MDS_RANK_NONE) { - for (auto it = dirfrags.begin(); it != dirfrags.end(); it++) { - CDir *cd = it->second; - if (cd->is_auth() && cd->state_test(CDir::STATE_AUXSUBTREE)) { - assert(!(cd->is_frozen() || cd->is_freezing())); - assert(!cd->state_test(CDir::STATE_EXPORTBOUND)); - cd->state_clear(CDir::STATE_AUXSUBTREE); /* merge will happen eventually */ - dout(15) << "cleared aux subtree pin " << *cd << dendl; - } + if (mdcache->export_pin_queue.count(this)) + return; + + bool queue = false; + for (auto p = dirfrags.begin(); p != dirfrags.end(); p++) { + CDir *dir = p->second; + if 
(!dir->is_auth()) + continue; + if (export_pin != MDS_RANK_NONE) { + if (dir->is_subtree_root()) { + // export subtrees ? + queue = (export_pin != dir->get_dir_auth().first); + } else { + // create aux subtrees + queue = true; } - dout(20) << "adding to export_pin_queue " << *this << dendl; + } else { + // clear aux subtrees ? + queue = dir->state_test(CDir::STATE_AUXSUBTREE); + } + if (queue) { + get(CInode::PIN_EXPORTPINQUEUE); mdcache->export_pin_queue.insert(this); + break; } } } @@ -4480,7 +4452,7 @@ void CInode::set_export_pin(mds_rank_t rank) assert(is_dir()); assert(is_projected()); get_projected_inode()->export_pin = rank; - maybe_export_pin(); + maybe_export_pin(true); } mds_rank_t CInode::get_export_pin(bool inherit) const diff --git a/src/mds/CInode.h b/src/mds/CInode.h index eae86ef4c331..0eb72690cc2b 100644 --- a/src/mds/CInode.h +++ b/src/mds/CInode.h @@ -153,6 +153,7 @@ class CInode : public MDSCacheObject, public InodeStoreBase, public Counter ls; - in->dirfragtree.get_leaves(ls); - for (const auto &fg : ls) { - CDir *cd = in->get_dirfrag(fg); - if (cd && cd->is_auth()) { - /* N.B. 
when we are no longer auth after exporting, this function will remove the inode from the queue */ - mds->mdcache->migrator->export_dir(cd, export_pin); - has_auth = true; + mds_rank_t export_pin = in->get_export_pin(false); + + bool remove = true; + list dfls; + in->get_dirfrags(dfls); + for (auto dir : dfls) { + if (!dir->is_auth()) + continue; + + if (export_pin == MDS_RANK_NONE) { + if (dir->state_test(CDir::STATE_AUXSUBTREE)) { + if (dir->is_frozen() || dir->is_freezing()) { + // try again later + remove = false; + continue; + } + dout(10) << " clear auxsubtree on " << *dir << dendl; + dir->state_clear(CDir::STATE_AUXSUBTREE); + mds->mdcache->try_subtree_merge(dir); + } + } else if (export_pin == mds->get_nodeid()) { + if (!dir->is_subtree_root()) { + if (dir->state_test(CDir::STATE_CREATING) || + dir->is_frozen() || dir->is_freezing()) { + // try again later + remove = false; + continue; + } + dir->state_set(CDir::STATE_AUXSUBTREE); + mds->mdcache->adjust_subtree_auth(dir, mds->get_nodeid()); + dout(10) << " create aux subtree on " << *dir << dendl; + } + } else { + mds->mdcache->migrator->export_dir(dir, export_pin); + remove = false; } } - if (!has_auth) { - dout(10) << "can no longer export " << *in << " because I am not auth for any dirfrags" << dendl; - q.erase(current); - continue; + + if (remove) { + q.erase(cur); + in->put(CInode::PIN_EXPORTPINQUEUE); } } diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index 8f0ca76accb2..ae1c7e250e4b 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -295,8 +295,6 @@ void MDCache::remove_inode(CInode *o) o->item_open_file.remove_myself(); - export_pin_queue.erase(o); - // remove from inode map inode_map.erase(o->vino());