From 639756516fb8e396b0ccb5823f92998a5d0c0f00 Mon Sep 17 00:00:00 2001 From: Zhi Zhang Date: Fri, 26 Jul 2019 12:01:08 +0800 Subject: [PATCH] mds: delay exporting directory whose pin value exceeds max rank id Currently the ceph.dir.pin value can be set to any number. If it is larger than the current max rank id, the directory stays in export_pin_queue indefinitely: on every tick the migrator tries to handle it but never exports it successfully. Fixes: http://tracker.ceph.com/issues/40603 Signed-off-by: Zhi Zhang (cherry picked from commit 2c312614a7eddda6fc788753db4e2afab4e2b73e) --- src/mds/CInode.h | 3 ++- src/mds/MDBalancer.cc | 12 +++++++++++- src/mds/MDCache.cc | 23 +++++++++++++++++++++++ src/mds/MDCache.h | 4 ++++ src/mds/MDSRank.cc | 2 ++ 5 files changed, 42 insertions(+), 2 deletions(-) diff --git a/src/mds/CInode.h b/src/mds/CInode.h index 2c48e3b2d996..4260ef5767be 100644 --- a/src/mds/CInode.h +++ b/src/mds/CInode.h @@ -235,6 +235,7 @@ class CInode : public MDSCacheObject, public InodeStoreBase, public Counteris_dir()); mds_rank_t export_pin = in->get_export_pin(false); + if (export_pin >= mds->mdsmap->get_max_mds()) { + dout(20) << " delay export pin on " << *in << dendl; + in->state_clear(CInode::STATE_QUEUEDEXPORTPIN); + q.erase(cur); + + in->state_set(CInode::STATE_DELAYEDEXPORTPIN); + mds->mdcache->export_pin_delayed_queue.insert(in); + continue; + } bool remove = true; list dfls; @@ -166,7 +175,8 @@ void MDBalancer::handle_export_pins(void) dendl; } - if (export_pin >= 0 && export_pin != mds->get_nodeid()) { + if (export_pin >= 0 && export_pin < mds->mdsmap->get_max_mds() + && export_pin != mds->get_nodeid()) { mds->mdcache->migrator->export_dir(cd, export_pin); } } diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index 886b0eef1a39..26bce13ffe6d 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -305,6 +305,9 @@ void MDCache::remove_inode(CInode *o) if (o->state_test(CInode::STATE_QUEUEDEXPORTPIN)) export_pin_queue.erase(o); + if
(o->state_test(CInode::STATE_DELAYEDEXPORTPIN)) + export_pin_delayed_queue.erase(o); + // remove from inode map if (o->last == CEPH_NOSNAP) { inode_map.erase(o->ino()); @@ -12988,3 +12991,23 @@ bool MDCache::dump_inode(Formatter *f, uint64_t number) { f->close_section(); return true; } + +void MDCache::handle_mdsmap(const MDSMap &mdsmap) { + // process export_pin_delayed_queue whenever a new MDSMap received + auto &q = export_pin_delayed_queue; + for (auto it = q.begin(); it != q.end(); ) { + auto *in = *it; + mds_rank_t export_pin = in->get_export_pin(false); + dout(10) << " delayed export_pin=" << export_pin << " on " << *in + << " max_mds=" << mdsmap.get_max_mds() << dendl; + if (export_pin >= mdsmap.get_max_mds()) { + it++; + continue; + } + + in->state_clear(CInode::STATE_DELAYEDEXPORTPIN); + it = q.erase(it); + in->maybe_export_pin(); + } +} + diff --git a/src/mds/MDCache.h b/src/mds/MDCache.h index 0482b82bbdf9..5628033d0cd0 100644 --- a/src/mds/MDCache.h +++ b/src/mds/MDCache.h @@ -1261,6 +1261,9 @@ public: void process_delayed_expire(CDir *dir); void discard_delayed_expire(CDir *dir); + // -- mdsmap -- + void handle_mdsmap(const MDSMap &mdsmap); + protected: int dump_cache(std::string_view fn, Formatter *f); public: @@ -1318,6 +1321,7 @@ public: public: /* Because exports may fail, this set lets us keep track of inodes that need exporting. */ std::set<CInode *> export_pin_queue; + std::set<CInode *> export_pin_delayed_queue; OpenFileTable open_file_table; }; diff --git a/src/mds/MDSRank.cc b/src/mds/MDSRank.cc index 8b622cab0f97..3b6d42addd87 100644 --- a/src/mds/MDSRank.cc +++ b/src/mds/MDSRank.cc @@ -2404,6 +2404,8 @@ void MDSRankDispatcher::handle_mds_map( if (oldmap.get_max_mds() != mdsmap->get_max_mds()) { purge_queue.update_op_limit(*mdsmap); } + + mdcache->handle_mdsmap(*mdsmap); } void MDSRank::handle_mds_recovery(mds_rank_t who) -- 2.47.3