From: Simon Gao
Date: Sat, 22 Feb 2020 02:26:57 +0000 (+0800)
Subject: mds: automatically fragment stray dirs
X-Git-Tag: v16.1.0~2546^2~1
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=28227872295ae657a0d26a3f004c54c41794db18;p=ceph.git

mds: automatically fragment stray dirs

Signed-off-by: Simon Gao
---

diff --git a/src/mds/CDir.h b/src/mds/CDir.h
index 8af221935113..4ad5edfd5de7 100644
--- a/src/mds/CDir.h
+++ b/src/mds/CDir.h
@@ -392,11 +392,13 @@ public:
   void merge(const std::vector<CDir*>& subs, MDSContext::vec& waiters, bool replay);

   bool should_split() const {
-    return (int)get_frag_size() > g_conf()->mds_bal_split_size;
+    return g_conf()->mds_bal_split_size > 0 &&
+           (int)get_frag_size() > g_conf()->mds_bal_split_size;
   }
   bool should_split_fast() const;
   bool should_merge() const {
-    return (int)get_frag_size() < g_conf()->mds_bal_merge_size;
+    return get_frag() != frag_t() &&
+           (int)get_frag_size() < g_conf()->mds_bal_merge_size;
   }

   mds_authority_t authority() const override;
diff --git a/src/mds/MDBalancer.cc b/src/mds/MDBalancer.cc
index a1b8508f54e3..0c3fa3ebaa06 100644
--- a/src/mds/MDBalancer.cc
+++ b/src/mds/MDBalancer.cc
@@ -1154,7 +1154,7 @@ void MDBalancer::maybe_fragment(CDir *dir, bool hot)
       !dir->inode->is_stray()) { // not straydir

     // split
-    if (g_conf()->mds_bal_split_size > 0 && (dir->should_split() || hot)) {
+    if (dir->should_split() || hot) {
       if (split_pending.count(dir->dirfrag()) == 0) {
         queue_split(dir, false);
       } else {
@@ -1177,6 +1177,8 @@ void MDBalancer::maybe_fragment(CDir *dir, bool hot)

 void MDBalancer::hit_dir(CDir *dir, int type, int who, double amount)
 {
+  if (dir->inode->is_stray())
+    return;
   // hit me
   double v = dir->pop_me.get(type).hit(amount);

diff --git a/src/mds/MDBalancer.h b/src/mds/MDBalancer.h
index c6d85a5aed6e..2e8fef167790 100644
--- a/src/mds/MDBalancer.h
+++ b/src/mds/MDBalancer.h
@@ -58,6 +58,9 @@ public:

   void queue_split(const CDir *dir, bool fast);
   void queue_merge(CDir *dir);
+  bool is_fragment_pending(dirfrag_t df) {
+    return split_pending.count(df) || merge_pending.count(df);
+  }

   /**
    * Based on size and configuration, decide whether to issue a queue_split
diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc
index e5e1da59e6a9..413c7e88cce0 100644
--- a/src/mds/MDCache.cc
+++ b/src/mds/MDCache.cc
@@ -691,6 +691,51 @@ void MDCache::open_root()
   populate_mydir();
 }

+void MDCache::advance_stray() {
+  // check whether the directory has been fragmented
+  if (stray_fragmenting_index >= 0) {
+    auto&& dfs = strays[stray_fragmenting_index]->get_dirfrags();
+    bool any_fragmenting = false;
+    for (const auto& dir : dfs) {
+      if (dir->state_test(CDir::STATE_FRAGMENTING) ||
+          mds->balancer->is_fragment_pending(dir->dirfrag())) {
+        any_fragmenting = true;
+        break;
+      }
+    }
+    if (!any_fragmenting)
+      stray_fragmenting_index = -1;
+  }
+
+  for (int i = 1; i < NUM_STRAY; i++){
+    stray_index = (stray_index + i) % NUM_STRAY;
+    if (stray_index != stray_fragmenting_index)
+      break;
+  }
+
+  if (stray_fragmenting_index == -1 && is_open()) {
+    // Fragment later stray dir in advance. We don't choose past
+    // stray dir because in-flight requests may still use it.
+    stray_fragmenting_index = (stray_index + 3) % NUM_STRAY;
+    auto&& dfs = strays[stray_fragmenting_index]->get_dirfrags();
+    bool any_fragmenting = false;
+    for (const auto& dir : dfs) {
+      if (dir->should_split()) {
+        mds->balancer->queue_split(dir, true);
+        any_fragmenting = true;
+      } else if (dir->should_merge()) {
+        mds->balancer->queue_merge(dir);
+        any_fragmenting = true;
+      }
+    }
+    if (!any_fragmenting)
+      stray_fragmenting_index = -1;
+  }
+
+  dout(10) << "advance_stray to index " << stray_index
+           << " fragmenting index " << stray_fragmenting_index << dendl;
+}
+
 void MDCache::populate_mydir()
 {
   ceph_assert(myin);
diff --git a/src/mds/MDCache.h b/src/mds/MDCache.h
index 6dd04f1bfb57..1fb4460f8b1c 100644
--- a/src/mds/MDCache.h
+++ b/src/mds/MDCache.h
@@ -213,9 +213,7 @@ class MDCache {
     return cache_size() > cache_memory_limit*cache_health_threshold;
   }

-  void advance_stray() {
-    stray_index = (stray_index+1)%NUM_STRAY;
-  }
+  void advance_stray();

   /**
    * Call this when you know that a CDentry is ready to be passed
@@ -1112,6 +1110,7 @@ class MDCache {
   bool readonly = false;

   int stray_index = 0;
+  int stray_fragmenting_index = -1;

   set<CInode*> base_inodes;

diff --git a/src/mds/MDLog.cc b/src/mds/MDLog.cc
index ee2d08cebba4..76d455cb2d0e 100644
--- a/src/mds/MDLog.cc
+++ b/src/mds/MDLog.cc
@@ -554,8 +554,6 @@ void MDLog::_prepare_new_segment()
   logger->set(l_mdl_seg, segments.size());

   // Adjust to next stray dir
-  dout(10) << "Advancing to next stray directory on mds " << mds->get_nodeid()
-           << dendl;
   mds->mdcache->advance_stray();
 }
