From: Sage Weil Date: Fri, 29 Oct 2010 23:42:16 +0000 (-0700) Subject: mds: detect small dirs that should be merged, and merge them X-Git-Tag: v0.23~61^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=7e8fc103dd327fc4fcde8b43885f524398ec12c5;p=ceph.git mds: detect small dirs that should be merged, and merge them Signed-off-by: Sage Weil --- diff --git a/src/mds/CDir.h b/src/mds/CDir.h index 5a2aae7d2d00..eea38f2b4121 100644 --- a/src/mds/CDir.h +++ b/src/mds/CDir.h @@ -343,6 +343,14 @@ public: public: void split(int bits, list& subs, list& waiters, bool replay); void merge(list& subs, list& waiters, bool replay); + + bool should_split() { + return (int)get_num_head_items() > g_conf.mds_bal_split_size; + } + bool should_merge() { + return (int)get_num_head_items() < g_conf.mds_bal_merge_size; + } + private: void steal_dentry(CDentry *dn); // from another dir. used by merge/split. void purge_stolen(list& waiters, bool replay); diff --git a/src/mds/MDBalancer.cc b/src/mds/MDBalancer.cc index d4e7a7550c82..da08bf1949eb 100644 --- a/src/mds/MDBalancer.cc +++ b/src/mds/MDBalancer.cc @@ -316,25 +316,77 @@ void MDBalancer::queue_split(CDir *dir) void MDBalancer::do_fragmenting() { - if (split_queue.empty()) { + if (split_queue.empty() && merge_queue.empty()) { dout(20) << "do_fragmenting has nothing to do" << dendl; return; } - dout(0) << "do_fragmenting " << split_queue.size() << " dirs marked for possible splitting" << dendl; + if (!split_queue.empty()) { + dout(0) << "do_fragmenting " << split_queue.size() << " dirs marked for possible splitting" << dendl; - set q; - q.swap(split_queue); - - for (set::iterator i = q.begin(); - i != q.end(); - i++) { - CDir *dir = mds->mdcache->get_dirfrag(*i); - if (!dir) continue; - if (!dir->is_auth()) continue; - - dout(0) << "do_fragmenting splitting " << *dir << dendl; - mds->mdcache->split_dir(dir, g_conf.mds_bal_split_bits); + set q; + q.swap(split_queue); + + for (set::iterator i = q.begin(); + i != q.end(); + i++) { + CDir *dir = mds->mdcache->get_dirfrag(*i); + if (!dir || + !dir->is_auth()) + continue; + + dout(0) << "do_fragmenting splitting " << *dir << dendl; + mds->mdcache->split_dir(dir, g_conf.mds_bal_split_bits); + } + } + + if (!merge_queue.empty()) { + dout(0) << "do_fragmenting " << merge_queue.size() << " dirs marked for possible merging" << dendl; + + set q; + q.swap(merge_queue); + + for (set::iterator i = q.begin(); + i != q.end(); + i++) { + CDir *dir = mds->mdcache->get_dirfrag(*i); + if (!dir || + !dir->is_auth() || + dir->get_frag() == frag_t()) // ok who's the joker? + continue; + + dout(0) << "do_fragmenting merging " << *dir << dendl; + + CInode *diri = dir->get_inode(); + + frag_t fg = dir->get_frag(); + while (fg != frag_t()) { + frag_t sibfg = fg.get_sibling(); + list sibs; + bool complete = diri->get_dirfrags_under(sibfg, sibs); + if (!complete) { + dout(10) << " not all sibs under " << sibfg << " in cache (have " << sibs << ")" << dendl; + break; + } + bool all = true; + for (list::iterator p = sibs.begin(); p != sibs.end(); p++) { + CDir *sib = *p; + if (!sib->is_auth() || !sib->should_merge()) { + all = false; + break; + } + } + if (!all) { + dout(10) << " not all sibs under " << sibfg << " " << sibs << " should_merge" << dendl; + break; + } + dout(10) << " all sibs under " << sibfg << " " << sibs << " should merge" << dendl; + fg = fg.parent(); + } + + if (fg != dir->get_frag()) + mds->mdcache->merge_dir(diri, fg); + } } } @@ -909,21 +961,32 @@ void MDBalancer::hit_dir(utime_t now, CDir *dir, int type, int who, double amoun //if (dir->ino() == inodeno_t(0x10000000000)) //dout(0) << "hit_dir " << type << " pop " << v << " in " << *dir << dendl; - // split? + // split/merge if (g_conf.mds_bal_frag && g_conf.mds_bal_fragment_interval > 0 && !dir->inode->is_base() && // not root/base (for now at least) - dir->is_auth() && - - ((g_conf.mds_bal_split_size > 0 && - dir->get_num_head_items() > (unsigned)g_conf.mds_bal_split_size) || - (v > g_conf.mds_bal_split_rd && type == META_POP_IRD) || - (v > g_conf.mds_bal_split_wr && type == META_POP_IWR)) && - split_queue.count(dir->dirfrag()) == 0) { - dout(0) << "hit_dir " << type << " pop is " << v << ", putting in split_queue: " << *dir << dendl; - split_queue.insert(dir->dirfrag()); - } - // merge? - + dir->is_auth()) { + + dout(20) << "hit_dir " << type << " pop is " << v << ", frag " << dir->get_frag() + << " size " << dir->get_num_head_items() << dendl; + + // split + if (g_conf.mds_bal_split_size > 0 && + ((dir->get_num_head_items() > (unsigned)g_conf.mds_bal_split_size) || + (v > g_conf.mds_bal_split_rd && type == META_POP_IRD) || + (v > g_conf.mds_bal_split_wr && type == META_POP_IWR)) && + split_queue.count(dir->dirfrag()) == 0) { + dout(1) << "hit_dir " << type << " pop is " << v << ", putting in split_queue: " << *dir << dendl; + split_queue.insert(dir->dirfrag()); + } + + // merge? + if (dir->get_frag() != frag_t() && + (dir->get_num_head_items() < (unsigned)g_conf.mds_bal_merge_size) && + merge_queue.count(dir->dirfrag()) == 0) { + dout(1) << "hit_dir " << type << " pop is " << v << ", putting in merge_queue: " << *dir << dendl; + merge_queue.insert(dir->dirfrag()); + } + } // replicate? if (type == META_POP_IRD && who >= 0) { diff --git a/src/mds/MDBalancer.h b/src/mds/MDBalancer.h index bbf3cfc89d30..e4eb530b4494 100644 --- a/src/mds/MDBalancer.h +++ b/src/mds/MDBalancer.h @@ -48,7 +48,7 @@ class MDBalancer { utime_t rebalance_time; //ensure a consistent view of load for rebalance // todo - set split_queue; + set split_queue, merge_queue; // per-epoch scatter/gathered info map mds_load;