From fd5623f4673aed7646d42edc97182cb57f69e227 Mon Sep 17 00:00:00 2001 From: Greg Farnum Date: Wed, 30 Sep 2009 17:00:58 -0700 Subject: [PATCH] mds: Break apart rebalance function into prepwork and offload --- src/mds/MDBalancer.cc | 27 +++++++++++++++------------ src/mds/MDBalancer.h | 10 +++++++--- 2 files changed, 22 insertions(+), 15 deletions(-) diff --git a/src/mds/MDBalancer.cc b/src/mds/MDBalancer.cc index 9ff02a058b77f..e1a2a570e8f77 100644 --- a/src/mds/MDBalancer.cc +++ b/src/mds/MDBalancer.cc @@ -331,7 +331,7 @@ void MDBalancer::do_rebalance(int beat) { int cluster_size = mds->get_mds_map()->get_num_mds(); int whoami = mds->get_nodeid(); - utime_t now = g_clock.now(); + rebalance_time = g_clock.now(); dump_pop_map(); @@ -347,7 +347,7 @@ void MDBalancer::do_rebalance(int beat) // rescale! turn my mds_load back into meta_load units double load_fac = 1.0; if (mds_load[whoami].mds_load() > 0) { - double metald = mds_load[whoami].auth.meta_load(now); + double metald = mds_load[whoami].auth.meta_load(rebalance_time); double mdsld = mds_load[whoami].mds_load(); load_fac = metald / mdsld; dout(7) << " load_fac is " << load_fac @@ -484,9 +484,13 @@ void MDBalancer::do_rebalance(int beat) } } } + try_rebalance(); +} +void MDBalancer::try_rebalance() +{ // make a sorted list of my imports map import_pop_map; multimap import_from_map; @@ -499,7 +503,7 @@ void MDBalancer::do_rebalance(int beat) CDir *im = *it; if (im->get_inode()->is_stray()) continue; - double pop = im->pop_auth_subtree.meta_load(now); + double pop = im->pop_auth_subtree.meta_load(rebalance_time); if (g_conf.mds_bal_idle_threshold > 0 && pop < g_conf.mds_bal_idle_threshold && im->inode != mds->mdcache->get_root() && @@ -566,7 +570,7 @@ void MDBalancer::do_rebalance(int beat) if (dir->inode->is_root()) continue; if (dir->is_freezing() || dir->is_frozen()) continue; // export pbly already in progress - double pop = dir->pop_auth_subtree.meta_load(now); + double pop = dir->pop_auth_subtree.meta_load(rebalance_time); assert(dir->inode->authority().first == target); // cuz that's how i put it in the map, dummy if (pop <= amount-have) { @@ -623,7 +627,7 @@ void MDBalancer::do_rebalance(int beat) pot != candidates.end(); pot++) { if ((*pot)->get_inode()->is_stray()) continue; - find_exports(*pot, amount, exports, have, already_exporting, now); + find_exports(*pot, amount, exports, have, already_exporting); if (have > amount-MIN_OFFLOAD) break; } @@ -634,7 +638,7 @@ void MDBalancer::do_rebalance(int beat) dout(-5) << " - exporting " << (*it)->pop_auth_subtree << " " - << (*it)->pop_auth_subtree.meta_load(now) + << (*it)->pop_auth_subtree.meta_load(rebalance_time) << " to mds" << target << " " << **it << dendl; @@ -653,8 +657,7 @@ void MDBalancer::find_exports(CDir *dir, double amount, list& exports, double& have, - set& already_exporting, - utime_t now) + set& already_exporting) { double need = amount - have; if (need < amount * g_conf.mds_bal_min_start) @@ -667,7 +670,7 @@ void MDBalancer::find_exports(CDir *dir, list bigger_rep, bigger_unrep; multimap smaller; - double dir_pop = dir->pop_auth_subtree.meta_load(now); + double dir_pop = dir->pop_auth_subtree.meta_load(rebalance_time); dout(7) << " find_exports in " << dir_pop << " " << *dir << " need " << need << " (" << needmin << " - " << needmax << ")" << dendl; double subdir_sum = 0; @@ -690,7 +693,7 @@ void MDBalancer::find_exports(CDir *dir, if (subdir->is_frozen()) continue; // can't export this right now! // how popular? - double pop = subdir->pop_auth_subtree.meta_load(now); + double pop = subdir->pop_auth_subtree.meta_load(rebalance_time); subdir_sum += pop; dout(15) << " subdir pop " << pop << " " << *subdir << dendl; @@ -738,7 +741,7 @@ void MDBalancer::find_exports(CDir *dir, it != bigger_unrep.end(); it++) { dout(15) << " descending into " << **it << dendl; - find_exports(*it, amount, exports, have, already_exporting, now); + find_exports(*it, amount, exports, have, already_exporting); if (have > needmin) return; } @@ -761,7 +764,7 @@ void MDBalancer::find_exports(CDir *dir, it != bigger_rep.end(); it++) { dout(7) << " descending into replicated " << **it << dendl; - find_exports(*it, amount, exports, have, already_exporting, now); + find_exports(*it, amount, exports, have, already_exporting); if (have > needmin) return; } diff --git a/src/mds/MDBalancer.h b/src/mds/MDBalancer.h index ba639d234c887..8919dbc92b1ea 100644 --- a/src/mds/MDBalancer.h +++ b/src/mds/MDBalancer.h @@ -45,7 +45,7 @@ class MDBalancer { utime_t last_heartbeat; utime_t last_fragment; utime_t last_sample; - + utime_t rebalance_time; //ensure a consistent view of load for rebalance // todo set split_queue; @@ -88,13 +88,17 @@ class MDBalancer { void do_fragmenting(); void export_empties(); + //set up the rebalancing targets for export; maybe time for a name change? void do_rebalance(int beat); + /*check if the monitor has recorded the current export targets; + if it has then do the actual export. Otherwise send off our + export targets message again*/ + void try_rebalance(); void find_exports(CDir *dir, double amount, list& exports, double& have, - set& already_exporting, - utime_t now); + set& already_exporting); void subtract_export(class CDir *ex); -- 2.39.5