From: sage Date: Sat, 9 Jul 2005 19:59:16 +0000 (+0000) Subject: *** empty log message *** X-Git-Tag: v0.1~1949 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=e3b966eee908ad178e76ea1b7b0d9480619150cc;p=ceph.git *** empty log message *** git-svn-id: https://ceph.svn.sf.net/svnroot/ceph@432 29311d96-e01e-0410-9327-a35deaab8ce9 --- diff --git a/ceph/common/DecayCounter.h b/ceph/common/DecayCounter.h index 8d73ae98895..fe6f6fce4a7 100644 --- a/ceph/common/DecayCounter.h +++ b/ceph/common/DecayCounter.h @@ -5,6 +5,7 @@ #include #include "Clock.h" +#include "include/config.h" class DecayCounter { protected: @@ -17,7 +18,7 @@ class DecayCounter { public: DecayCounter() : val(0) { - set_halflife( 40.0 ); + set_halflife( g_conf.mds_bal_interval ); reset(); } DecayCounter(double hl) : val(0) { diff --git a/ceph/mds/MDBalancer.cc b/ceph/mds/MDBalancer.cc index a7b6bfb24d2..591cbd60410 100644 --- a/ceph/mds/MDBalancer.cc +++ b/ceph/mds/MDBalancer.cc @@ -85,15 +85,40 @@ void MDBalancer::send_heartbeat() if (mds->get_nodeid() == 0) beat_epoch++; + timepair_t now = g_clock.gettimepair(); + + // load mds_load_t load = mds->get_load(); mds_load[ mds->get_nodeid() ] = load; + // import_map + map import_map; + + for (set::iterator it = mds->mdcache->imports.begin(); + it != mds->mdcache->imports.end(); + it++) { + CDir *im = *it; + if (im->inode->is_root()) continue; + int from = im->inode->authority(); + import_map[from] += im->popularity[MDS_POP_CURDOM].get(now); + } + mds_import_map[ mds->get_nodeid() ] = import_map; + + dout(5) << "mds" << mds->get_nodeid() << " sending heartbeat " << beat_epoch << " " << load << endl; + for (map::iterator it = import_map.begin(); + it != import_map.end(); + it++) { + dout(5) << " import_map from " << it->first << " -> " << it->second << endl; + } + int size = mds->get_cluster()->get_num_mds(); for (int i = 0; iget_nodeid()) continue; - mds->messenger->send_message(new MHeartbeat(load, beat_epoch), + MHeartbeat 
*hb = new MHeartbeat(load, beat_epoch); + hb->get_import_map() = import_map; + mds->messenger->send_message(hb, i, MDS_PORT_BALANCER, MDS_PORT_BALANCER); } @@ -118,6 +143,8 @@ void MDBalancer::handle_heartbeat(MHeartbeat *m) } mds_load[ m->get_source() ] = m->get_load(); + mds_import_map[ m->get_source() ] = m->get_import_map(); + //cout << " load is " << load << " have " << mds_load.size() << endl; int cluster_size = mds->get_cluster()->get_num_mds(); @@ -148,6 +175,33 @@ void MDBalancer::export_empties() +double MDBalancer::try_match(int ex, double& maxex, + int im, double& maxim) +{ + if (maxex <= 0 || maxim <= 0) return 0.0; + + double howmuch = -1; + if (maxim < maxex) // import takes it all + howmuch = maxim; + else if (maxim >= maxex) // export all: transfer is capped by what the exporter can still shed + howmuch = maxex; + if (howmuch <= 0) return 0.0; + + dout(5) << " - mds" << ex << " exports " << howmuch << " to mds" << im << endl; + + if (ex == mds->get_nodeid()) + my_targets[im] += howmuch; + + exported[ex] += howmuch; + imported[im] += howmuch; + + maxex -= howmuch; + maxim -= howmuch; + + return howmuch; +} + + void MDBalancer::do_rebalance(int beat) { @@ -156,6 +210,11 @@ void MDBalancer::do_rebalance(int beat) int cluster_size = mds->get_cluster()->get_num_mds(); int whoami = mds->get_nodeid(); + // reset + my_targets.clear(); + imported.clear(); + exported.clear(); + dout(5) << " do_rebalance: cluster loads are" << endl; mds_load_t total_load; @@ -168,14 +227,13 @@ void MDBalancer::do_rebalance(int beat) } dout(5) << " total load " << total_load << endl; - - double my_load = mds_load[whoami].root_pop; - mds_load_t target_load = total_load / (double)cluster_size; - double target_root = target_load.root_pop; + // target load + target_load = total_load / (double)cluster_size; dout(5) << " target load " << target_load << endl; - - if (my_load < target_load.root_pop) { + + // under or over? + if (mds_load[whoami].root_pop < target_load.root_pop) { dout(5) << " i am underloaded, doing nothing." 
<< endl; show_imports(); return; @@ -184,106 +242,97 @@ void MDBalancer::do_rebalance(int beat) dout(5) << " i am overloaded" << endl; + // first separate exporters and importers + multimap importers; + multimap exporters; + set importer_set; + set exporter_set; + + for (multimap::iterator it = load_map.begin(); + it != load_map.end(); + it++) { + if (it->first < target_load.root_pop) { + //dout(5) << " mds" << it->second << " is importer" << endl; + importers.insert(pair(it->first,it->second)); + importer_set.insert(it->second); + } else { + //dout(5) << " mds" << it->second << " is exporter" << endl; + exporters.insert(pair(it->first,it->second)); + exporter_set.insert(it->second); + } + } + + // determine load transfer mapping - multimap my_targets; - if (true || beat % 2 == 1) { - // old way - - // match up big exporters with big importers - multimap::reverse_iterator exporter = load_map.rbegin(); - multimap::iterator importer = load_map.begin(); - double imported = 0; - double exported = 0; - while (exporter != load_map.rend() && - importer != load_map.end()) { - double maxex = (*exporter).first - target_load.root_pop - exported; - double maxim = target_load.root_pop - (*importer).first - imported; - if (maxex < 0 || - maxim < 0) break; + if (true) { + // analyze import_map; do any matches i can + + dout(5) << " matching exporters to import sources" << endl; + + // big -> small exporters + for (multimap::reverse_iterator ex = exporters.rbegin(); + ex != exporters.rend(); + ex++) { + double maxex = get_maxex(ex->second); + if (maxex <= .001) continue; - if (maxim < maxex) { // import takes it all - dout(5) << " - mds" << (*exporter).second << " exports " << maxim << " to mds" << (*importer).second << endl; - if ((*exporter).second == whoami) - my_targets.insert(pair((*importer).second, maxim)); - exported += maxim; - importer++; - imported = 0; - } - else if (maxim > maxex) { // export all - dout(5) << " - mds" << (*exporter).second << " exports " << maxex << " 
to mds" << (*importer).second << endl; - if ((*exporter).second == whoami) - my_targets.insert(pair((*importer).second, maxex)); - imported += maxex; - exporter++; - exported = 0; - } else { - // wow, perfect match! - dout(5) << " - mds" << (*exporter).second << " exports " << maxex << " to mds" << (*importer).second << endl; - if ((*exporter).second == whoami) - my_targets.insert(pair((*importer).second, maxex)); - imported = exported = 0; - importer++; importer++; + // check importers. for now, just in arbitrary order (no intelligent matching). + for (map::iterator im = mds_import_map[ex->second].begin(); + im != mds_import_map[ex->second].end(); + im++) { + double maxim = get_maxim(im->first); + if (maxim <= .001) continue; + + try_match(ex->second, maxex, + im->first, maxim); + if (maxex <= .001) break;; } } - } else { - // new way + } - // first separate exporters and importers - multimap importers; - multimap exporters; - - for (multimap::iterator it = load_map.begin(); - it != load_map.end(); - it++) { - if (it->first < target_root) { - //dout(5) << " mds" << it->second << " is importer" << endl; - importers.insert(pair(it->first,it->second)); - } else { - //dout(5) << " mds" << it->second << " is exporter" << endl; - exporters.insert(pair(it->first,it->second)); + + if (1) { + if (beat % 2 == 1) { + // old way + dout(5) << " matching big exporters to big importers" << endl; + // big exporters to big importers + multimap::reverse_iterator ex = exporters.rbegin(); + multimap::iterator im = importers.begin(); + while (ex != exporters.rend() && + im != importers.end()) { + double maxex = get_maxex(ex->second); + double maxim = get_maxim(im->second); + + if (maxex < .001 || maxim < .001) break; + + try_match(ex->second, maxex, + im->second, maxim); + if (maxex <= .001) ex++; + if (maxim <= .001) im++; } - } - - // now match them up.. big exporters with small importers! 
- multimap::iterator ex = exporters.begin(); - multimap::iterator im = importers.begin(); - double imported = 0; - double exported = 0; - while (ex != exporters.end() && - im != importers.end()) { - double maxex = ex->first - target_load.root_pop - exported; - double maxim = target_load.root_pop - im->first - imported; - - if (maxex < 0 || - maxim < 0) break; - - if (maxim < maxex) { // import takes it all - dout(5) << " - mds" << ex->second << " exports " << maxim << " to mds" << im->second << endl; - if (ex->second == whoami) - my_targets.insert(pair(im->second, maxim)); - exported += maxim; - im++; - imported = 0; - } - else if (maxim > maxex) { // export all - dout(5) << " - mds" << ex->second << " exports " << maxex << " to mds" << im->second << endl; - if (ex->second == whoami) - my_targets.insert(pair(im->second, maxex)); - imported += maxex; - ex++; - exported = 0; - } else { - // wow, perfect match! - dout(5) << " - mds" << ex->second << " exports " << maxex << " to mds" << im->second << endl; - if (ex->second == whoami) - my_targets.insert(pair(im->second, maxex)); - imported = exported = 0; - im++; ex++; + } else { + // new way + dout(5) << " matching small exporters to big importers" << endl; + // small exporters to big importers + multimap::iterator ex = exporters.begin(); + multimap::iterator im = importers.begin(); + while (ex != exporters.end() && + im != importers.end()) { + double maxex = get_maxex(ex->second); + double maxim = get_maxim(im->second); + + if (maxex < .001 || maxim < .001) break; + + try_match(ex->second, maxex, + im->second, maxim); + if (maxex <= .001) ex++; + if (maxim <= .001) im++; } } } + // make a sorted list of my imports map import_pop_map; multimap import_from_map; @@ -305,7 +354,7 @@ void MDBalancer::do_rebalance(int beat) // do my exports! 
set already_exporting; - for (multimap::iterator it = my_targets.begin(); + for (map::iterator it = my_targets.begin(); it != my_targets.end(); it++) { int target = (*it).first; diff --git a/ceph/mds/MDBalancer.h b/ceph/mds/MDBalancer.h index 1bc5e114161..464c2d6d6b1 100644 --- a/ceph/mds/MDBalancer.h +++ b/ceph/mds/MDBalancer.h @@ -5,6 +5,7 @@ #include using namespace std; +#include #include using namespace __gnu_cxx; @@ -24,7 +25,24 @@ class MDBalancer { int beat_epoch; - hash_map mds_load; + // per-epoch scatter/gathered info + hash_map mds_load; + map > mds_import_map; + + // per-epoch state + mds_load_t target_load; + map my_targets; + map imported; + map exported; + + double try_match(int ex, double& maxex, + int im, double& maxim); + double get_maxim(int im) { + return target_load.root_pop - mds_load[im].root_pop - imported[im]; + } + double get_maxex(int ex) { + return mds_load[ex].root_pop - target_load.root_pop - exported[ex]; + } public: MDBalancer(MDS *m) { diff --git a/ceph/messages/MHeartbeat.h b/ceph/messages/MHeartbeat.h index 7093108113a..524ba265fae 100644 --- a/ceph/messages/MHeartbeat.h +++ b/ceph/messages/MHeartbeat.h @@ -7,11 +7,16 @@ class MHeartbeat : public Message { mds_load_t load; int beat; + map import_map; public: mds_load_t& get_load() { return load; } int get_beat() { return beat; } + map& get_import_map() { + return import_map; + } + MHeartbeat() {} MHeartbeat(mds_load_t& load, int beat) : Message(MSG_MDS_HEARTBEAT) { @@ -26,10 +31,35 @@ class MHeartbeat : public Message { off += sizeof(load); s.copy(off, sizeof(beat), (char*)&beat); off += sizeof(beat); + + int n; + s.copy(off, sizeof(n), (char*)&n); + off += sizeof(n); + while (n--) { + int f; + s.copy(off, sizeof(f), (char*)&f); + off += sizeof(f); + float v; + s.copy(off, sizeof(v), (char*)&v); + off += sizeof(v); + import_map[f] = v; + } } virtual void encode_payload(crope& s) { s.append((char*)&load, sizeof(load)); s.append((char*)&beat, sizeof(beat)); + + int n = 
import_map.size(); + s.append((char*)&n, sizeof(n)); + for (map::iterator it = import_map.begin(); + it != import_map.end(); + it++) { + int f = it->first; + s.append((char*)&f, sizeof(f)); + float v = it->second; + s.append((char*)&v, sizeof(v)); + } + } };