From: Jianyu Li Date: Thu, 30 Nov 2017 10:57:40 +0000 (+0800) Subject: Make MDS evaluates the overload situation with the same criterion X-Git-Tag: v13.0.2~708^2~1 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=9a276c2733415f41d54524db20a9dd8eb62359bd;p=ceph.git Make MDS evaluates the overload situation with the same criterion Currently, prep_rebalance requires an MDS's own mds_load to exceed the average level by mds_bal_min_rebalance percent before it is counted as an exporter; however, any other MDS is considered an exporter as long as its mds_load is greater than the average level. This inconsistent standard causes a gray area in the rebalance activity: one MDS may expect another to do some work, but the other does not. As the number of MDSs increases, this inconsistent decision gets worse and degrades the optimization effect of the rebalancer. Signed-off-by: Jianyu Li --- diff --git a/src/mds/MDBalancer.cc b/src/mds/MDBalancer.cc index ae3d9a9553a..85aeab8645e 100644 --- a/src/mds/MDBalancer.cc +++ b/src/mds/MDBalancer.cc @@ -338,7 +338,7 @@ void MDBalancer::send_heartbeat() for (set::iterator p = up.begin(); p != up.end(); ++p) { if (*p == mds->get_nodeid()) continue; - MHeartbeat *hb = new MHeartbeat(load, beat_epoch); + MHeartbeat *hb = new MHeartbeat(load, beat_epoch, last_epoch_under); hb->get_import_map() = import_map; messenger->send_message(hb, mds->mdsmap->get_inst(*p)); @@ -384,6 +384,7 @@ void MDBalancer::handle_heartbeat(MHeartbeat *m) } } mds_import_map[ who ] = m->get_import_map(); + mds_last_epoch_under_info[who] = m->get_last_epoch_under(); //dout(0) << " load is " << load << " have " << mds_load.size() << dendl; @@ -663,7 +664,8 @@ void MDBalancer::prep_rebalance(int beat) dout(15) << " mds." 
<< it->second << " is importer" << dendl; importers.insert(pair(it->first,it->second)); importer_set.insert(it->second); - } else { + } else if ((it->first > target_load * (1.0 + g_conf->mds_bal_min_rebalance)) && + (it->second == whoami || !mds_last_epoch_under_info[it->second] || beat_epoch - mds_last_epoch_under_info[it->second] >= 2)){ dout(15) << " mds." << it->second << " is exporter" << dendl; exporters.insert(pair(it->first,it->second)); exporter_set.insert(it->second); diff --git a/src/mds/MDBalancer.h b/src/mds/MDBalancer.h index d23185b22f8..ae7aa21ccc6 100644 --- a/src/mds/MDBalancer.h +++ b/src/mds/MDBalancer.h @@ -142,6 +142,7 @@ private: map mds_load; map mds_meta_load; map > mds_import_map; + map mds_last_epoch_under_info; // per-epoch state double my_load, target_load; diff --git a/src/messages/MHeartbeat.h b/src/messages/MHeartbeat.h index ad0d8f8e481..ff3d178d5b0 100644 --- a/src/messages/MHeartbeat.h +++ b/src/messages/MHeartbeat.h @@ -20,24 +20,29 @@ #include "msg/Message.h" class MHeartbeat : public Message { + static const int HEAD_VERSION = 2; + static const int COMPAT_VERSION = 1; mds_load_t load; __s32 beat = 0; + __s32 last_epoch_under = 0; map import_map; public: mds_load_t& get_load() { return load; } int get_beat() { return beat; } + int get_last_epoch_under() { return last_epoch_under; } map& get_import_map() { return import_map; } MHeartbeat() - : Message(MSG_MDS_HEARTBEAT), load(utime_t()) { } - MHeartbeat(mds_load_t& load, int beat) - : Message(MSG_MDS_HEARTBEAT), + : Message(MSG_MDS_HEARTBEAT, HEAD_VERSION, COMPAT_VERSION), load(utime_t()) { } + MHeartbeat(mds_load_t& load, int beat, int last_epoch_under) + : Message(MSG_MDS_HEARTBEAT, HEAD_VERSION, COMPAT_VERSION), load(load) { this->beat = beat; + this->last_epoch_under = last_epoch_under; } private: ~MHeartbeat() override {} @@ -49,6 +54,7 @@ public: ::encode(load, payload); ::encode(beat, payload); ::encode(import_map, payload); + ::encode(last_epoch_under, payload); } void 
decode_payload() override { bufferlist::iterator p = payload.begin(); @@ -56,6 +62,11 @@ public: ::decode(load, now, p); ::decode(beat, p); ::decode(import_map, p); + if (header.version >= 2) { + ::decode(last_epoch_under, p); + } else { + last_epoch_under = 0; + } } };