// We can use duration_cast below, although the result is an int,
// because the values from g_conf are also integers.
// balance?
- if (mono_clock::is_zero(last_heartbeat))
- last_heartbeat = now;
if (mds->get_nodeid() == 0 &&
g_conf->mds_bal_interval > 0 &&
(num_bal_times ||
dout(20) << "get_load no root, no load" << dendl;
}
- load.req_rate = mds->get_req_rate();
+ uint64_t num_requests = mds->get_num_requests(); // request counter value reported by the MDS at this call
+ bool new_req_rate = false; // becomes true only if a fresh rate is computed below
+ if (last_get_load != utime_t() && // previous sample time is not the default-constructed value
+ now > last_get_load && // time has moved forward since the previous sample
+ num_requests >= last_num_requests) { // keeps the unsigned subtraction below from wrapping
+ utime_t el = now;
+ el -= last_get_load; // el = time elapsed since the previous sample
+ if (el.sec() >= 1) { // only recompute when at least one whole second has elapsed
+ load.req_rate = (num_requests - last_num_requests) / (double)el; // requests per unit of el (presumably seconds -- confirm utime_t's double conversion)
+ new_req_rate = true;
+ }
+ }
+ if (!new_req_rate) { // no fresh rate: reuse the rate already stored in mds_load for this rank, if present
+ auto p = mds_load.find(mds->get_nodeid());
+ if (p != mds_load.end())
+ load.req_rate = p->second.req_rate;
+ }
+ last_get_load = now; // NOTE(review): refreshed even when no new rate was computed, so calls less than 1s apart restart the window -- confirm intended
+ last_num_requests = num_requests;
+
load.queue_len = messenger->get_dispatch_queue_len();
ifstream cpu(PROCPREFIX "/proc/loadavg");
if (mds->get_nodeid() == 0) {
beat_epoch++;
-
mds_load.clear();
}
// my load
mds_load_t load = get_load(now);
+ mds->logger->set(l_mds_load_cent, 100 * load.mds_load()); // publish load.mds_load() multiplied by 100 through mds->logger
+ mds->logger->set(l_mds_dispatch_queue_len, load.queue_len); // publish the queue length carried in the load sample
+
map<mds_rank_t, mds_load_t>::value_type val(mds->get_nodeid(), load);
mds_load.insert(val);
if (who == 0) {
dout(20) << " from mds0, new epoch " << m->get_beat() << dendl;
if (beat_epoch != m->get_beat()) {
+ beat_epoch = m->get_beat(); // take on the epoch carried by this heartbeat, only when it differs from ours
mds_load.clear();
}
- beat_epoch = m->get_beat();
+
send_heartbeat();
mds->mdcache->show_subtrees();
+ } else if (mds->get_nodeid() == 0) { // heartbeat is not from rank 0, and this MDS is rank 0
+ if (beat_epoch != m->get_beat()) { // heartbeat's epoch does not match our current epoch
+ dout(10) << " old heartbeat epoch, ignoring" << dendl;
+ goto out; // skip the remaining handling (label `out` lies outside this excerpt)
+ }
}
{
friend class C_Bal_SendHeartbeat;
public:
MDBalancer(MDSRank *m, Messenger *msgr, MonClient *monc) :
- mds(m),
- messenger(msgr),
- mon_client(monc),
- beat_epoch(0),
- last_epoch_under(0), my_load(0.0), target_load(0.0)
- { }
-
- mds_load_t get_load(utime_t);
+ mds(m), messenger(msgr), mon_client(monc) { }
int proc_message(Message *m);
void handle_export_pins(void);
void export_empties();
+
+ mds_load_t get_load(utime_t now);
int localize_balancer();
void send_heartbeat();
void handle_heartbeat(MHeartbeat *m);
MDSRank *mds;
Messenger *messenger;
MonClient *mon_client;
- int beat_epoch;
+ int beat_epoch = 0;
- int last_epoch_under;
+ int last_epoch_under = 0;
string bal_code;
string bal_version;
mono_time last_sample = mono_clock::zero();
utime_t rebalance_time; //ensure a consistent view of load for rebalance
+ utime_t last_get_load; // `now` value recorded by the most recent get_load() call; default-constructed until the first call
+ uint64_t last_num_requests = 0; // mds->get_num_requests() value recorded by the most recent get_load() call
+
// Dirfrags which are marked to be passed on to MDCache::[split|merge]_dir
// just as soon as a delayed context comes back and triggers it.
// These sets just prevent us from spawning extra timer contexts for
map<mds_rank_t, int> mds_last_epoch_under_info;
// per-epoch state
- double my_load, target_load;
+ double my_load = 0;
+ double target_load = 0;
};
#endif