OPTION(mon_sync_fs_threshold, OPT_INT, 5) // sync() when writing this many objects; 0 to disable.
OPTION(mon_tick_interval, OPT_INT, 5)
OPTION(mon_subscribe_interval, OPT_DOUBLE, 300)
+OPTION(mon_osd_laggy_halflife, OPT_INT, 60*60) // (seconds) half-life of the exponential decay applied to an osd's laggy estimations when scaling its failure grace period; used as a divisor (log(.5) / halflife), so it must be non-zero
OPTION(mon_osd_auto_mark_in, OPT_BOOL, false) // mark any booting osds 'in'
OPTION(mon_osd_auto_mark_auto_out_in, OPT_BOOL, true) // mark booting auto-marked-out osds 'in'
OPTION(mon_osd_auto_mark_new_in, OPT_BOOL, true) // mark booting new osds 'in'
utime_t max_failed_since = fi.get_failed_since();
utime_t failed_for = now - max_failed_since;
+ double halflife = (double)g_conf->mon_osd_laggy_halflife;
+ double decay_k = ::log(.5) / halflife;
+
// scale grace period based on historical probability of 'lagginess'
// (false positive failures due to slowness).
const osd_xinfo_t& xi = osdmap.get_xinfo(target_osd);
- double my_grace = (double)xi.laggy_interval * ((double)xi.laggy_probability / (double)0xffffffffull);
+ double decay = exp((double)failed_for * decay_k);
+ dout(20) << " halflife " << halflife << " decay_k " << decay_k
+ << " failed_for " << failed_for << " decay " << decay << dendl;
+ double my_grace = decay * (double)xi.laggy_interval * ((double)xi.laggy_probability / (double)0xffffffffull);
utime_t grace = orig_grace;
grace += my_grace;
p != fi.reporters.end();
p++) {
const osd_xinfo_t& xi = osdmap.get_xinfo(p->first);
- peer_grace += (double)xi.laggy_interval * ((double)xi.laggy_probability / (double)0xffffffffull);
+ utime_t elapsed = now - xi.down_stamp;
+ double decay = exp((double)elapsed * decay_k);
+ peer_grace += decay * (double)xi.laggy_interval * ((double)xi.laggy_probability / (double)0xffffffffull);
}
peer_grace /= (double)fi.reporters.size();
grace += peer_grace;