From: Zengran Zhang Date: Tue, 26 Apr 2016 02:45:56 +0000 (-0400) Subject: mon/osdmonitor: decouple adjust_heartbeat_grace and min_down_reporters X-Git-Tag: v11.0.0~142^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=7ac5ca5698cfdaab019bf282b537bcc897f9444c;p=ceph.git mon/osdmonitor: decouple adjust_heartbeat_grace and min_down_reporters if we disable mon_osd_adjust_heartbeat_grace, we must also set mon_osd_min_down_reporters to zero; otherwise the subsequent check of reporters_by_subtree.size() >= g_conf->mon_osd_min_down_reporters will always fail. The two options should take effect independently. Signed-off-by: Zengran Zhang --- diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc index 7ec912b6d1e7..bb3cc9982c92 100644 --- a/src/mon/OSDMonitor.cc +++ b/src/mon/OSDMonitor.cc @@ -1717,9 +1717,10 @@ bool OSDMonitor::check_failure(utime_t now, int target_osd, failure_info_t& fi) utime_t grace = orig_grace; double my_grace = 0, peer_grace = 0; + double decay_k = 0; if (g_conf->mon_osd_adjust_heartbeat_grace) { double halflife = (double)g_conf->mon_osd_laggy_halflife; - double decay_k = ::log(.5) / halflife; + decay_k = ::log(.5) / halflife; // scale grace period based on historical probability of 'lagginess' // (false positive failures due to slowness). @@ -1729,31 +1730,35 @@ bool OSDMonitor::check_failure(utime_t now, int target_osd, failure_info_t& fi) << " failed_for " << failed_for << " decay " << decay << dendl; my_grace = decay * (double)xi.laggy_interval * xi.laggy_probability; grace += my_grace; + } - // consider the peers reporting a failure a proxy for a potential - // 'subcluster' over the overall cluster that is similarly - // laggy. this is clearly not true in all cases, but will sometimes - // help us localize the grace correction to a subset of the system - // (say, a rack with a bad switch) that is unhappy. 
- assert(fi.reporters.size()); - for (map::iterator p = fi.reporters.begin(); - p != fi.reporters.end(); - ++p) { - // get the parent bucket whose type matches with "reporter_subtree_level". - // fall back to OSD if the level doesn't exist. - map reporter_loc = osdmap.crush->get_full_location(p->first); - map::iterator iter = reporter_loc.find(reporter_subtree_level); - if (iter == reporter_loc.end()) { - reporters_by_subtree.insert("osd." + to_string(p->first)); - } else { - reporters_by_subtree.insert(iter->second); - } - + // consider the peers reporting a failure a proxy for a potential + // 'subcluster' over the overall cluster that is similarly + // laggy. this is clearly not true in all cases, but will sometimes + // help us localize the grace correction to a subset of the system + // (say, a rack with a bad switch) that is unhappy. + assert(fi.reporters.size()); + for (map::iterator p = fi.reporters.begin(); + p != fi.reporters.end(); + ++p) { + // get the parent bucket whose type matches with "reporter_subtree_level". + // fall back to OSD if the level doesn't exist. + map reporter_loc = osdmap.crush->get_full_location(p->first); + map::iterator iter = reporter_loc.find(reporter_subtree_level); + if (iter == reporter_loc.end()) { + reporters_by_subtree.insert("osd." + to_string(p->first)); + } else { + reporters_by_subtree.insert(iter->second); + } + if (g_conf->mon_osd_adjust_heartbeat_grace) { const osd_xinfo_t& xi = osdmap.get_xinfo(p->first); utime_t elapsed = now - xi.down_stamp; double decay = exp((double)elapsed * decay_k); peer_grace += decay * (double)xi.laggy_interval * xi.laggy_probability; } + } + + if (g_conf->mon_osd_adjust_heartbeat_grace) { peer_grace /= (double)fi.reporters.size(); grace += peer_grace; }