mon/osdmonitor: decouple adjust_heartbeat_grace and min_down_reporters

author Zengran Zhang <zhangzengran@h3c.com>
Tue, 26 Apr 2016 02:45:56 +0000 (22:45 -0400)
committer Zengran Zhang <zhangzengran@h3c.com>
Tue, 26 Apr 2016 02:45:56 +0000 (22:45 -0400)
diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc

index 7ec912b6d1e782b949fb680a73117ecb2de4730a..bb3cc9982c92071b1f78b4e3b58434ffc9b617a4 100644 (file)
--- a/src/mon/OSDMonitor.cc
+++ b/src/mon/OSDMonitor.cc
@@ -1717,9 +1717,10 @@ bool OSDMonitor::check_failure(utime_t now, int target_osd, failure_info_t& fi)
  
    utime_t grace = orig_grace;
    double my_grace = 0, peer_grace = 0;
+  double decay_k = 0;
    if (g_conf->mon_osd_adjust_heartbeat_grace) {
      double halflife = (double)g_conf->mon_osd_laggy_halflife;
-    double decay_k = ::log(.5) / halflife;
+    decay_k = ::log(.5) / halflife;
  
      // scale grace period based on historical probability of 'lagginess'
      // (false positive failures due to slowness).
@@ -1729,31 +1730,35 @@ bool OSDMonitor::check_failure(utime_t now, int target_osd, failure_info_t& fi)
              << " failed_for " << failed_for << " decay " << decay << dendl;
      my_grace = decay * (double)xi.laggy_interval * xi.laggy_probability;
      grace += my_grace;
+  }
  
-    // consider the peers reporting a failure a proxy for a potential
-    // 'subcluster' over the overall cluster that is similarly
-    // laggy.  this is clearly not true in all cases, but will sometimes
-    // help us localize the grace correction to a subset of the system
-    // (say, a rack with a bad switch) that is unhappy.
-    assert(fi.reporters.size());
-    for (map<int,failure_reporter_t>::iterator p = fi.reporters.begin();
-        p != fi.reporters.end();
-        ++p) {
-      // get the parent bucket whose type matches with "reporter_subtree_level".
-      // fall back to OSD if the level doesn't exist.
-      map<string, string> reporter_loc = osdmap.crush->get_full_location(p->first);
-      map<string, string>::iterator iter = reporter_loc.find(reporter_subtree_level);
-      if (iter == reporter_loc.end()) {
-       reporters_by_subtree.insert("osd." + to_string(p->first));
-      } else {
-       reporters_by_subtree.insert(iter->second);
-      }
-
+  // consider the peers reporting a failure a proxy for a potential
+  // 'subcluster' over the overall cluster that is similarly
+  // laggy.  this is clearly not true in all cases, but will sometimes
+  // help us localize the grace correction to a subset of the system
+  // (say, a rack with a bad switch) that is unhappy.
+  assert(fi.reporters.size());
+  for (map<int,failure_reporter_t>::iterator p = fi.reporters.begin();
+       p != fi.reporters.end();
+       ++p) {
+    // get the parent bucket whose type matches with "reporter_subtree_level".
+    // fall back to OSD if the level doesn't exist.
+    map<string, string> reporter_loc = osdmap.crush->get_full_location(p->first);
+    map<string, string>::iterator iter = reporter_loc.find(reporter_subtree_level);
+    if (iter == reporter_loc.end()) {
+      reporters_by_subtree.insert("osd." + to_string(p->first));
+    } else {
+      reporters_by_subtree.insert(iter->second);
+    }
+    if (g_conf->mon_osd_adjust_heartbeat_grace) {
        const osd_xinfo_t& xi = osdmap.get_xinfo(p->first);
        utime_t elapsed = now - xi.down_stamp;
        double decay = exp((double)elapsed * decay_k);
        peer_grace += decay * (double)xi.laggy_interval * xi.laggy_probability;
      }
+  }
+  
+  if (g_conf->mon_osd_adjust_heartbeat_grace) {
      peer_grace /= (double)fi.reporters.size();
      grace += peer_grace;
    }
author	Zengran Zhang <zhangzengran@h3c.com>
	Tue, 26 Apr 2016 02:45:56 +0000 (22:45 -0400)
committer	Zengran Zhang <zhangzengran@h3c.com>
	Tue, 26 Apr 2016 02:45:56 +0000 (22:45 -0400)