From 2ad62d5256aba54bc290fd0822c3ea159c365819 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@inktank.com>
Date: Tue, 4 Sep 2012 16:55:08 -0700
Subject: [PATCH] mon: decay laggy calculations over time

Add a configurable half-life (mon_osd_laggy_halflife) for the laggy
probability and interval, and apply the decay at the time those values
are used to adjust the heartbeat grace period.  The two values are
multiplied together, so it doesn't matter which one you consider to be
decaying (the probability or the interval).

The half-life defaults to one hour (3600 seconds).

Signed-off-by: Sage Weil <sage@inktank.com>
---
 src/common/config_opts.h |  1 +
 src/mon/OSDMonitor.cc    | 12 ++++++++++--
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/src/common/config_opts.h b/src/common/config_opts.h
index f2a0089da725d..cd7da7e88ccec 100644
--- a/src/common/config_opts.h
+++ b/src/common/config_opts.h
@@ -104,6 +104,7 @@ OPTION(mon_initial_members, OPT_STR, "")    // list of initial cluster mon ids;
 OPTION(mon_sync_fs_threshold, OPT_INT, 5)   // sync() when writing this many objects; 0 to disable.
 OPTION(mon_tick_interval, OPT_INT, 5)
 OPTION(mon_subscribe_interval, OPT_DOUBLE, 300)
+OPTION(mon_osd_laggy_halflife, OPT_INT, 60*60)        // (seconds) how quickly our laggy estimations decay
 OPTION(mon_osd_auto_mark_in, OPT_BOOL, false)         // mark any booting osds 'in'
 OPTION(mon_osd_auto_mark_auto_out_in, OPT_BOOL, true) // mark booting auto-marked-out osds 'in'
 OPTION(mon_osd_auto_mark_new_in, OPT_BOOL, true)      // mark booting new osds 'in'
diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc
index a21e691a8f3e2..e5b4f323b73e2 100644
--- a/src/mon/OSDMonitor.cc
+++ b/src/mon/OSDMonitor.cc
@@ -688,10 +688,16 @@ bool OSDMonitor::check_failure(utime_t now, int target_osd, failure_info_t& fi)
   utime_t max_failed_since = fi.get_failed_since();
   utime_t failed_for = now - max_failed_since;
 
+  double halflife = (double)g_conf->mon_osd_laggy_halflife;
+  double decay_k = ::log(.5) / halflife;
+
   // scale grace period based on historical probability of 'lagginess'
   // (false positive failures due to slowness).
   const osd_xinfo_t& xi = osdmap.get_xinfo(target_osd);
-  double my_grace = (double)xi.laggy_interval * ((double)xi.laggy_probability / (double)0xffffffffull);
+  double decay = exp((double)failed_for * decay_k);
+  dout(20) << " halflife " << halflife << " decay_k " << decay_k
+	   << " failed_for " << failed_for << " decay " << decay << dendl;
+  double my_grace = decay * (double)xi.laggy_interval * ((double)xi.laggy_probability / (double)0xffffffffull);
   utime_t grace = orig_grace;
   grace += my_grace;
 
@@ -706,7 +712,9 @@ bool OSDMonitor::check_failure(utime_t now, int target_osd, failure_info_t& fi)
        p != fi.reporters.end();
        p++) {
     const osd_xinfo_t& xi = osdmap.get_xinfo(p->first);
-    peer_grace += (double)xi.laggy_interval * ((double)xi.laggy_probability / (double)0xffffffffull);
+    utime_t elapsed = now - xi.down_stamp;
+    double decay = exp((double)elapsed * decay_k);
+    peer_grace += decay * (double)xi.laggy_interval * ((double)xi.laggy_probability / (double)0xffffffffull);
   }
   peer_grace /= (double)fi.reporters.size();
   grace += peer_grace;
-- 
2.39.5