From 41f8343762fd60ceaf335f659f2e4d83f02a5921 Mon Sep 17 00:00:00 2001
From: Sridhar Seshasayee <sseshasa@redhat.com>
Date: Mon, 8 Jun 2020 20:58:43 +0530
Subject: [PATCH] mon/OSDMonitor: Reset grace period if failure interval
 exceeds a threshold.

Reset the grace hearbeat period if there have been no failures since the
set threshold value (48 Hrs). The mon_osd_laggy_halflife value is
leveraged to calculate the threshold.

A couple of helper functions do the following:
 - get_grace_interval_threshold():
    Calculates and returns the grace interval threshold value.
 - grace_interval_threshold_exceeded(int):
    Checks if grace interval threshold is exceeded based on the last
    down stamp.
 - set_default_laggy_params(int):
     Resets the laggy_probability and laggy_interval in the
     new_xinfo structure maintained within pending_inc to be applied
     eventually as part of update from paxos.

The threshold value is checked and the laggy parameters are reset at the
following point,
 - encode_pending() - If an existing osd is experiencing failure
   after an interval exceeding the failure threshold period.

Fixes: https://tracker.ceph.com/issues/45943
Signed-off-by: Sridhar Seshasayee <sseshasa@redhat.com>
(cherry picked from commit 9f1d4c1a9cddd942c9ea804dff8dc8068efc06b8)
---
 src/mon/OSDMonitor.cc | 44 ++++++++++++++++++++++++++++++++++++++++++-
 src/mon/OSDMonitor.h  |  4 ++++
 2 files changed, 47 insertions(+), 1 deletion(-)
diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc
index 81052ccee2223..36f8976bcb870 100644
--- a/src/mon/OSDMonitor.cc
+++ b/src/mon/OSDMonitor.cc
@@ -1774,8 +1774,17 @@ void OSDMonitor::encode_pending(MonitorDBStore::TransactionRef t)
        i != pending_inc.new_state.end();
        ++i) {
     int s = i->second ? i->second : CEPH_OSD_UP;
-    if (s & CEPH_OSD_UP)
+    if (s & CEPH_OSD_UP) {
       dout(2) << " osd." << i->first << " DOWN" << dendl;
+      // Reset laggy parameters if failure interval exceeds a threshold.
+      const osd_xinfo_t& xi = osdmap.get_xinfo(i->first);
+      if ((xi.laggy_probability || xi.laggy_interval) && xi.down_stamp.sec()) {
+        int last_failure_interval = pending_inc.modified.sec() - xi.down_stamp.sec();
+        if (grace_interval_threshold_exceeded(last_failure_interval)) {
+          set_default_laggy_params(i->first);
+        }
+      }
+    }
     if (s & CEPH_OSD_EXISTS)
       dout(2) << " osd." << i->first << " DNE" << dendl;
   }
@@ -3117,6 +3126,39 @@ void OSDMonitor::take_all_failures(list<MonOpRequestRef>& ls)
   failure_info.clear();
 }
 
+int OSDMonitor::get_grace_interval_threshold()
+{
+  int halflife = g_conf()->mon_osd_laggy_halflife;
+  // Scale the halflife period (default: 1_hr) by
+  // a factor (48) to calculate the threshold.
+  int grace_threshold_factor = 48;
+  return halflife * grace_threshold_factor;
+}
+
+bool OSDMonitor::grace_interval_threshold_exceeded(int last_failed_interval)
+{
+  int grace_interval_threshold_secs = get_grace_interval_threshold();
+  if (last_failed_interval > grace_interval_threshold_secs) {
+    dout(1) << " last_failed_interval " << last_failed_interval
+            << " > grace_interval_threshold_secs " << grace_interval_threshold_secs
+            << dendl;
+    return true;
+  }
+  return false;
+}
+
+void OSDMonitor::set_default_laggy_params(int target_osd)
+{
+  if (pending_inc.new_xinfo.count(target_osd) == 0) {
+    pending_inc.new_xinfo[target_osd] = osdmap.osd_xinfo[target_osd];
+  }
+  osd_xinfo_t& xi = pending_inc.new_xinfo[target_osd];
+  xi.down_stamp = pending_inc.modified;
+  xi.laggy_probability = 0.0;
+  xi.laggy_interval = 0;
+  dout(20) << __func__ << " reset laggy, now xi " << xi << dendl;
+}
+
 
 // boot --
 
diff --git a/src/mon/OSDMonitor.h b/src/mon/OSDMonitor.h
index 3ff948ad3c869..b40b3e7f7f212 100644
--- a/src/mon/OSDMonitor.h
+++ b/src/mon/OSDMonitor.h
@@ -653,6 +653,10 @@ protected:
 
   int32_t _allocate_osd_id(int32_t* existing_id);
 
+  int get_grace_interval_threshold();
+  bool grace_interval_threshold_exceeded(int last_failed);
+  void set_default_laggy_params(int target_osd);
+
 public:
   OSDMonitor(CephContext *cct, Monitor *mn, Paxos *p, const string& service_name);
 
-- 
2.39.5