From 41f8343762fd60ceaf335f659f2e4d83f02a5921 Mon Sep 17 00:00:00 2001 From: Sridhar Seshasayee Date: Mon, 8 Jun 2020 20:58:43 +0530 Subject: [PATCH] mon/OSDMonitor: Reset grace period if failure interval exceeds a threshold. Reset the grace heartbeat period if there have been no failures since the set threshold value (48 Hrs). The mon_osd_laggy_halflife value is leveraged to calculate the threshold. A couple of helper functions do the following: - get_grace_interval_threshold(): Calculates and returns the grace interval threshold value. - grace_interval_threshold_exceeded(int): Checks if grace interval threshold is exceeded based on the last down stamp. - set_default_laggy_params(int): Resets the laggy_probability and laggy_interval in the new_xinfo structure maintained within pending_inc to be applied eventually as part of update from paxos. The threshold value is checked and the laggy parameters are reset at the following point, - encode_pending() - If an existing osd is experiencing failure after an interval exceeding the failure threshold period. Fixes: https://tracker.ceph.com/issues/45943 Signed-off-by: Sridhar Seshasayee (cherry picked from commit 9f1d4c1a9cddd942c9ea804dff8dc8068efc06b8) --- src/mon/OSDMonitor.cc | 44 ++++++++++++++++++++++++++++++++++++++++++- src/mon/OSDMonitor.h | 4 ++++ 2 files changed, 47 insertions(+), 1 deletion(-) diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc index 81052ccee2223..36f8976bcb870 100644 --- a/src/mon/OSDMonitor.cc +++ b/src/mon/OSDMonitor.cc @@ -1774,8 +1774,17 @@ void OSDMonitor::encode_pending(MonitorDBStore::TransactionRef t) i != pending_inc.new_state.end(); ++i) { int s = i->second ? i->second : CEPH_OSD_UP; - if (s & CEPH_OSD_UP) + if (s & CEPH_OSD_UP) { dout(2) << " osd." << i->first << " DOWN" << dendl; + // Reset laggy parameters if failure interval exceeds a threshold. 
+ const osd_xinfo_t& xi = osdmap.get_xinfo(i->first); + if ((xi.laggy_probability || xi.laggy_interval) && xi.down_stamp.sec()) { + int last_failure_interval = pending_inc.modified.sec() - xi.down_stamp.sec(); + if (grace_interval_threshold_exceeded(last_failure_interval)) { + set_default_laggy_params(i->first); + } + } + } if (s & CEPH_OSD_EXISTS) dout(2) << " osd." << i->first << " DNE" << dendl; } @@ -3117,6 +3126,39 @@ void OSDMonitor::take_all_failures(list& ls) failure_info.clear(); } +int OSDMonitor::get_grace_interval_threshold() +{ + int halflife = g_conf()->mon_osd_laggy_halflife; + // Scale the halflife period (default: 1_hr) by + // a factor (48) to calculate the threshold. + int grace_threshold_factor = 48; + return halflife * grace_threshold_factor; +} + +bool OSDMonitor::grace_interval_threshold_exceeded(int last_failed_interval) +{ + int grace_interval_threshold_secs = get_grace_interval_threshold(); + if (last_failed_interval > grace_interval_threshold_secs) { + dout(1) << " last_failed_interval " << last_failed_interval + << " > grace_interval_threshold_secs " << grace_interval_threshold_secs + << dendl; + return true; + } + return false; +} + +void OSDMonitor::set_default_laggy_params(int target_osd) +{ + if (pending_inc.new_xinfo.count(target_osd) == 0) { + pending_inc.new_xinfo[target_osd] = osdmap.osd_xinfo[target_osd]; + } + osd_xinfo_t& xi = pending_inc.new_xinfo[target_osd]; + xi.down_stamp = pending_inc.modified; + xi.laggy_probability = 0.0; + xi.laggy_interval = 0; + dout(20) << __func__ << " reset laggy, now xi " << xi << dendl; +} + // boot -- diff --git a/src/mon/OSDMonitor.h b/src/mon/OSDMonitor.h index 3ff948ad3c869..b40b3e7f7f212 100644 --- a/src/mon/OSDMonitor.h +++ b/src/mon/OSDMonitor.h @@ -653,6 +653,10 @@ protected: int32_t _allocate_osd_id(int32_t* existing_id); + int get_grace_interval_threshold(); + bool grace_interval_threshold_exceeded(int last_failed); + void set_default_laggy_params(int target_osd); + public: 
OSDMonitor(CephContext *cct, Monitor *mn, Paxos *p, const string& service_name); -- 2.39.5