From 114c65fc0b04971fb93093c1e3fce6a71781351a Mon Sep 17 00:00:00 2001 From: xie xingguo Date: Fri, 16 Nov 2018 14:56:59 +0800 Subject: [PATCH] osd: fix heartbeat brain-split behaviour Yet another issue similar to 8d8e8a359c66b5767be6a4a2327c5f7097885464. To reproduce, construct a cluster with 3 hosts, each containing only a single osd: - cut off osd.1's cluster network and wait for osd.1 to be marked down - cut off both osd.2's & osd.3's cluster networks It is then possible to end up with __two__ down osds (e.g., both osd.1 & osd.2 are down), and restoring osd.1's and osd.2's cluster networks afterwards won't change anything. The root cause is that by default we always require at least 1/3 of the heartbeat connections with all current __up__ osds to be active in order to bring a previously dead (unhealthy) osd back to life. However, the __up__ set may itself be the minority partition that has been cut off from the rest of the cluster entirely, which causes the brain-split behaviour demonstrated above. The simplest fix is to try to re-activate an unhealthy osd whenever it is still safe to do so. Also keep in mind that frequent up-to-down transitions will kill off the osd process entirely, which is why the `osd_markdown_log` related check is needed here. 
Signed-off-by: xie xingguo --- src/osd/OSD.cc | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index e239e4961896..a68dcded2ada 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -5619,8 +5619,17 @@ bool OSD::_is_healthy() } if (is_waiting_for_healthy()) { + utime_t now = ceph_clock_now(); + utime_t grace = utime_t(cct->_conf->osd_max_markdown_period, 0); + while (!osd_markdown_log.empty() && + osd_markdown_log.front() + grace < now) + osd_markdown_log.pop_front(); + if (osd_markdown_log.size() <= 1) { + dout(5) << __func__ << " first time marked as down," + << " try reboot unconditionally" << dendl; + return true; + } std::lock_guard l(heartbeat_lock); - utime_t now = ceph_clock_now(); int num = 0, up = 0; for (map::iterator p = heartbeat_peers.begin(); p != heartbeat_peers.end(); -- 2.47.3