From 37a758a14a4dbbb96fa89c569914b22fbf260025 Mon Sep 17 00:00:00 2001 From: =?utf8?q?=E5=AE=8B=E9=A1=BA10180185?= Date: Mon, 16 Sep 2019 20:26:52 -0400 Subject: [PATCH] OSD: avoid failure peer info to resent MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit maybe_update_heartbeat_peers may remove a peer and never add it back. If that peer is in failure_pending but does not really have a problem, failure_pending will hold that peer until it really goes down or is added back again. Once ms_handle_connect is called, these pending failures will be resent. 2019-09-12 09:44:47.080933 7f1fad781700 10 osd.13 6175 ms_handle_connect on mon 2019-09-12 09:44:47.080937 7f1fad781700 10 osd.13 6175 send_alive up_thru currently 6159 want 6155 2019-09-12 09:44:47.080945 7f1fad781700 10 osd.13 6175 requeue_failures 0 + 1 -> 1 Signed-off-by: 宋顺10180185 --- src/osd/OSD.cc | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index d67002909e9ef..f2b928b9468c1 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -4482,6 +4482,16 @@ void OSD::maybe_update_heartbeat_peers() } dout(10) << "maybe_update_heartbeat_peers " << heartbeat_peers.size() << " peers, extras " << extras << dendl; + + // clean up stale failure pending + for (auto it = failure_pending.begin(); it != failure_pending.end();) { + if (heartbeat_peers.count(it->first) == 0) { + send_still_alive(osdmap->get_epoch(), it->first, it->second.second); + failure_pending.erase(it++); + } else { + it++; + } + } } void OSD::reset_heartbeat_peers(bool all) -- 2.39.5