From 81a257b6ff2d7f0eb8c7f9b9510bc49ba3a92359 Mon Sep 17 00:00:00 2001 From: Yingxin Cheng Date: Thu, 26 Mar 2020 12:54:13 +0800 Subject: [PATCH] crimson/osd: heartbeat include first sent time as unresponsive Signed-off-by: Yingxin Cheng --- src/crimson/osd/heartbeat.cc | 22 ++++++++++++++++------ src/crimson/osd/heartbeat.h | 1 + 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/src/crimson/osd/heartbeat.cc b/src/crimson/osd/heartbeat.cc index f6d0d7b3b7a..e91452c01db 100644 --- a/src/crimson/osd/heartbeat.cc +++ b/src/crimson/osd/heartbeat.cc @@ -315,12 +315,22 @@ void Heartbeat::heartbeat_check() } if (peer_info.is_unhealthy(now)) { - logger().error(" heartbeat_check: no reply from osd.{} " - "since back {} front {} (oldest deadline {})", - osd, peer_info.last_rx_back, peer_info.last_rx_front, - peer_info.ping_history.begin()->second.deadline); - failure_queue[osd] = std::min(peer_info.last_rx_back, - peer_info.last_rx_front); + auto oldest_deadline = peer_info.ping_history.begin()->second.deadline; + auto failed_since = std::min(peer_info.last_rx_back, + peer_info.last_rx_front); + if (clock::is_zero(failed_since)) { + logger().error("heartbeat_check: no reply from osd.{} " + "ever on either front or back, first ping sent {} " + "(oldest deadline {})", + osd, peer_info.first_tx, oldest_deadline); + failed_since = peer_info.first_tx; + } else { + logger().error("heartbeat_check: no reply from osd.{} " + "since back {} front {} (oldest deadline {})", + osd, peer_info.last_rx_back, peer_info.last_rx_front, + oldest_deadline); + } + failure_queue.emplace(osd, failed_since); } } if (!failure_queue.empty()) { diff --git a/src/crimson/osd/heartbeat.h b/src/crimson/osd/heartbeat.h index f5e2ad596cf..2ab4970f59e 100644 --- a/src/crimson/osd/heartbeat.h +++ b/src/crimson/osd/heartbeat.h @@ -111,6 +111,7 @@ private: peers_map_t peers; // osds which are considered failed // osd_id => when was the last time that both front and back pings were acked + // or 
sent. // use for calculating how long the OSD has been unresponsive using failure_queue_t = std::map<osd_id_t, clock::time_point>; seastar::future<> send_failures(failure_queue_t&& failure_queue); -- 2.39.5