From cf16d37e2236c6c1b99cf1b4967ff4c1dc68058a Mon Sep 17 00:00:00 2001
From: Xuehan Xu <xxhdx1985126@163.com>
Date: Sun, 8 Mar 2020 13:59:32 +0800
Subject: [PATCH] crimson/osd/heartbeat: report peer failure to monitors

Signed-off-by: Xuehan Xu <xxhdx1985126@163.com>
---
 src/crimson/osd/heartbeat.cc | 40 ++++++++++++++++++++++++++++++------
 src/crimson/osd/heartbeat.h  |  9 ++++----
 2 files changed, 38 insertions(+), 11 deletions(-)

diff --git a/src/crimson/osd/heartbeat.cc b/src/crimson/osd/heartbeat.cc
index 0fb5eff7dcc93..dd289e777171c 100644
--- a/src/crimson/osd/heartbeat.cc
+++ b/src/crimson/osd/heartbeat.cc
@@ -9,6 +9,7 @@
 #include "messages/MOSDFailure.h"
 
 #include "crimson/common/config_proxy.h"
+#include "crimson/common/formatter.h"
 #include "crimson/net/Connection.h"
 #include "crimson/net/Messenger.h"
 #include "crimson/osd/shard_services.h"
@@ -33,7 +34,10 @@ Heartbeat::Heartbeat(const crimson::osd::ShardServices& service,
     front_msgr{front_msgr},
     back_msgr{back_msgr},
     // do this in background
-    timer{[this] { (void)send_heartbeats(); }}
+    timer{[this] {
+      heartbeat_check();
+      (void)send_heartbeats();
+    }}
 {}
 
 seastar::future<> Heartbeat::start(entity_addrvec_t front_addrs,
@@ -294,7 +298,6 @@ seastar::future<> Heartbeat::handle_reply(crimson::net::Connection* conn,
   }
   if (peer.is_healthy(now)) {
     // cancel false reports
-    failure_queue.erase(from);
     if (auto pending = failure_pending.find(from);
         pending != failure_pending.end()) {
       return send_still_alive(from, pending->second.addrs);
@@ -309,6 +312,34 @@ seastar::future<> Heartbeat::handle_you_died()
   return seastar::now();
 }
 
+void Heartbeat::heartbeat_check()
+{
+  // Scan all peers once and batch up the ones is_unhealthy() flags at
+  // this instant, so they can be reported to the monitors together.
+  failure_queue_t overdue;
+  const auto now = clock::now();
+  for (const auto& [id, info]: peers) {
+    // skip peers whose first_tx is still zero, and peers that are fine
+    if (clock::is_zero(info.first_tx) || !info.is_unhealthy(now)) {
+      continue;
+    }
+    const auto& oldest_deadline = info.ping_history.begin()->second.deadline;
+    logger().error(" heartbeat_check: no reply from osd.{} "
+                   "since back {} front {} (oldest deadline {})",
+                   id, info.last_rx_back, info.last_rx_front, oldest_deadline);
+    // record the earlier of the two last-reply timestamps for this peer
+    overdue[id] = std::min(info.last_rx_back, info.last_rx_front);
+  }
+  if (overdue.empty()) {
+    return;
+  }
+  // send_failures() may run in the background: messages are sent in
+  // order, so if a later check finds a previously "failed" peer healthy
+  // again, its "still alive" message is sent after the earlier "osd
+  // failure" message, which is safe.
+  (void)send_failures(std::move(overdue));
+}
+
 seastar::future<> Heartbeat::send_heartbeats()
 {
   using peers_item_t = typename peers_map_t::value_type;
@@ -353,7 +384,7 @@ seastar::future<> Heartbeat::send_heartbeats()
     });
 }
 
-seastar::future<> Heartbeat::send_failures()
+seastar::future<> Heartbeat::send_failures(failure_queue_t&& failure_queue)
 {
   using failure_item_t = typename failure_queue_t::value_type;
   return seastar::parallel_for_each(failure_queue,
@@ -374,9 +405,6 @@ seastar::future<> Heartbeat::send_failures()
       failure_pending.emplace(osd, failure_info_t{failed_since,
                                                   osdmap->get_addrs(osd)});
       return monc.send_message(failure_report);
-    }).then([this] {
-      failure_queue.clear();
-      return seastar::now();
     });
 }
 
diff --git a/src/crimson/osd/heartbeat.h b/src/crimson/osd/heartbeat.h
index 036299f389902..c51e81de67b07 100644
--- a/src/crimson/osd/heartbeat.h
+++ b/src/crimson/osd/heartbeat.h
@@ -38,9 +38,6 @@ public:
   seastar::future<> update_peers(int whoami);
   seastar::future<> remove_peer(osd_id_t peer);
 
-  seastar::future<> send_heartbeats();
-  seastar::future<> send_failures();
-
   const entity_addrvec_t& get_front_addrs() const;
   const entity_addrvec_t& get_back_addrs() const;
 
@@ -109,12 +106,14 @@ private:
   };
   using peers_map_t = std::map<osd_id_t, PeerInfo>;
   peers_map_t peers;
-
   // osds which are considered failed
   // osd_id => when was the last time that both front and back pings were acked
   //           use for calculating how long the OSD has been unresponsive
   using failure_queue_t = std::map<osd_id_t, clock::time_point>;
-  failure_queue_t failure_queue;
+  seastar::future<> send_failures(failure_queue_t&& failure_queue);
+  seastar::future<> send_heartbeats();
+  void heartbeat_check();
+
   struct failure_info_t {
     clock::time_point failed_since;
     entity_addrvec_t addrs;
-- 
2.39.5