git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
crimson/osd: heartbeat include first sent time as unresponsive
author Yingxin Cheng <yingxin.cheng@intel.com>
Thu, 26 Mar 2020 04:54:13 +0000 (12:54 +0800)
committer Kefu Chai <kchai@redhat.com>
Mon, 15 Jun 2020 12:49:09 +0000 (20:49 +0800)
Signed-off-by: Yingxin Cheng <yingxin.cheng@intel.com>
src/crimson/osd/heartbeat.cc
src/crimson/osd/heartbeat.h

index f6d0d7b3b7a48ab789ce0da275c62da1a443b2b6..e91452c01db8a07aae5b612a7c4eaf8f32734bd5 100644 (file)
@@ -315,12 +315,22 @@ void Heartbeat::heartbeat_check()
     }
 
     if (peer_info.is_unhealthy(now)) {
-      logger().error(" heartbeat_check: no reply from osd.{} "
-                    "since back {} front {} (oldest deadline {})",
-                    osd, peer_info.last_rx_back, peer_info.last_rx_front,
-                    peer_info.ping_history.begin()->second.deadline);
-      failure_queue[osd] = std::min(peer_info.last_rx_back,
-                                   peer_info.last_rx_front);
+      auto oldest_deadline = peer_info.ping_history.begin()->second.deadline;
+      auto failed_since = std::min(peer_info.last_rx_back,
+                                   peer_info.last_rx_front);
+      if (clock::is_zero(failed_since)) {
+        logger().error("heartbeat_check: no reply from osd.{} "
+                       "ever on either front or back, first ping sent {} "
+                       "(oldest deadline {})",
+                       osd, peer_info.first_tx, oldest_deadline);
+        failed_since = peer_info.first_tx;
+      } else {
+        logger().error("heartbeat_check: no reply from osd.{} "
+                       "since back {} front {} (oldest deadline {})",
+                       osd, peer_info.last_rx_back, peer_info.last_rx_front,
+                       oldest_deadline);
+      }
+      failure_queue.emplace(osd, failed_since);
     }
   }
   if (!failure_queue.empty()) {
index f5e2ad596cf56ba07d8440f7eafe48d9145ea30b..2ab4970f59e7ad91529430d57b76005d1cfbf712 100644 (file)
@@ -111,6 +111,7 @@ private:
   peers_map_t peers;
   // osds which are considered failed
   // osd_id => when was the last time that both front and back pings were acked
+  //           or sent.
   //           use for calculating how long the OSD has been unresponsive
   using failure_queue_t = std::map<osd_id_t, clock::time_point>;
   seastar::future<> send_failures(failure_queue_t&& failure_queue);