From cf16d37e2236c6c1b99cf1b4967ff4c1dc68058a Mon Sep 17 00:00:00 2001 From: Xuehan Xu Date: Sun, 8 Mar 2020 13:59:32 +0800 Subject: [PATCH] crimson/os/heartbeat: report peer failure to monitors Signed-off-by: Xuehan Xu --- src/crimson/osd/heartbeat.cc | 40 ++++++++++++++++++++++++++++++------ src/crimson/osd/heartbeat.h | 9 ++++---- 2 files changed, 38 insertions(+), 11 deletions(-) diff --git a/src/crimson/osd/heartbeat.cc b/src/crimson/osd/heartbeat.cc index 0fb5eff7dcc93..dd289e777171c 100644 --- a/src/crimson/osd/heartbeat.cc +++ b/src/crimson/osd/heartbeat.cc @@ -9,6 +9,7 @@ #include "messages/MOSDFailure.h" #include "crimson/common/config_proxy.h" +#include "crimson/common/formatter.h" #include "crimson/net/Connection.h" #include "crimson/net/Messenger.h" #include "crimson/osd/shard_services.h" @@ -33,7 +34,10 @@ Heartbeat::Heartbeat(const crimson::osd::ShardServices& service, front_msgr{front_msgr}, back_msgr{back_msgr}, // do this in background - timer{[this] { (void)send_heartbeats(); }} + timer{[this] { + heartbeat_check(); + (void)send_heartbeats(); + }} {} seastar::future<> Heartbeat::start(entity_addrvec_t front_addrs, @@ -294,7 +298,6 @@ seastar::future<> Heartbeat::handle_reply(crimson::net::Connection* conn, } if (peer.is_healthy(now)) { // cancel false reports - failure_queue.erase(from); if (auto pending = failure_pending.find(from); pending != failure_pending.end()) { return send_still_alive(from, pending->second.addrs); @@ -309,6 +312,34 @@ seastar::future<> Heartbeat::handle_you_died() return seastar::now(); } +void Heartbeat::heartbeat_check() +{ + failure_queue_t failure_queue; + const auto now = clock::now(); + for (const auto& [osd, peer_info]: peers) { + if (clock::is_zero(peer_info.first_tx)) { + continue; + } + + if (peer_info.is_unhealthy(now)) { + logger().error(" heartbeat_check: no reply from osd.{} " + "since back {} front {} (oldest deadline {})", + osd, peer_info.last_rx_back, peer_info.last_rx_front, + 
peer_info.ping_history.begin()->second.deadline); + failure_queue[osd] = std::min(peer_info.last_rx_back, + peer_info.last_rx_front); + } + } + if (!failure_queue.empty()) { + // send_failures can run in the background: messages are + // sent in order, so if a later check finds a previously + // "failed" peer to be healthy, its "still alive" message is + // sent after the earlier "osd failure" message, which is + // totally safe. + (void)send_failures(std::move(failure_queue)); + } +} + seastar::future<> Heartbeat::send_heartbeats() { using peers_item_t = typename peers_map_t::value_type; @@ -353,7 +384,7 @@ seastar::future<> Heartbeat::send_heartbeats() }); } -seastar::future<> Heartbeat::send_failures() +seastar::future<> Heartbeat::send_failures(failure_queue_t&& failure_queue) { using failure_item_t = typename failure_queue_t::value_type; return seastar::parallel_for_each(failure_queue, @@ -374,9 +405,6 @@ seastar::future<> Heartbeat::send_failures() failure_pending.emplace(osd, failure_info_t{failed_since, osdmap->get_addrs(osd)}); return monc.send_message(failure_report); - }).then([this] { - failure_queue.clear(); - return seastar::now(); }); } diff --git a/src/crimson/osd/heartbeat.h b/src/crimson/osd/heartbeat.h index 036299f389902..c51e81de67b07 100644 --- a/src/crimson/osd/heartbeat.h +++ b/src/crimson/osd/heartbeat.h @@ -38,9 +38,6 @@ public: seastar::future<> update_peers(int whoami); seastar::future<> remove_peer(osd_id_t peer); - seastar::future<> send_heartbeats(); - seastar::future<> send_failures(); - const entity_addrvec_t& get_front_addrs() const; const entity_addrvec_t& get_back_addrs() const; @@ -109,12 +106,14 @@ private: }; using peers_map_t = std::map; peers_map_t peers; - // osds which are considered failed // osd_id => when was the last time that both front and back pings were acked // use for calculating how long the OSD has been unresponsive using failure_queue_t = std::map; - failure_queue_t failure_queue; + 
seastar::future<> send_failures(failure_queue_t&& failure_queue); + seastar::future<> send_heartbeats(); + void heartbeat_check(); + struct failure_info_t { clock::time_point failed_since; entity_addrvec_t addrs; -- 2.39.5