From c0c5a6e7d09439b8bc23ad7ab83889ae5a921def Mon Sep 17 00:00:00 2001 From: Xinze Chi Date: Fri, 20 Nov 2015 20:59:16 +0800 Subject: [PATCH] mon: fix osd failure info in mon When the network adapter of node A runs into an error, the OSDs on that node also tell the mon that other OSDs' heartbeats have timed out. So when rebind fails after 3 retries, the OSD should cancel the in-flight failure reports it previously sent to the mon. Signed-off-by: Xinze Chi --- src/osd/OSD.cc | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index c2267efe7c210..5e2430f1a9342 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -6488,6 +6488,7 @@ void OSD::handle_osd_map(MOSDMap *m) bool do_shutdown = false; bool do_restart = false; + bool network_error = false; if (osdmap->get_epoch() > 0 && is_active()) { if (!osdmap->exists(whoami)) { @@ -6539,16 +6540,22 @@ void OSD::handle_osd_map(MOSDMap *m) avoid_ports.insert(hb_front_server_messenger->get_myaddr().get_port()); int r = cluster_messenger->rebind(avoid_ports); - if (r != 0) + if (r != 0) { do_shutdown = true; // FIXME: do_restart? + network_error = true; + } r = hb_back_server_messenger->rebind(avoid_ports); - if (r != 0) + if (r != 0) { do_shutdown = true; // FIXME: do_restart? + network_error = true; + } r = hb_front_server_messenger->rebind(avoid_ports); - if (r != 0) + if (r != 0) { do_shutdown = true; // FIXME: do_restart? + network_error = true; + } hbclient_messenger->mark_down_all(); @@ -6598,6 +6605,14 @@ void OSD::handle_osd_map(MOSDMap *m) else if (do_shutdown) { osd_lock.Unlock(); shutdown(); + if (network_error) { + map>::iterator it = failure_pending.begin(); + while (it != failure_pending.end()) { + dout(10) << "handle_osd_ping canceling in-flight failure report for osd." 
<< it->first << dendl; + send_still_alive(osdmap->get_epoch(), it->second.second); + failure_pending.erase(it++); + } + } osd_lock.Lock(); } else if (is_preboot()) { @@ -6609,7 +6624,6 @@ void OSD::handle_osd_map(MOSDMap *m) else if (do_restart) start_boot(); - m->put(); } -- 2.39.5