From 865ddcac41069f7857a3066b3b04a5c62cc1db8c Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 17 Sep 2015 21:48:30 -0400 Subject: [PATCH] osd: resend pending failure reports with a new mon session Signed-off-by: Sage Weil --- src/osd/OSD.cc | 21 +++++++++++++++++++-- src/osd/OSD.h | 4 ++-- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 8642179ef0383..b528914e39d72 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -3776,7 +3776,7 @@ void OSD::handle_osd_ping(MOSDPing *m) } if (failure_pending.count(from)) { dout(10) << "handle_osd_ping canceling in-flight failure report for osd." << from<< dendl; - send_still_alive(curmap->get_epoch(), failure_pending[from]); + send_still_alive(curmap->get_epoch(), failure_pending[from].second); failure_pending.erase(from); } } @@ -4381,6 +4381,7 @@ void OSD::ms_handle_connect(Connection *con) // resend everything, it's a new session send_alive(); service.send_pg_temp(); + requeue_failures(); send_failures(); send_pg_stats(now); @@ -4719,6 +4720,22 @@ void OSD::got_full_map(epoch_t e) } } +void OSD::requeue_failures() +{ + assert(osd_lock.is_locked()); + Mutex::Locker l(heartbeat_lock); + unsigned old_queue = failure_queue.size(); + unsigned old_pending = failure_pending.size(); + for (map<int,pair<utime_t,entity_inst_t> >::iterator p = + failure_pending.begin(); + p != failure_pending.end(); + ++p) { + failure_queue[p->first] = p->second.first; + } + dout(10) << __func__ << " " << old_queue << " + " << old_pending << " -> " + << failure_queue.size() << dendl; +} + void OSD::send_failures() { assert(osd_lock.is_locked()); @@ -4730,7 +4747,7 @@ void OSD::send_failures() entity_inst_t i = osdmap->get_inst(osd); monc->send_mon_message(new MOSDFailure(monc->get_fsid(), i, failed_for, osdmap->get_epoch())); - failure_pending[osd] = i; + failure_pending[osd] = make_pair(failure_queue.begin()->second, i); failure_queue.erase(osd); } } diff --git a/src/osd/OSD.h b/src/osd/OSD.h index 8aef344305edf..7ca705a85f717 
100644 --- a/src/osd/OSD.h +++ b/src/osd/OSD.h @@ -2001,9 +2001,9 @@ protected: // -- failures -- map<int,utime_t> failure_queue; - map<int,entity_inst_t> failure_pending; - + map<int,pair<utime_t,entity_inst_t> > failure_pending; + void requeue_failures(); void send_failures(); void send_still_alive(epoch_t epoch, const entity_inst_t &i); -- 2.39.5