}
if (failure_pending.count(from)) {
dout(10) << "handle_osd_ping canceling in-flight failure report for osd." << from<< dendl;
- send_still_alive(curmap->get_epoch(), failure_pending[from]);
+ send_still_alive(curmap->get_epoch(), failure_pending[from].second);
failure_pending.erase(from);
}
}
// resend everything, it's a new session
send_alive();
service.send_pg_temp();
+ requeue_failures();
send_failures();
send_pg_stats(now);
}
}
+void OSD::requeue_failures()  // copy every failure_pending entry back into failure_queue so it can be sent again
+{  // in the visible caller this runs right before send_failures() on the "new session" resend path
+ assert(osd_lock.is_locked());  // caller must already hold osd_lock
+ Mutex::Locker l(heartbeat_lock);  // presumably a scoped guard: heartbeat_lock held until return -- confirm against Mutex::Locker
+ unsigned old_queue = failure_queue.size();  // sizes before the copy; used only in the debug line below
+ unsigned old_pending = failure_pending.size();
+ for (map<int,pair<utime_t,entity_inst_t> >::iterator p =
+ failure_pending.begin();
+ p != failure_pending.end();
+ ++p) {  // read-only walk: nothing is erased from failure_pending here
+ failure_queue[p->first] = p->second.first;  // key is the osd id; value is the stored utime_t, overwriting any entry already queued for that osd
+ }
+ dout(10) << __func__ << " " << old_queue << " + " << old_pending << " -> "
+ << failure_queue.size() << dendl;  // logs "<old queue> + <old pending> -> <new queue size>"
+}
+
void OSD::send_failures()
{
assert(osd_lock.is_locked());
entity_inst_t i = osdmap->get_inst(osd);
monc->send_mon_message(new MOSDFailure(monc->get_fsid(), i, failed_for,
osdmap->get_epoch()));
- failure_pending[osd] = i;
+ failure_pending[osd] = make_pair(failure_queue.begin()->second, i);
failure_queue.erase(osd);
}
}
// -- failures --
map<int,utime_t> failure_queue;  // osd id -> utime_t (presumably when the failure was first noticed -- confirm); send_failures() erases entries as it reports them
- map<int,entity_inst_t> failure_pending;
-
+ map<int,pair<utime_t,entity_inst_t> > failure_pending;  // osd id -> (utime_t copied from failure_queue, entity_inst_t the report was sent for)
+ void requeue_failures();  // copies the failure_pending entries back into failure_queue
void send_failures();
void send_still_alive(epoch_t epoch, const entity_inst_t &i);