From ef4e4c8287077e33f17c3b4f3848d1db395126d2 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 14 Nov 2012 17:00:57 -0800 Subject: [PATCH] mon: calculate failed_since relative to message receive time Instead of looking at the current time when we process the message, look at the receive time. This gives us a more accurate failure time, given that messages may be requeued. It doesn't solve the problem when messages are forwarded between monitors due to an election, but that's ok; this is still a net improvement. Signed-off-by: Sage Weil --- src/mon/OSDMonitor.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc index 21ae96806e41c..bad579048f24b 100644 --- a/src/mon/OSDMonitor.cc +++ b/src/mon/OSDMonitor.cc @@ -768,7 +768,7 @@ bool OSDMonitor::prepare_failure(MOSDFailure *m) // calculate failure time utime_t now = ceph_clock_now(g_ceph_context); - utime_t failed_since = now - utime_t(m->failed_for ? m->failed_for : g_conf->osd_heartbeat_grace, 0); + utime_t failed_since = m->get_recv_stamp() - utime_t(m->failed_for ? m->failed_for : g_conf->osd_heartbeat_grace, 0); if (m->if_osd_failed()) { // add a report -- 2.39.5