From 37fbcb958f79bbfcba57c516b4862a14c52be398 Mon Sep 17 00:00:00 2001
From: Greg Farnum
Date: Wed, 12 Feb 2014 11:30:15 -0800
Subject: [PATCH] OSD: disable the PGStatsAck timeout when we are reconnecting to a monitor

Previously, the timeout counter started as soon as we issued the reopen,
but if the reconnect process itself took a while, we might time out and
issue another reopen just as we get to the point where it's possible to
get work done. Since the mon client has its own reconnect timeouts (that is,
the OSD doesn't need to trigger those), we instead disable our timeouts
while the reconnect is happening, and then turn them back on again starting
from when we get the reconnect callback.

Signed-off-by: Greg Farnum
Reviewed-by: Sage Weil
(cherry picked from commit 64cedf6fa3ee309cc96554286bfb805e4ca89439)

Conflicts:
	src/osd/OSD.cc
---
 src/osd/OSD.cc |  6 ++++--
 src/osd/OSD.h  | 16 ++++++++++++++++
 2 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc
index 96b0b3390632..d354465aecab 100644
--- a/src/osd/OSD.cc
+++ b/src/osd/OSD.cc
@@ -938,6 +938,7 @@ OSD::OSD(int id, Messenger *internal_messenger, Messenger *external_messenger,
   debug_drop_pg_create_duration(g_conf->osd_debug_drop_pg_create_duration),
   debug_drop_pg_create_left(-1),
   outstanding_pg_stats(false),
+  timeout_mon_on_pg_stats(true),
   up_thru_wanted(0), up_thru_pending(0),
   pg_stat_queue_lock("OSD::pg_stat_queue_lock"),
   osd_stat_updated(false),
@@ -3060,11 +3061,12 @@ void OSD::tick()
 
   // mon report?
   utime_t now = ceph_clock_now(g_ceph_context);
-  if (outstanding_pg_stats &&
+  if (outstanding_pg_stats && timeout_mon_on_pg_stats &&
       (now - g_conf->osd_mon_ack_timeout) > last_pg_stats_ack) {
     dout(1) << "mon hasn't acked PGStats in " << now - last_pg_stats_ack
             << " seconds, reconnecting elsewhere" << dendl;
-    monc->reopen_session();
+    monc->reopen_session(new C_MonStatsAckTimer(this));
+    timeout_mon_on_pg_stats = false;
     last_pg_stats_ack = ceph_clock_now(g_ceph_context); // reset clock
     last_pg_stats_sent = utime_t();
   }
diff --git a/src/osd/OSD.h b/src/osd/OSD.h
index c14636c5154c..a6c270d83d49 100644
--- a/src/osd/OSD.h
+++ b/src/osd/OSD.h
@@ -1188,6 +1188,22 @@ protected:
    */
   utime_t last_pg_stats_ack;
   bool outstanding_pg_stats; // some stat updates haven't been acked yet
+  bool timeout_mon_on_pg_stats;
+  void restart_stats_timer() {
+    Mutex::Locker l(osd_lock);
+    last_pg_stats_ack = ceph_clock_now(cct);
+    timeout_mon_on_pg_stats = true;
+  }
+
+  class C_MonStatsAckTimer : public Context {
+    OSD *osd;
+  public:
+    C_MonStatsAckTimer(OSD *o) : osd(o) {}
+    void finish(int r) {
+      osd->restart_stats_timer();
+    }
+  };
+  friend class C_MonStatsAckTimer;
 
   void do_mon_report();
 
-- 
2.47.3