From 64cedf6fa3ee309cc96554286bfb805e4ca89439 Mon Sep 17 00:00:00 2001
From: Greg Farnum
Date: Wed, 12 Feb 2014 11:30:15 -0800
Subject: [PATCH] OSD: disable the PGStatsAck timeout when we are reconnecting
 to a monitor

Previously, the timeout counter started as soon as we issued the reopen,
but if the reconnect process itself took a while, we might time out and
issue another reopen just as we get to the point where it's possible to
get work done.
Since the mon client has its own reconnect timeouts (that is, the OSD
doesn't need to trigger those), we instead disable our timeouts while the
reconnect is happening, and then turn them back on again starting from
when we get the reconnect callback.

Signed-off-by: Greg Farnum
---
 src/osd/OSD.cc |  6 ++++--
 src/osd/OSD.h  | 16 ++++++++++++++++
 2 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc
index 0ba79a9b61675..911c0946c79c1 100644
--- a/src/osd/OSD.cc
+++ b/src/osd/OSD.cc
@@ -828,6 +828,7 @@ OSD::OSD(CephContext *cct_, ObjectStore *store_,
   debug_drop_pg_create_duration(cct->_conf->osd_debug_drop_pg_create_duration),
   debug_drop_pg_create_left(-1),
   outstanding_pg_stats(false),
+  timeout_mon_on_pg_stats(true),
   up_thru_wanted(0), up_thru_pending(0),
   pg_stat_queue_lock("OSD::pg_stat_queue_lock"),
   osd_stat_updated(false),
@@ -3015,11 +3016,12 @@ void OSD::tick()
 
   // mon report?
   utime_t now = ceph_clock_now(cct);
-  if (outstanding_pg_stats &&
+  if (outstanding_pg_stats && timeout_mon_on_pg_stats &&
       (now - cct->_conf->osd_mon_ack_timeout) > last_pg_stats_ack) {
     dout(1) << "mon hasn't acked PGStats in " << now - last_pg_stats_ack
             << " seconds, reconnecting elsewhere" << dendl;
-    monc->reopen_session();
+    monc->reopen_session(new C_MonStatsAckTimer(this));
+    timeout_mon_on_pg_stats = false;
     last_pg_stats_ack = ceph_clock_now(cct); // reset clock
     last_pg_stats_sent = utime_t();
   }
diff --git a/src/osd/OSD.h b/src/osd/OSD.h
index cebceb7150e95..e1c0242a34076 100644
--- a/src/osd/OSD.h
+++ b/src/osd/OSD.h
@@ -1251,6 +1251,22 @@ protected:
    */
   utime_t last_pg_stats_ack;
   bool outstanding_pg_stats; // some stat updates haven't been acked yet
+  bool timeout_mon_on_pg_stats;
+  void restart_stats_timer() {
+    Mutex::Locker l(osd_lock);
+    last_pg_stats_ack = ceph_clock_now(cct);
+    timeout_mon_on_pg_stats = true;
+  }
+
+  class C_MonStatsAckTimer : public Context {
+    OSD *osd;
+  public:
+    C_MonStatsAckTimer(OSD *o) : osd(o) {}
+    void finish(int r) {
+      osd->restart_stats_timer();
+    }
+  };
+  friend class C_MonStatsAckTimer;
 
   void do_mon_report();
 
-- 
2.47.3