From 9950062f90d35b20d195317e78d7ab51f851da4a Mon Sep 17 00:00:00 2001
From: John Spray
Date: Wed, 21 Sep 2016 11:45:38 +0100
Subject: [PATCH] mon: make MDSMonitor tolerant of slow mon elections

Previously MDS daemons would get failed incorrectly
when they appeared to have timed out due to
delays in calling into MDSMonitor that were actually
caused by e.g. slow leveldb writes leading to slow mon elections.

Fixes: http://tracker.ceph.com/issues/17308
Signed-off-by: John Spray
---
Reviewer notes (free-form text below the "---" marker; not part of the
commit message or of any hunk):

* tick() samples the clock once into `now` and reuses that value for the
  stall check, for the beacon cutoff, and for newly created last_beacon
  entries.
* If the time since the previous tick() exceeds
  (mds_beacon_grace - mds_beacon_interval), every last_beacon stamp is
  reset to `now` before the cutoff is computed.
* on_restart() zeroes last_tick and clears last_beacon; a zero last_tick
  makes the next tick() seed it with `now`, so that tick sees a gap of
  zero.
* NOTE(review): line breaks and indentation were restored from a
  whitespace-collapsed copy of this patch; compare against the upstream
  commit before applying.

 src/mon/MDSMonitor.cc | 28 ++++++++++++++++++++++++++--
 src/mon/MDSMonitor.h  |  6 ++++++
 2 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/src/mon/MDSMonitor.cc b/src/mon/MDSMonitor.cc
index 55ee61b1e44ed..e0b8be5de1430 100644
--- a/src/mon/MDSMonitor.cc
+++ b/src/mon/MDSMonitor.cc
@@ -2965,8 +2965,25 @@ void MDSMonitor::tick()
     do_propose |= maybe_expand_cluster(i.second);
   }
 
+  const auto now = ceph_clock_now(g_ceph_context);
+  if (last_tick.is_zero()) {
+    last_tick = now;
+  }
+
+  if (now - last_tick > (g_conf->mds_beacon_grace - g_conf->mds_beacon_interval)) {
+    // This case handles either local slowness (calls being delayed
+    // for whatever reason) or cluster election slowness (a long gap
+    // between calls while an election happened)
+    dout(4) << __func__ << ": resetting beacon timeouts due to mon delay "
+            "(slow election?) of " << now - last_tick << " seconds" << dendl;
+    for (auto &i : last_beacon) {
+      i.second.stamp = now;
+    }
+  }
+
+  last_tick = now;
+
   // check beacon timestamps
-  utime_t now = ceph_clock_now(g_ceph_context);
   utime_t cutoff = now;
   cutoff -= g_conf->mds_beacon_grace;
 
@@ -2974,7 +2991,7 @@ void MDSMonitor::tick()
   for (const auto &p : pending_fsmap.mds_roles) {
     auto &gid = p.first;
     if (last_beacon.count(gid) == 0) {
-      last_beacon[gid].stamp = ceph_clock_now(g_ceph_context);
+      last_beacon[gid].stamp = now;
       last_beacon[gid].seq = 0;
     }
   }
@@ -3056,3 +3073,10 @@ MDSMonitor::MDSMonitor(Monitor *mn, Paxos *p, string service_name)
         "mds rm_data_pool"));
 }
 
+void MDSMonitor::on_restart()
+{
+  // Clear out the leader-specific state.
+  last_tick = utime_t();
+  last_beacon.clear();
+}
+
diff --git a/src/mon/MDSMonitor.h b/src/mon/MDSMonitor.h
index 776948cc63ec7..7b79f443be4c6 100644
--- a/src/mon/MDSMonitor.h
+++ b/src/mon/MDSMonitor.h
@@ -55,6 +55,7 @@ class MDSMonitor : public PaxosService {
   bool should_propose(double& delay);
 
   void on_active();
+  void on_restart();
 
   void check_subs();
   void check_sub(Subscription *sub);
@@ -153,6 +154,11 @@ class MDSMonitor : public PaxosService {
 
   int _check_pool(const int64_t pool_id, std::stringstream *ss) const;
   mds_gid_t gid_from_arg(const std::string& arg, std::ostream& err);
+
+  // When did the mon last call into our tick() method?  Used for detecting
+  // when the mon was not updating us for some period (e.g. during slow
+  // election) to reset last_beacon timeouts
+  utime_t last_tick;
 };
 
 #endif
-- 
2.39.5