mon: make MDSMonitor tolerant of slow mon elections

author John Spray <john.spray@redhat.com>

Wed, 21 Sep 2016 10:45:38 +0000 (11:45 +0100)

committer John Spray <john.spray@redhat.com>

Tue, 15 Nov 2016 23:09:25 +0000 (23:09 +0000)
author John Spray <john.spray@redhat.com>
Wed, 21 Sep 2016 10:45:38 +0000 (11:45 +0100)
committer John Spray <john.spray@redhat.com>
Tue, 15 Nov 2016 23:09:25 +0000 (23:09 +0000)
diff --git a/src/mon/MDSMonitor.cc b/src/mon/MDSMonitor.cc

index 55ee61b1e44ed7fa958212ba12d97b07a95f842b..e0b8be5de1430defebdd96ae02970ce3e086b2e0 100644 (file)
--- a/src/mon/MDSMonitor.cc
+++ b/src/mon/MDSMonitor.cc
@@ -2965,8 +2965,25 @@ void MDSMonitor::tick()
      do_propose |= maybe_expand_cluster(i.second);
    }
  
+  const auto now = ceph_clock_now(g_ceph_context);
+  if (last_tick.is_zero()) {
+    last_tick = now;
+  }
+
+  if (now - last_tick > (g_conf->mds_beacon_grace - g_conf->mds_beacon_interval)) {
+    // This case handles either local slowness (calls being delayed
+    // for whatever reason) or cluster election slowness (a long gap
+    // between calls while an election happened)
+    dout(4) << __func__ << ": resetting beacon timeouts due to mon delay "
+            "(slow election?) of " << now - last_tick << " seconds" << dendl;
+    for (auto &i : last_beacon) {
+      i.second.stamp = now;
+    }
+  }
+
+  last_tick = now;
+
    // check beacon timestamps
-  utime_t now = ceph_clock_now(g_ceph_context);
    utime_t cutoff = now;
    cutoff -= g_conf->mds_beacon_grace;
  
@@ -2974,7 +2991,7 @@ void MDSMonitor::tick()
    for (const auto &p : pending_fsmap.mds_roles) {
      auto &gid = p.first;
      if (last_beacon.count(gid) == 0) {
-      last_beacon[gid].stamp = ceph_clock_now(g_ceph_context);
+      last_beacon[gid].stamp = now;
        last_beacon[gid].seq = 0;
      }
    }
@@ -3056,3 +3073,10 @@ MDSMonitor::MDSMonitor(Monitor *mn, Paxos *p, string service_name)
          "mds rm_data_pool"));
  }
  
+void MDSMonitor::on_restart()
+{
+  // Clear out the leader-specific state; tick() re-stamps a zeroed last_tick and any missing last_beacon entry with the current time.
+  last_tick = utime_t();
+  last_beacon.clear();
+}
+
diff --git a/src/mon/MDSMonitor.h b/src/mon/MDSMonitor.h

index 776948cc63ec735b15d25b0f9e4dfd0d3dbbf96c..7b79f443be4c62753423e0d1dae2cb5d0a38e740 100644 (file)
--- a/src/mon/MDSMonitor.h
+++ b/src/mon/MDSMonitor.h
@@ -55,6 +55,7 @@ class MDSMonitor : public PaxosService {
    bool should_propose(double& delay);
  
    void on_active();
+  void on_restart();
  
    void check_subs();
    void check_sub(Subscription *sub);
@@ -153,6 +154,11 @@ class MDSMonitor : public PaxosService {
  
    int _check_pool(const int64_t pool_id, std::stringstream *ss) const;
    mds_gid_t gid_from_arg(const std::string& arg, std::ostream& err);
+
+  // When did the mon last call into our tick() method?  Used for detecting
+  // when the mon was not updating us for some period (e.g. during slow
+  // election) to reset last_beacon timeouts
+  utime_t last_tick;
  };
  
  #endif
author	John Spray <john.spray@redhat.com>
	Wed, 21 Sep 2016 10:45:38 +0000 (11:45 +0100)
committer	John Spray <john.spray@redhat.com>
	Tue, 15 Nov 2016 23:09:25 +0000 (23:09 +0000)
src/mon/MDSMonitor.cc		patch | blob | history
src/mon/MDSMonitor.h		patch | blob | history