git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
mon: kill client sessions when we're not in quorum
author: Greg Farnum <gregory.farnum@dreamhost.com>
Thu, 5 Jan 2012 22:03:43 +0000 (14:03 -0800)
committer: Greg Farnum <gregory.farnum@dreamhost.com>
Thu, 5 Jan 2012 23:41:47 +0000 (15:41 -0800)
After the monitor has been out of quorum for 2 * mon_lease (i.e., two
election rounds), kill existing client sessions so they can reconnect
to a monitor that has (hopefully) remained in the quorum. Let any
client sessions established after we left the quorum stick around for
one mon_lease interval, then kill them too if we are still out of quorum.

Signed-off-by: Greg Farnum <gregory.farnum@dreamhost.com>
src/mon/Monitor.cc
src/mon/Monitor.h
src/mon/Session.h

index 2f8ee63324b275fbe62644a2e803e4d69e1c639f..7e0e0e75793d4005bc2409016e491620b6d01177 100644 (file)
@@ -122,6 +122,8 @@ Monitor::Monitor(CephContext* cct_, string nm, MonitorStore *s, Messenger *m, Mo
   mon_caps = new MonCaps();
   mon_caps->set_allow_all(true);
   mon_caps->text = "allow *";
+
+  exited_quorum = ceph_clock_now(g_ceph_context);
 }
 
 Paxos *Monitor::add_paxos(int type)
@@ -352,6 +354,9 @@ void Monitor::reset()
 {
   dout(10) << "reset" << dendl;
   leader_since = utime_t();
+  if (!quorum.empty()) {
+    exited_quorum = ceph_clock_now(g_ceph_context);
+  }
   quorum.clear();
   outside_quorum.clear();
 
@@ -737,6 +742,7 @@ void Monitor::lose_election(epoch_t epoch, set<int> &q, int l)
 
 void Monitor::finish_election()
 {
+  exited_quorum = utime_t();
   finish_contexts(g_ceph_context, waitfor_quorum);
   resend_routed_requests();
   update_logger();
@@ -1605,6 +1611,21 @@ void Monitor::tick()
               << " (until " << s->until << " < now " << now << ")" << dendl;
       messenger->mark_down(s->inst.addr);
       remove_session(s);
+    } else if (!exited_quorum.is_zero()) {
+      if (s->time_established < exited_quorum) {
+        if (now > (exited_quorum + 2 * g_conf->mon_lease)) {
+          // boot the client Session because we've taken too long getting back in
+          dout(10) << " trimming session " << s->inst
+              << " because we've been out of quorum too long" << dendl;
+          messenger->mark_down(s->inst.addr);
+          remove_session(s);
+        }
+      } else if (s->time_established + g_conf->mon_lease < now) {
+        dout(10) << " trimming session " << s->inst
+                 << " because we're still not in quorum" << dendl;
+        messenger->mark_down(s->inst.addr);
+        remove_session(s);
+      }
     }
   }
 
index e215972103cc76b14d76149451cdbe01e178c55c..31ac4f5273f82f13e3e374afd2d56c43908ece54 100644 (file)
@@ -160,6 +160,7 @@ private:
   int leader;            // current leader (to best of knowledge)
   set<int> quorum;       // current active set of monitors (if !starting)
   utime_t leader_since;  // when this monitor became the leader, if it is the leader
+  utime_t exited_quorum; // time detected as not in quorum; 0 if in
 
   set<string> outside_quorum;
   entity_inst_t slurp_source;
index fc3a658c912e3c1aa1a131356ec45645f158bd98..0aa8a777fa8e1af0e4b5d9b95bc7f50aa9e8adb4 100644 (file)
@@ -40,6 +40,7 @@ struct MonSession : public RefCountedObject {
   Connection *con;
   entity_inst_t inst;
   utime_t until;
+  utime_t time_established;
   bool closed;
   xlist<MonSession*>::item item;
   set<uint64_t> routed_request_tids;
@@ -57,7 +58,9 @@ struct MonSession : public RefCountedObject {
   MonSession(entity_inst_t i, Connection *c) :
     con(c->get()), inst(i), closed(false), item(this),
     global_id(0), notified_global_id(0), auth_handler(NULL),
-    proxy_con(NULL), proxy_tid(0) {}
+    proxy_con(NULL), proxy_tid(0) {
+    time_established = ceph_clock_now(g_ceph_context);
+  }
   ~MonSession() {
     if (con)
       con->put();