From: Greg Farnum Date: Thu, 5 Jan 2012 22:03:43 +0000 (-0800) Subject: mon: kill client sessions when we're not in quorum X-Git-Tag: v0.40~60^2~2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=99e5f85e97fea6d6421835afabf5a026aaac1cf2;p=ceph.git mon: kill client sessions when we're not in quorum After a timeout of 2*mon_lease length (i.e., two election rounds), kill existing client sessions so they can reconnect to a monitor that's (hopefully) remained in the quorum. Let any new client sessions stick around for a mon_lease interval, then do the same to them. Signed-off-by: Greg Farnum --- diff --git a/src/mon/Monitor.cc b/src/mon/Monitor.cc index 2f8ee63324b2..7e0e0e75793d 100644 --- a/src/mon/Monitor.cc +++ b/src/mon/Monitor.cc @@ -122,6 +122,8 @@ Monitor::Monitor(CephContext* cct_, string nm, MonitorStore *s, Messenger *m, Mo mon_caps = new MonCaps(); mon_caps->set_allow_all(true); mon_caps->text = "allow *"; + + exited_quorum = ceph_clock_now(g_ceph_context); } Paxos *Monitor::add_paxos(int type) @@ -352,6 +354,9 @@ void Monitor::reset() { dout(10) << "reset" << dendl; leader_since = utime_t(); + if (!quorum.empty()) { + exited_quorum = ceph_clock_now(g_ceph_context); + } quorum.clear(); outside_quorum.clear(); @@ -737,6 +742,7 @@ void Monitor::lose_election(epoch_t epoch, set &q, int l) void Monitor::finish_election() { + exited_quorum = utime_t(); finish_contexts(g_ceph_context, waitfor_quorum); resend_routed_requests(); update_logger(); @@ -1605,6 +1611,21 @@ void Monitor::tick() << " (until " << s->until << " < now " << now << ")" << dendl; messenger->mark_down(s->inst.addr); remove_session(s); + } else if (!exited_quorum.is_zero()) { + if (s->time_established < exited_quorum) { + if (now > (exited_quorum + 2 * g_conf->mon_lease)) { + // boot the client Session because we've taken too long getting back in + dout(10) << " trimming session " << s->inst + << " because we've been out of quorum too long" << dendl; 
messenger->mark_down(s->inst.addr); + remove_session(s); + } + } else if (s->time_established + g_conf->mon_lease < now) { + dout(10) << " trimming session " << s->inst + << " because we're still not in quorum" << dendl; + messenger->mark_down(s->inst.addr); + remove_session(s); + } } } diff --git a/src/mon/Monitor.h b/src/mon/Monitor.h index e215972103cc..31ac4f5273f8 100644 --- a/src/mon/Monitor.h +++ b/src/mon/Monitor.h @@ -160,6 +160,7 @@ private: int leader; // current leader (to best of knowledge) set quorum; // current active set of monitors (if !starting) utime_t leader_since; // when this monitor became the leader, if it is the leader + utime_t exited_quorum; // time detected as not in quorum; 0 if in set outside_quorum; entity_inst_t slurp_source; diff --git a/src/mon/Session.h b/src/mon/Session.h index fc3a658c912e..0aa8a777fa8e 100644 --- a/src/mon/Session.h +++ b/src/mon/Session.h @@ -40,6 +40,7 @@ struct MonSession : public RefCountedObject { Connection *con; entity_inst_t inst; utime_t until; + utime_t time_established; bool closed; xlist::item item; set routed_request_tids; @@ -57,7 +58,9 @@ struct MonSession : public RefCountedObject { MonSession(entity_inst_t i, Connection *c) : con(c->get()), inst(i), closed(false), item(this), global_id(0), notified_global_id(0), auth_handler(NULL), - proxy_con(NULL), proxy_tid(0) {} + proxy_con(NULL), proxy_tid(0) { + time_established = ceph_clock_now(g_ceph_context); + } ~MonSession() { if (con) con->put();