]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
paxos: add timeout to collect stage
authorSage Weil <sage@newdream.net>
Fri, 30 May 2008 19:16:07 +0000 (12:16 -0700)
committerSage Weil <sage@newdream.net>
Fri, 30 May 2008 19:16:07 +0000 (12:16 -0700)
src/mon/Paxos.cc
src/mon/Paxos.h

index 23eb9fc419d0fddfc5385c9aceafdc621ca5f884..2af1488bde7be11b81ad135ed2876dce2f60e439 100644 (file)
@@ -79,6 +79,9 @@ void Paxos::collect(version_t oldpn)
     mon->messenger->send_message(collect, mon->monmap->get_inst(*p));
   }
 
+  // set timeout event
+  collect_timeout_event = new C_CollectTimeout(this);
+  mon->timer.add_event_after(g_conf.mon_accept_timeout, collect_timeout_event);
 }
 
 
@@ -186,6 +189,11 @@ void Paxos::handle_last(MMonPaxos *last)
   if (last->pn > accepted_pn) {
     // no, try again.
     dout(10) << " they had a higher pn than us, picking a new one." << dendl;
+
+    // cancel timeout event
+    mon->timer.cancel_event(collect_timeout_event);
+    collect_timeout_event = 0;
+
     collect(last->pn);
   } else {
     // yes, they accepted our pn.  great.
@@ -207,6 +215,10 @@ void Paxos::handle_last(MMonPaxos *last)
     
     // is that everyone?
     if (num_last == mon->get_quorum().size()) {
+      // cancel timeout event
+      mon->timer.cancel_event(collect_timeout_event);
+      collect_timeout_event = 0;
+
       // almost...
       state = STATE_ACTIVE;
 
@@ -231,6 +243,15 @@ void Paxos::handle_last(MMonPaxos *last)
   delete last;
 }
 
+void Paxos::collect_timeout()
+{
+  dout(5) << "collect timeout, calling fresh election" << dendl;
+  collect_timeout_event = 0;
+  assert(mon->is_leader());
+  cancel_events();
+  mon->call_election();
+}
+
 
 // leader
 void Paxos::begin(bufferlist& v)
@@ -626,6 +647,10 @@ version_t Paxos::get_new_proposal_number(version_t gt)
 
 void Paxos::cancel_events()
 {
+  if (collect_timeout_event) {
+    mon->timer.cancel_event(collect_timeout_event);
+    collect_timeout_event = 0;
+  }
   if (accept_timeout_event) {
     mon->timer.cancel_event(accept_timeout_event);
     accept_timeout_event = 0;
index be1bf68634b0a327073b574c12063495df6833e5..425dd0884754794237d0f400499333fe23e680ac 100644 (file)
@@ -120,6 +120,8 @@ private:
   version_t  uncommitted_pn;
   bufferlist uncommitted_value;
 
+  Context    *collect_timeout_event;
+
   // active
   set<int>   acked_lease;
   Context    *lease_renew_event;
@@ -135,6 +137,15 @@ private:
   list<Context*> waiting_for_writeable;
   list<Context*> waiting_for_commit;
 
+  class C_CollectTimeout : public Context {
+    Paxos *paxos;
+  public:
+    C_CollectTimeout(Paxos *p) : paxos(p) {}
+    void finish(int r) {
+      paxos->collect_timeout();
+    }
+  };
+
   class C_AcceptTimeout : public Context {
     Paxos *paxos;
   public:
@@ -176,10 +187,13 @@ private:
   void collect(version_t oldpn);
   void handle_collect(MMonPaxos*);
   void handle_last(MMonPaxos*);
+  void collect_timeout();
+
   void begin(bufferlist& value);
   void handle_begin(MMonPaxos*);
   void handle_accept(MMonPaxos*);
   void accept_timeout();
+
   void commit();
   void handle_commit(MMonPaxos*);
   void extend_lease();
@@ -200,6 +214,7 @@ public:
                   machine_id(mid), 
                   machine_name(get_paxos_name(mid)),
                   state(STATE_RECOVERING),
+                  collect_timeout_event(0),
                   lease_renew_event(0),
                   lease_ack_timeout_event(0),
                   lease_timeout_event(0),