From: Sage Weil Date: Fri, 30 May 2008 19:16:07 +0000 (-0700) Subject: paxos: add timeout to collect stage X-Git-Tag: v0.3~176 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=2dbc0674201456f77752888a61a36bdb2fb6ccd8;p=ceph.git paxos: add timeout to collect stage --- diff --git a/src/mon/Paxos.cc b/src/mon/Paxos.cc index 23eb9fc419d0..2af1488bde7b 100644 --- a/src/mon/Paxos.cc +++ b/src/mon/Paxos.cc @@ -79,6 +79,9 @@ void Paxos::collect(version_t oldpn) mon->messenger->send_message(collect, mon->monmap->get_inst(*p)); } + // set timeout event + collect_timeout_event = new C_CollectTimeout(this); + mon->timer.add_event_after(g_conf.mon_accept_timeout, collect_timeout_event); } @@ -186,6 +189,11 @@ void Paxos::handle_last(MMonPaxos *last) if (last->pn > accepted_pn) { // no, try again. dout(10) << " they had a higher pn than us, picking a new one." << dendl; + + // cancel timeout event + mon->timer.cancel_event(collect_timeout_event); + collect_timeout_event = 0; + collect(last->pn); } else { // yes, they accepted our pn. great. @@ -207,6 +215,10 @@ void Paxos::handle_last(MMonPaxos *last) // is that everyone? if (num_last == mon->get_quorum().size()) { + // cancel timeout event + mon->timer.cancel_event(collect_timeout_event); + collect_timeout_event = 0; + // almost... 
state = STATE_ACTIVE; @@ -231,6 +243,15 @@ void Paxos::handle_last(MMonPaxos *last) delete last; } +void Paxos::collect_timeout() +{ + dout(5) << "collect timeout, calling fresh election" << dendl; + collect_timeout_event = 0; + assert(mon->is_leader()); + cancel_events(); + mon->call_election(); +} + // leader void Paxos::begin(bufferlist& v) @@ -626,6 +647,10 @@ version_t Paxos::get_new_proposal_number(version_t gt) void Paxos::cancel_events() { + if (collect_timeout_event) { + mon->timer.cancel_event(collect_timeout_event); + collect_timeout_event = 0; + } if (accept_timeout_event) { mon->timer.cancel_event(accept_timeout_event); accept_timeout_event = 0; diff --git a/src/mon/Paxos.h b/src/mon/Paxos.h index be1bf68634b0..425dd0884754 100644 --- a/src/mon/Paxos.h +++ b/src/mon/Paxos.h @@ -120,6 +120,8 @@ private: version_t uncommitted_pn; bufferlist uncommitted_value; + Context *collect_timeout_event; + // active set<int> acked_lease; Context *lease_renew_event; @@ -135,6 +137,15 @@ private: list<Context*> waiting_for_writeable; list<Context*> waiting_for_commit; + class C_CollectTimeout : public Context { + Paxos *paxos; + public: + C_CollectTimeout(Paxos *p) : paxos(p) {} + void finish(int r) { + paxos->collect_timeout(); + } + }; + class C_AcceptTimeout : public Context { Paxos *paxos; public: @@ -176,10 +187,13 @@ private: void collect(version_t oldpn); void handle_collect(MMonPaxos*); void handle_last(MMonPaxos*); + void collect_timeout(); + void begin(bufferlist& value); void handle_begin(MMonPaxos*); void handle_accept(MMonPaxos*); void accept_timeout(); + void commit(); void handle_commit(MMonPaxos*); void extend_lease(); @@ -200,6 +214,7 @@ public: machine_id(mid), machine_name(get_paxos_name(mid)), state(STATE_RECOVERING), + collect_timeout_event(0), lease_renew_event(0), lease_ack_timeout_event(0), lease_timeout_event(0),