From c54f1e4d66b22bad715ac17e9baa72ab93e48c46 Mon Sep 17 00:00:00 2001
From: Sage Weil
Date: Wed, 13 Aug 2014 16:17:02 -0700
Subject: [PATCH] mon/Paxos: share state and verify contiguity early in collect phase

We verify peons are contiguous and share new paxos states to catch peons
up at the end of the round. Do this each time we (potentially) get new
states via a collect message. This will allow peons to be pulled forward
and remain contiguous when they otherwise would not have been able to.
For example, if

  mon.0 (leader)  20..30
  mon.1 (peon)    15..25
  mon.2 (peon)    28..40

If we got mon.1 first and then mon.2 second, we would store the new txns
and then boot mon.1 out at the end because 15..25 is not contiguous with
28..40. However, with this change, we share 26..30 to mon.1 when we get
the collect, and then 31..40 when we get mon.2's collect, pulling them
both into the final quorum.

It also breaks the 'catch-up' work into smaller pieces, which ought to
smooth out latency a bit.

Signed-off-by: Sage Weil
---
 src/mon/Paxos.cc | 50 ++++++++++++++++++++++++------------------------
 1 file changed, 25 insertions(+), 25 deletions(-)

diff --git a/src/mon/Paxos.cc b/src/mon/Paxos.cc index 5100a9491b1b..31c57d9c0fc6 100644 --- a/src/mon/Paxos.cc +++ b/src/mon/Paxos.cc @@ -418,6 +418,31 @@ void Paxos::handle_last(MMonPaxos *last) assert(g_conf->paxos_kill_at != 2); + // is everyone contiguous and up to date? + for (map::iterator p = peer_last_committed.begin(); + p != peer_last_committed.end(); + ++p) { + if (p->second < first_committed && first_committed > 1) { + dout(5) << __func__ + << " peon " << p->first + << " last_committed (" << p->second + << ") is too low for our first_committed (" << first_committed + << ") -- bootstrap!" << dendl; + last->put(); + mon->bootstrap(); + return; + } + if (p->second < last_committed) { + // share committed values + dout(10) << " sending commit to mon."
<< p->first << dendl; + MMonPaxos *commit = new MMonPaxos(mon->get_epoch(), + MMonPaxos::OP_COMMIT, + ceph_clock_now(g_ceph_context)); + share_state(commit, peer_first_committed[p->first], p->second); + mon->messenger->send_message(commit, mon->monmap->get_inst(p->first)); + } + } + // do they accept your pn? if (last->pn > accepted_pn) { // no, try again. @@ -459,31 +484,6 @@ void Paxos::handle_last(MMonPaxos *last) // cancel timeout event mon->timer.cancel_event(collect_timeout_event); collect_timeout_event = 0; - - // is everyone contiguous and up to date? - for (map::iterator p = peer_last_committed.begin(); - p != peer_last_committed.end(); - ++p) { - if (p->second < first_committed && first_committed > 1) { - dout(5) << __func__ - << " peon " << p->first - << " last_committed (" << p->second - << ") is too low for our first_committed (" << first_committed - << ") -- bootstrap!" << dendl; - last->put(); - mon->bootstrap(); - return; - } - if (p->second < last_committed) { - // share committed values - dout(10) << " sending commit to mon." << p->first << dendl; - MMonPaxos *commit = new MMonPaxos(mon->get_epoch(), - MMonPaxos::OP_COMMIT, - ceph_clock_now(g_ceph_context)); - share_state(commit, peer_first_committed[p->first], p->second); - mon->messenger->send_message(commit, mon->monmap->get_inst(p->first)); - } - } peer_first_committed.clear(); peer_last_committed.clear(); -- 2.47.3