From: Sage Weil Date: Wed, 13 Aug 2014 23:17:02 +0000 (-0700) Subject: mon/Paxos: share state and verify contiguity early in collect phase X-Git-Tag: v0.80.8~32^2~2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=1f4aaf648f4aa6f6056d0e8ce629eeea05c5424d;p=ceph.git mon/Paxos: share state and verify contiguity early in collect phase Currently we verify that peons are contiguous and share new paxos states to catch peons up only at the end of the round. Do this instead each time we (potentially) get new states via a collect message. This will allow peons to be pulled forward and remain contiguous when they otherwise would not have been able to. For example, suppose mon.0 (leader) has 20..30, mon.1 (peon) has 15..25, and mon.2 (peon) has 28..40. If we got mon.1 first and then mon.2 second, we would store the new txns and then boot mon.1 out at the end because 15..25 is not contiguous with 28..40. However, with this change, we share 26..30 to mon.1 when we get the collect, and then 31..40 when we get mon.2's collect, pulling them both into the final quorum. It also breaks the 'catch-up' work into smaller pieces, which ought to smooth out latency a bit. Signed-off-by: Sage Weil (cherry picked from commit c54f1e4d66b22bad715ac17e9baa72ab93e48c46) --- diff --git a/src/mon/Paxos.cc b/src/mon/Paxos.cc index 0a5083dc712f..4e7ff50312db 100644 --- a/src/mon/Paxos.cc +++ b/src/mon/Paxos.cc @@ -482,6 +482,31 @@ void Paxos::handle_last(MMonPaxos *last) assert(g_conf->paxos_kill_at != 2); + // is everyone contiguous and up to date? + for (map<int,version_t>::iterator p = peer_last_committed.begin(); + p != peer_last_committed.end(); + ++p) { + if (p->second < first_committed && first_committed > 1) { + dout(5) << __func__ + << " peon " << p->first + << " last_committed (" << p->second + << ") is too low for our first_committed (" << first_committed + << ") -- bootstrap!" 
<< dendl; + last->put(); + mon->bootstrap(); + return; + } + if (p->second < last_committed) { + // share committed values + dout(10) << " sending commit to mon." << p->first << dendl; + MMonPaxos *commit = new MMonPaxos(mon->get_epoch(), + MMonPaxos::OP_COMMIT, + ceph_clock_now(g_ceph_context)); + share_state(commit, peer_first_committed[p->first], p->second); + mon->messenger->send_message(commit, mon->monmap->get_inst(p->first)); + } + } + // do they accept your pn? if (last->pn > accepted_pn) { // no, try again. @@ -523,31 +548,6 @@ void Paxos::handle_last(MMonPaxos *last) // cancel timeout event mon->timer.cancel_event(collect_timeout_event); collect_timeout_event = 0; - - // is everyone contiguous and up to date? - for (map<int,version_t>::iterator p = peer_last_committed.begin(); - p != peer_last_committed.end(); - ++p) { - if (p->second < first_committed && first_committed > 1) { - dout(5) << __func__ - << " peon " << p->first - << " last_committed (" << p->second - << ") is too low for our first_committed (" << first_committed - << ") -- bootstrap!" << dendl; - last->put(); - mon->bootstrap(); - return; - } - if (p->second < last_committed) { - // share committed values - dout(10) << " sending commit to mon." << p->first << dendl; - MMonPaxos *commit = new MMonPaxos(mon->get_epoch(), - MMonPaxos::OP_COMMIT, - ceph_clock_now(g_ceph_context)); - share_state(commit, peer_first_committed[p->first], p->second); - mon->messenger->send_message(commit, mon->monmap->get_inst(p->first)); - } - } peer_first_committed.clear(); peer_last_committed.clear();