From f60702822ab7681e6b53b31d04913ecf9eb9a7bd Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Sun, 20 Nov 2011 14:26:09 -0800 Subject: [PATCH] paxos: fix sharing of learned commits during collect/last We can learn either an uncommitted or committed value during the collect/last recovery phase. For the committed values, we need to remember each peer's first/last_committed and share only at the end to avoid a situation like: - mon.1 has same last_committed as us - mon.2 has newer last_commited, we save it - mon.3 has same last_commited as mon.1, we share new value - done... but mon.1 never got mon.2's newer commit. Instead, save the commit sharing until the collect process completes, so we know that any committed value learned from anyone is shared with everyone who needs it. This fixes a crash like mon/Paxos.cc: In function 'void Paxos::handle_begin(MMonPaxos*)', in thread '7fd91192c700' mon/Paxos.cc: 400: FAILED assert(begin->last_committed == last_committed) ceph version 0.38-208-g9aabd39 (commit:9aabd3982cceb7e8489412b4bfbb4c2387880de2) 1: (ceph::__ceph_assert_fail(char const*, char const*, int, char const*)+0x76) [0x72454e] 2: (Paxos::handle_begin(MMonPaxos*)+0x363) [0x6499ef] 3: (Paxos::dispatch(PaxosServiceMessage*)+0x2b4) [0x64db2c] 4: (Monitor::_ms_dispatch(Message*)+0xdc6) [0x6205c2] 5: (Monitor::ms_dispatch(Message*)+0x3a) [0x62831a] 6: (Messenger::ms_deliver_dispatch(Message*)+0x63) [0x7d1f31] 7: (SimpleMessenger::dispatch_entry()+0x7c2) [0x7bb786] 8: (SimpleMessenger::DispatchThread::entry()+0x2c) [0x6070fa] 9: (Thread::_entry_func(void*)+0x23) [0x6f3f69] 10: (()+0x7971) [0x7fd9153a1971] 11: (clone()+0x6d) [0x7fd913c3092d] Signed-off-by: Sage Weil --- src/mon/Paxos.cc | 28 ++++++++++++++++++++-------- src/mon/Paxos.h | 1 + 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/src/mon/Paxos.cc b/src/mon/Paxos.cc index 437cc393d8b77..40600d1b8bafc 100644 --- a/src/mon/Paxos.cc +++ b/src/mon/Paxos.cc @@ -63,6 +63,8 @@ void Paxos::collect(version_t oldpn) uncommitted_v = 0; uncommitted_pn = 0; uncommitted_value.clear(); + peer_first_committed.clear(); + peer_last_committed.clear(); // look for uncommitted value if (mon->store->exists_bl_sn(machine_name, last_committed+1)) { @@ -247,14 +249,9 @@ void Paxos::handle_last(MMonPaxos *last) return; } - // share committed values? - if (last->last_committed < last_committed) { - // share committed values - dout(10) << "sending commit to " << last->get_source() << dendl; - MMonPaxos *commit = new MMonPaxos(mon->get_epoch(), MMonPaxos::OP_COMMIT, machine_id); - share_state(commit, last->first_committed, last->last_committed); - mon->messenger->send_message(commit, last->get_source_inst()); - } + // note peer's last_committed, in case we learn a new commit and need to + // push it to them. + peer_last_committed[last->get_source().num()] = last->last_committed; // did we receive a committed value? store_state(last); @@ -293,6 +290,21 @@ void Paxos::handle_last(MMonPaxos *last) mon->timer.cancel_event(collect_timeout_event); collect_timeout_event = 0; + // share committed values? + for (map::iterator p = peer_last_committed.begin(); + p != peer_last_committed.end(); + ++p) { + if (p->second < last_committed) { + // share committed values + dout(10) << " sending commit to mon." << p->first << dendl; + MMonPaxos *commit = new MMonPaxos(mon->get_epoch(), MMonPaxos::OP_COMMIT, machine_id); + share_state(commit, peer_first_committed[p->first], p->second); + mon->messenger->send_message(commit, mon->monmap->get_inst(p->first)); + } + } + peer_first_committed.clear(); + peer_last_committed.clear(); + // almost... state = STATE_ACTIVE; diff --git a/src/mon/Paxos.h b/src/mon/Paxos.h index a340cdab3b9e9..e67a975f4fa6a 100644 --- a/src/mon/Paxos.h +++ b/src/mon/Paxos.h @@ -112,6 +112,7 @@ private: utime_t last_commit_time; version_t accepted_pn; version_t accepted_pn_from; + map peer_first_committed, peer_last_committed; // active (phase 2) utime_t lease_expire; -- 2.39.5