From: Sage Weil Date: Fri, 12 Jul 2013 21:47:09 +0000 (-0700) Subject: mon: fix scrub vs paxos race: refresh on commit, not round completion X-Git-Tag: v0.67-rc1~78^2~8 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=f1ce8d7c955a2443111bf7d9e16b4c563d445712;p=ceph.git mon: fix scrub vs paxos race: refresh on commit, not round completion Consider: - paxos starts a commit N+1 - a majority of the peers ack it - paxos::commit() writes N+1 it to disk - tells peers to commit - peers commit N+1, *and* refresh_from_paxos(), and generate N+1 full map - leader does _scrub on N+1, without latest full osdmap - peers do _scrub on N+1, with latest full osdmap - leader finishes paxos gather, does refresh_from_paxos() -> scrub fails. Fix this by doing the refresh_from_paxos() at commit time and not when the paxos round finishes. We move the refresh out of finish_proposal and into its own helper, and update all callers accordingly. This keeps on-disk state more tightly in sync with in-memory state and avoids the need for a e.g., kludgey workaround in the scrub code. We also simplify the bootstrap checks a bit by doing so immediately and relying on the normal bootstrap paxos reset paths to clean up any waiters. Signed-off-by: Sage Weil --- diff --git a/src/mon/Paxos.cc b/src/mon/Paxos.cc index d988c6415476a..e6f20a6734874 100644 --- a/src/mon/Paxos.cc +++ b/src/mon/Paxos.cc @@ -427,11 +427,13 @@ void Paxos::handle_last(MMonPaxos *last) dout(10) << "that's everyone. active!" << dendl; extend_lease(); - finish_proposal(); + if (do_refresh()) { + finish_proposal(); - finish_contexts(g_ceph_context, waiting_for_active); - finish_contexts(g_ceph_context, waiting_for_readable); - finish_contexts(g_ceph_context, waiting_for_writeable); + finish_contexts(g_ceph_context, waiting_for_active); + finish_contexts(g_ceph_context, waiting_for_readable); + finish_contexts(g_ceph_context, waiting_for_writeable); + } } } } else { @@ -507,11 +509,13 @@ void Paxos::begin(bufferlist& v) if (mon->get_quorum().size() == 1) { // we're alone, take it easy commit(); - finish_proposal(); - finish_contexts(g_ceph_context, waiting_for_active); - finish_contexts(g_ceph_context, waiting_for_commit); - finish_contexts(g_ceph_context, waiting_for_readable); - finish_contexts(g_ceph_context, waiting_for_writeable); + if (do_refresh()) { + finish_proposal(); + finish_contexts(g_ceph_context, waiting_for_active); + finish_contexts(g_ceph_context, waiting_for_commit); + finish_contexts(g_ceph_context, waiting_for_readable); + finish_contexts(g_ceph_context, waiting_for_writeable); + } return; } @@ -612,6 +616,8 @@ void Paxos::handle_accept(MMonPaxos *accept) // note: this may happen before the lease is reextended (below) dout(10) << " got majority, committing" << dendl; commit(); + if (!do_refresh()) + goto out; } // done? @@ -632,6 +638,8 @@ void Paxos::handle_accept(MMonPaxos *accept) finish_contexts(g_ceph_context, waiting_for_readable); finish_contexts(g_ceph_context, waiting_for_writeable); } + + out: accept->put(); } @@ -787,14 +795,26 @@ void Paxos::warn_on_future_time(utime_t t, entity_name_t from) } -void Paxos::finish_proposal() +bool Paxos::do_refresh() { - assert(mon->is_leader()); + bool need_bootstrap = false; // make sure we have the latest state loaded up - bool need_bootstrap = false; mon->refresh_from_paxos(&need_bootstrap); + if (need_bootstrap) { + dout(10) << " doing requested bootstrap" << dendl; + mon->bootstrap(); + return false; + } + + return true; +} + +void Paxos::finish_proposal() +{ + assert(mon->is_leader()); + // ok, now go active! state = STATE_ACTIVE; @@ -819,12 +839,6 @@ void Paxos::finish_proposal() dout(10) << __func__ << " state " << state << " proposals left " << proposals.size() << dendl; - if (need_bootstrap) { - dout(10) << " doing requested bootstrap" << dendl; - mon->bootstrap(); - return; - } - if (should_trim()) { trim(); } diff --git a/src/mon/Paxos.h b/src/mon/Paxos.h index 1cdad50e5bbed..5860aaac2abf4 100644 --- a/src/mon/Paxos.h +++ b/src/mon/Paxos.h @@ -983,6 +983,17 @@ private: * Begin proposing the Proposal at the front of the proposals queue. */ void propose_queued(); + + /** + * refresh state from store + * + * Called when we have new state for the mon to consume. If we return false, + * abort (we triggered a bootstrap). + * + * @returns true on success, false if we are now bootstrapping + */ + bool do_refresh(); + void finish_proposal(); public: