From c1404574895bc5a091f36b8119c7500fe1e287f1 Mon Sep 17 00:00:00 2001 From: David Zafman Date: Mon, 7 Aug 2017 12:48:27 -0700 Subject: [PATCH] osd: Fix Paxos shutdown handling for commit_finish race Fixes: http://tracker.ceph.com/issues/20921 Signed-off-by: David Zafman --- src/mon/Paxos.cc | 23 +++++++++++++++++++++++ src/mon/Paxos.h | 11 +++++++++++ 2 files changed, 34 insertions(+) diff --git a/src/mon/Paxos.cc b/src/mon/Paxos.cc index 0109643b52044..3555d60dba5d7 100644 --- a/src/mon/Paxos.cc +++ b/src/mon/Paxos.cc @@ -817,10 +817,22 @@ struct C_Committed : public Context { void finish(int r) override { assert(r >= 0); Mutex::Locker l(paxos->mon->lock); + if (paxos->is_shutdown()) { + paxos->abort_commit(); + return; + } paxos->commit_finish(); } }; +void Paxos::abort_commit() +{ + assert(commits_started > 0); + --commits_started; + if (commits_started == 0) + shutdown_cond.Signal(); +} + void Paxos::commit_start() { dout(10) << __func__ << " " << (last_committed+1) << dendl; @@ -855,6 +867,7 @@ void Paxos::commit_start() state = STATE_WRITING; else ceph_abort(); + ++commits_started; if (mon->get_quorum().size() > 1) { // cancel timeout event @@ -910,6 +923,8 @@ void Paxos::commit_finish() // it doesn't need to flush the store queue assert(is_writing() || is_writing_previous()); state = STATE_REFRESH; + assert(commits_started > 0); + --commits_started; if (do_refresh()) { commit_proposal(); @@ -1301,9 +1316,17 @@ void Paxos::shutdown() { dout(10) << __func__ << " cancel all contexts" << dendl; + state = STATE_SHUTDOWN; + // discard pending transaction pending_proposal.reset(); + // Let store finish commits in progress + // XXX: I assume I can't use finish_contexts() because the store + // is going to trigger + while(commits_started > 0) + shutdown_cond.Wait(mon->lock); + finish_contexts(g_ceph_context, waiting_for_writeable, -ECANCELED); finish_contexts(g_ceph_context, waiting_for_commit, -ECANCELED); finish_contexts(g_ceph_context, waiting_for_readable, -ECANCELED); diff --git a/src/mon/Paxos.h b/src/mon/Paxos.h index 4ba84e6cc45b5..896aafa07eaf1 100644 --- a/src/mon/Paxos.h +++ b/src/mon/Paxos.h @@ -230,6 +230,8 @@ public: STATE_WRITING_PREVIOUS, // leader: refresh following a commit STATE_REFRESH, + // Shutdown after WRITING or WRITING_PREVIOUS + STATE_SHUTDOWN }; /** @@ -257,6 +259,8 @@ public: return "writing-previous"; case STATE_REFRESH: return "refresh"; + case STATE_SHUTDOWN: + return "shutdown"; default: return "UNKNOWN"; } @@ -270,6 +274,9 @@ private: /** * @} */ + int commits_started = 0; + + Cond shutdown_cond; public: /** @@ -306,6 +313,9 @@ public: /// @return 'true' if we are refreshing an update just committed bool is_refresh() const { return state == STATE_REFRESH; } + /// @return 'true' if we are in the process of shutting down + bool is_shutdown() const { return state == STATE_SHUTDOWN; } + private: /** * @defgroup Paxos_h_recovery_vars Common recovery-related member variables @@ -880,6 +890,7 @@ private: */ void commit_start(); void commit_finish(); ///< finish a commit after txn becomes durable + void abort_commit(); ///< Handle commit finish after shutdown started /** * Commit the new value to stable storage as being the latest available * version. -- 2.39.5