From a4042d51bcf1444350fa21763003e57aca984f87 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 18 May 2018 09:18:11 -0500 Subject: [PATCH] os/bluestore: fix flush_commit locking We were updating the txc state to KV_DONE and queuing the oncommits waiters without holding any locks. This was mostly fine, *except* that Collection|OpSequencer::flush_commit(Context *) was looking at the state (under qlock) and also adding items to oncommits. The flush_commit() method is only used in 2 places: osd bench, and the PG reset_interval_flush outgoing message blocking machinery (which is a bit ick). The first we could get rid of, but the second is hard to remove (despite its ick factor). The simple fix is to take qlock while updating the state value and working with oncommits. Fixes: http://tracker.ceph.com/issues/21480 Signed-off-by: Sage Weil --- src/os/bluestore/BlueStore.cc | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index b1cfed2651e94..a4a150cebf0db 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -8451,8 +8451,6 @@ void BlueStore::_txc_state_proc(TransContext *txc) } return; case TransContext::STATE_KV_SUBMITTED: - txc->log_state_latency(logger, l_bluestore_state_kv_committing_lat); - txc->state = TransContext::STATE_KV_DONE; _txc_committed_kv(txc); // ** fall-thru ** @@ -8644,8 +8642,13 @@ void BlueStore::_txc_applied_kv(TransContext *txc) void BlueStore::_txc_committed_kv(TransContext *txc) { dout(20) << __func__ << " txc " << txc << dendl; + { + std::lock_guard l(txc->osr->qlock); + txc->state = TransContext::STATE_KV_DONE; + finishers[txc->osr->shard]->queue(txc->oncommits); + } + txc->log_state_latency(logger, l_bluestore_state_kv_committing_lat); logger->tinc(l_bluestore_commit_lat, ceph_clock_now() - txc->start); - finishers[txc->osr->shard]->queue(txc->oncommits); } void BlueStore::_txc_finish(TransContext *txc) -- 2.39.5