From: Sage Weil Date: Fri, 18 May 2018 14:18:11 +0000 (-0500) Subject: os/bluestore: fix flush_commit locking X-Git-Tag: v14.0.0~53^2~1 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=a4042d51bcf1444350fa21763003e57aca984f87;p=ceph.git os/bluestore: fix flush_commit locking We were updating the txc state to KV_DONE and queuing the oncommits waiters without holding any locks. This was mostly fine, *except* that Collection|OpSequencer::flush_commit(Context *) was looking at the state (under qlock) and also adding items to oncommits. The flush_commit() method is only used in 2 places: osd bench, and the PG reset_interval_flush outgoing message blocking machinery (which is a bit ick). The first we could get rid of, but the second is hard to remove (despite its ick factor). The simple fix is to take qlock while updating the state value and working with oncommits. Fixes: http://tracker.ceph.com/issues/21480 Signed-off-by: Sage Weil --- diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index b1cfed2651e94..a4a150cebf0db 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -8451,8 +8451,6 @@ void BlueStore::_txc_state_proc(TransContext *txc) } return; case TransContext::STATE_KV_SUBMITTED: - txc->log_state_latency(logger, l_bluestore_state_kv_committing_lat); - txc->state = TransContext::STATE_KV_DONE; _txc_committed_kv(txc); // ** fall-thru ** @@ -8644,8 +8642,13 @@ void BlueStore::_txc_applied_kv(TransContext *txc) void BlueStore::_txc_committed_kv(TransContext *txc) { dout(20) << __func__ << " txc " << txc << dendl; + { + std::lock_guard l(txc->osr->qlock); + txc->state = TransContext::STATE_KV_DONE; + finishers[txc->osr->shard]->queue(txc->oncommits); + } + txc->log_state_latency(logger, l_bluestore_state_kv_committing_lat); logger->tinc(l_bluestore_commit_lat, ceph_clock_now() - txc->start); - finishers[txc->osr->shard]->queue(txc->oncommits); } void BlueStore::_txc_finish(TransContext *txc)