From: Sage Weil Date: Tue, 8 Aug 2017 13:23:31 +0000 (-0400) Subject: Revert "os/bluestore: allow multiple DeferredBatches in flight at once" X-Git-Tag: v13.0.0~187^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=917858516a904c82fabe1bd65d2fa88436319713;p=ceph.git Revert "os/bluestore: allow multiple DeferredBatches in flight at once" This reverts commit ca32d575eb2673737198a63643d5d1923151eba3. If we have multiple batches in flight then we have to worry about writes to the same blocks reordering. Also, 3c6a6c46d5808d6c42ed4dcfb441bad64366686b is sufficient to avoid the stall/deadlock in http://tracker.ceph.com/issues/20295. # Conflicts: # src/os/bluestore/BlueStore.cc Fixes: http://tracker.ceph.com/issues/20925 Signed-off-by: Sage Weil --- diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index 0dbb2ca67537..e74ea82c1a79 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -8073,7 +8073,7 @@ void BlueStore::_txc_finish(TransContext *txc) if (submit_deferred) { // we're pinning memory; flush! we could be more fine-grained here but // i'm not sure it's worth the bother. - deferred_submit_all(); + deferred_try_submit(); } if (empty && osr->zombie) { @@ -8136,7 +8136,7 @@ void BlueStore::_osr_drain_all() ++deferred_aggressive; { // submit anything pending - deferred_submit_all(); + deferred_try_submit(); } { // wake up any previously finished deferred events @@ -8546,7 +8546,7 @@ void BlueStore::_kv_finalize_thread() if (!deferred_aggressive) { if (deferred_queue_size >= deferred_batch_ops.load() || throttle_deferred_bytes.past_midpoint()) { - deferred_submit_all(); + deferred_try_submit(); } } @@ -8575,7 +8575,7 @@ void BlueStore::_deferred_queue(TransContext *txc) dout(20) << __func__ << " txc " << txc << " osr " << txc->osr << dendl; deferred_lock.lock(); if (!txc->osr->deferred_pending && - txc->osr->deferred_running.empty()) { + !txc->osr->deferred_running) { deferred_queue.push_back(*txc->osr); } if (!txc->osr->deferred_pending) { @@ -8593,14 +8593,15 @@ void BlueStore::_deferred_queue(TransContext *txc) cct, wt.seq, e.offset, e.length, p); } } - if (deferred_aggressive) { + if (deferred_aggressive && + !txc->osr->deferred_running) { _deferred_submit_unlock(txc->osr.get()); } else { deferred_lock.unlock(); } } -void BlueStore::deferred_submit_all() +void BlueStore::deferred_try_submit() { dout(20) << __func__ << " " << deferred_queue.size() << " osrs, " << deferred_queue_size << " txcs" << dendl; @@ -8611,7 +8612,7 @@ void BlueStore::deferred_submit_all() osrs.push_back(&osr); } for (auto& osr : osrs) { - if (osr->deferred_pending) { + if (osr->deferred_pending && !osr->deferred_running) { _deferred_submit_unlock(osr.get()); deferred_lock.lock(); } @@ -8620,15 +8621,17 @@ void BlueStore::deferred_submit_all() void BlueStore::_deferred_submit_unlock(OpSequencer *osr) { - auto b = osr->deferred_pending; - dout(10) << __func__ << " osr " << osr << " b " << b + dout(10) << __func__ << " osr " << osr << " " << osr->deferred_pending->iomap.size() << " ios pending " << dendl; - assert(b); + assert(osr->deferred_pending); + assert(!osr->deferred_running); + + auto b = osr->deferred_pending; deferred_queue_size -= b->seq_bytes.size(); assert(deferred_queue_size >= 0); - osr->deferred_running.push_back(osr->deferred_pending); + osr->deferred_running = osr->deferred_pending; osr->deferred_pending = nullptr; uint64_t start = 0, pos = 0; @@ -8671,21 +8674,24 @@ void BlueStore::_deferred_submit_unlock(OpSequencer *osr) bdev->aio_submit(&b->ioc); } -void BlueStore::_deferred_aio_finish(DeferredBatch *b) +void BlueStore::_deferred_aio_finish(OpSequencer *osr) { - OpSequencer *osr = b->osr; - dout(10) << __func__ << " osr " << osr << " b " << b << dendl; + dout(10) << __func__ << " osr " << osr << dendl; + assert(osr->deferred_running); + DeferredBatch *b = osr->deferred_running; { std::lock_guard l(deferred_lock); - auto p = std::find(osr->deferred_running.begin(), - osr->deferred_running.end(), - b); - assert(p != osr->deferred_running.end()); - osr->deferred_running.erase(p); - if (osr->deferred_running.empty() && !osr->deferred_pending) { + assert(osr->deferred_running == b); + osr->deferred_running = nullptr; + if (!osr->deferred_pending) { auto q = deferred_queue.iterator_to(*osr); deferred_queue.erase(q); + } else if (deferred_aggressive) { + dout(20) << __func__ << " queuing async deferred_try_submit" << dendl; + finishers[0]->queue(new FunctionContext([&](int) { + deferred_try_submit(); + })); } } @@ -8822,7 +8828,7 @@ int BlueStore::queue_transactions( dout(10) << __func__ << " failed get throttle_deferred_bytes, aggressive" << dendl; ++deferred_aggressive; - deferred_submit_all(); + deferred_try_submit(); throttle_deferred_bytes.get(txc->cost); --deferred_aggressive; } diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h index ca996759d81d..d7a1980320eb 100644 --- a/src/os/bluestore/BlueStore.h +++ b/src/os/bluestore/BlueStore.h @@ -1629,7 +1629,7 @@ public: bufferlist::const_iterator& p); void aio_finish(BlueStore *store) override { - store->_deferred_aio_finish(this); + store->_deferred_aio_finish(osr); } }; @@ -1647,7 +1647,7 @@ public: boost::intrusive::list_member_hook<> deferred_osr_queue_item; - deque deferred_running; + DeferredBatch *deferred_running = nullptr; DeferredBatch *deferred_pending = nullptr; Sequencer *parent; @@ -2035,9 +2035,9 @@ private: bluestore_deferred_op_t *_get_deferred_op(TransContext *txc, OnodeRef o); void _deferred_queue(TransContext *txc); - void deferred_submit_all(); + void deferred_try_submit(); void _deferred_submit_unlock(OpSequencer *osr); - void _deferred_aio_finish(DeferredBatch *b); + void _deferred_aio_finish(OpSequencer *osr); int _deferred_replay(); public: