From 7d7f5856bc5b547b461f63f15acd7c27b0fba3a6 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Sat, 1 Jul 2017 10:56:58 -0400 Subject: [PATCH] os/bluestore: fix deferred_aio deadlock We submit aios while holding deferred_lock, and we take deferred_lock in the aio completion handler (of which there is only 1). That means that if the aio queue fills up, the submitter will block with the lock while the finisher won't be able to take the lock, progress, and drain completed aios, leading to a deadlock. Fix this by submiting aios *without* deferred_lock held. Instead, demote to a deferred_submit_lock. Further, in the finisher, submit aios via a finisher (this only happens when in deferred_aggressive mode which is rare and not performance-sensitive). Signed-off-by: Sage Weil --- src/os/bluestore/BlueStore.cc | 42 ++++++++++++++++++++++++----------- src/os/bluestore/BlueStore.h | 10 +++------ 2 files changed, 32 insertions(+), 20 deletions(-) diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index aa3d051b929..bef5a331134 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -8020,9 +8020,11 @@ void BlueStore::_osr_drain_preceding(TransContext *txc) ++deferred_aggressive; // FIXME: maybe osr-local aggressive flag? { // submit anything pending - std::lock_guard l(deferred_lock); + deferred_lock.lock(); if (osr->deferred_pending) { - _deferred_submit(osr); + _deferred_submit_unlock(osr); + } else { + deferred_lock.unlock(); } } { @@ -8049,8 +8051,7 @@ void BlueStore::_osr_drain_all() ++deferred_aggressive; { // submit anything pending - std::lock_guard l(deferred_lock); - _deferred_try_submit(); + deferred_try_submit(); } { // wake up any previously finished deferred events @@ -8455,10 +8456,9 @@ void BlueStore::_kv_finalize_thread() deferred_stable.clear(); if (!deferred_aggressive) { - std::lock_guard l(deferred_lock); if (deferred_queue_size >= deferred_batch_ops.load() || throttle_deferred_bytes.past_midpoint()) { - _deferred_try_submit(); + deferred_try_submit(); } } @@ -8485,7 +8485,7 @@ bluestore_deferred_op_t *BlueStore::_get_deferred_op( void BlueStore::_deferred_queue(TransContext *txc) { dout(20) << __func__ << " txc " << txc << " osr " << txc->osr << dendl; - std::lock_guard l(deferred_lock); + deferred_lock.lock(); if (!txc->osr->deferred_pending && !txc->osr->deferred_running) { deferred_queue.push_back(*txc->osr); @@ -8507,22 +8507,31 @@ void BlueStore::_deferred_queue(TransContext *txc) } if (deferred_aggressive && !txc->osr->deferred_running) { - _deferred_submit(txc->osr.get()); + _deferred_submit_unlock(txc->osr.get()); + } else { + deferred_lock.unlock(); } } -void BlueStore::_deferred_try_submit() +void BlueStore::deferred_try_submit() { dout(20) << __func__ << " " << deferred_queue.size() << " osrs, " << deferred_queue_size << " txcs" << dendl; + std::lock_guard l(deferred_lock); + vector osrs; + osrs.reserve(deferred_queue.size()); for (auto& osr : deferred_queue) { - if (!osr.deferred_running) { - _deferred_submit(&osr); + osrs.push_back(&osr); + } + for (auto& osr : osrs) { + if (!osr->deferred_running) { + _deferred_submit_unlock(osr.get()); + deferred_lock.lock(); } } } -void BlueStore::_deferred_submit(OpSequencer *osr) +void BlueStore::_deferred_submit_unlock(OpSequencer *osr) { dout(10) << __func__ << " osr " << osr << " " << osr->deferred_pending->iomap.size() << " ios pending " @@ -8570,6 +8579,10 @@ void BlueStore::_deferred_submit(OpSequencer *osr) bl.claim_append(i->second.bl); ++i; } + + // demote to deferred_submit_lock, then drop that too + std::lock_guard l(deferred_submit_lock); + deferred_lock.unlock(); bdev->aio_submit(&b->ioc); } @@ -8587,7 +8600,10 @@ void BlueStore::_deferred_aio_finish(OpSequencer *osr) auto q = deferred_queue.iterator_to(*osr); deferred_queue.erase(q); } else if (deferred_aggressive) { - _deferred_submit(osr); + dout(20) << __func__ << " queuing async deferred_try_submit" << dendl; + finishers[0]->queue(new FunctionContext([&](int) { + deferred_try_submit(); + })); } } diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h index 1e2543ee4aa..bb85b051449 100644 --- a/src/os/bluestore/BlueStore.h +++ b/src/os/bluestore/BlueStore.h @@ -1831,7 +1831,7 @@ private: interval_set bluefs_extents; ///< block extents owned by bluefs interval_set bluefs_extents_reclaiming; ///< currently reclaiming - std::mutex deferred_lock; + std::mutex deferred_lock, deferred_submit_lock; std::atomic deferred_seq = {0}; deferred_osr_queue_t deferred_queue; ///< osr's with deferred io pending int deferred_queue_size = 0; ///< num txc's queued across all osrs @@ -2024,12 +2024,8 @@ private: bluestore_deferred_op_t *_get_deferred_op(TransContext *txc, OnodeRef o); void _deferred_queue(TransContext *txc); - void deferred_try_submit() { - std::lock_guard l(deferred_lock); - _deferred_try_submit(); - } - void _deferred_try_submit(); - void _deferred_submit(OpSequencer *osr); + void deferred_try_submit(); + void _deferred_submit_unlock(OpSequencer *osr); void _deferred_aio_finish(OpSequencer *osr); int _deferred_replay(); -- 2.39.5