From d3a425faf88db8f14eaa825fc1a3ea87b7e3ac5d Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 9 Mar 2017 11:53:06 -0500 Subject: [PATCH] os/bluestore: avoid waking up kv thread on deferred write completion In a simple HDD workload with queue depth of 1, we halve our throughput because the kv thread does a full commit twice per IO: once for the initial commit, and then again to clean up the deferred write record. The second wakeup is unnecessary; we can clean it up on the next commit. We do need to do this wakeup in a few cases, though, when draining the OpSequencers: (1) on replay during startup, and (2) on shutdown in _osr_drain_all(). Send everything through _osr_drain_all() for simplicity. This doubles HDD qd=1 IOPS from ~50 to ~100 on my 7200 rpm test device (rados bench 30 write -b 4096 -t 1). Signed-off-by: Sage Weil --- src/os/bluestore/BlueStore.cc | 20 +++++++++++++++++--- src/os/bluestore/BlueStore.h | 1 + 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index 358e8c7d8ffb2..e98badf45c6cb 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -7434,10 +7434,18 @@ void BlueStore::_osr_drain_all() std::lock_guard l(osr_lock); s = osr_set; } + + deferred_aggressive_cleanup = true; + { + // wake up any previously finished deferred events + std::lock_guard l(kv_lock); + kv_cond.notify_one(); + } for (auto osr : s) { dout(20) << __func__ << " drain " << osr << dendl; osr->drain(); } + deferred_aggressive_cleanup = false; dout(10) << __func__ << " done" << dendl; } @@ -7699,7 +7707,7 @@ void BlueStore::_deferred_try_submit(OpSequencer *osr) } } osr->deferred_txc = last; - dout(20) << " osr " << osr << " deferred_blocks 0x" << std::hex + dout(20) << __func__ << " osr " << osr << " deferred_blocks 0x" << std::hex << osr->deferred_blocks << std::dec << dendl; _txc_aio_submit(last); } @@ -7734,7 +7742,12 @@ int BlueStore::_deferred_finish(TransContext *txc) deferred_cleanup_queue.push_back(txc); } finished.clear(); - kv_cond.notify_one(); + + // in the normal case, do not bother waking up the kv thread; it will + // catch us on the next commit anyway. + if (deferred_aggressive_cleanup) { + kv_cond.notify_one(); + } return 0; } @@ -7775,7 +7788,8 @@ int BlueStore::_deferred_replay() for (it->lower_bound(string()); it->valid(); it->next(), ++count) { dout(20) << __func__ << " replay " << pretty_binary_string(it->key()) << dendl; - bluestore_deferred_transaction_t *deferred_txn = new bluestore_deferred_transaction_t; + bluestore_deferred_transaction_t *deferred_txn = + new bluestore_deferred_transaction_t; bufferlist bl = it->value(); bufferlist::iterator p = bl.begin(); try { diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h index 3469e9267767f..3f316082e0bd7 100644 --- a/src/os/bluestore/BlueStore.h +++ b/src/os/bluestore/BlueStore.h @@ -1685,6 +1685,7 @@ private: std::mutex deferred_lock; std::atomic deferred_seq = {0}; deferred_osr_queue_t deferred_queue; ///< osr's with deferred io pending + bool deferred_aggressive_cleanup = false; ///< aggressive wakeup of kv thread int m_finisher_num = 1; vector finishers; -- 2.39.5