From: Sage Weil Date: Fri, 19 May 2017 16:27:55 +0000 (-0400) Subject: os/bluestore: separate kv_sync_thread into two parts X-Git-Tag: v12.1.0~10^2~4^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=bedcbcd0cdc24760d6a7577a2736028656f5357a;p=ceph.git os/bluestore: separate kv_sync_thread into two parts The kv_sync_thread is a bottleneck; making it do less work improves performance on fast devices. Signed-off-by: Jianpeng Ma Signed-off-by: Igor Fedotov Signed-off-by: Sage Weil --- diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index 394c46c917f1..4dc87b62c55c 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -3281,6 +3281,7 @@ BlueStore::BlueStore(CephContext *cct, const string& path) cct->_conf->bluestore_throttle_bytes + cct->_conf->bluestore_throttle_deferred_bytes), kv_sync_thread(this), + kv_finalize_thread(this), mempool_thread(this) { _init_logger(); @@ -3309,6 +3310,7 @@ BlueStore::BlueStore(CephContext *cct, cct->_conf->bluestore_throttle_bytes + cct->_conf->bluestore_throttle_deferred_bytes), kv_sync_thread(this), + kv_finalize_thread(this), min_alloc_size(_min_alloc_size), min_alloc_size_order(ctz(_min_alloc_size)), mempool_thread(this) @@ -5012,6 +5014,7 @@ int BlueStore::_mount(bool kv_only) f->start(); } kv_sync_thread.create("bstore_kv_sync"); + kv_finalize_thread.create("bstore_kv_final"); r = _deferred_replay(); if (r < 0) @@ -7901,6 +7904,10 @@ void BlueStore::_osr_drain_all() std::lock_guard l(kv_lock); kv_cond.notify_one(); } + { + std::lock_guard l(kv_finalize_lock); + kv_finalize_cond.notify_one(); + } for (auto osr : s) { dout(20) << __func__ << " drain " << osr << dendl; osr->drain(); @@ -8128,11 +8135,82 @@ void BlueStore::_kv_sync_thread() logger->tinc(l_bluestore_kv_commit_lat, dur_kv); logger->tinc(l_bluestore_kv_lat, dur); } - while (!kv_committing.empty()) { - TransContext *txc = kv_committing.front(); + + if (bluefs) { + if (!bluefs_gift_extents.empty()) { + _commit_bluefs_freespace(bluefs_gift_extents); + } + for (auto p = bluefs_extents_reclaiming.begin(); + p != bluefs_extents_reclaiming.end(); + ++p) { + dout(20) << __func__ << " releasing old bluefs 0x" << std::hex + << p.get_start() << "~" << p.get_len() << std::dec + << dendl; + alloc->release(p.get_start(), p.get_len()); + } + bluefs_extents_reclaiming.clear(); + } + + { + std::unique_lock m(kv_finalize_lock); + if (kv_committing_to_finalize.empty()) { + kv_committing_to_finalize.swap(kv_committing); + } else { + kv_committing_to_finalize.insert( + kv_committing_to_finalize.end(), + kv_committing.begin(), + kv_committing.end()); + kv_committing.clear(); + } + if (deferred_stable_to_finalize.empty()) { + deferred_stable_to_finalize.swap(deferred_stable); + } else { + deferred_stable_to_finalize.insert( + deferred_stable_to_finalize.end(), + deferred_stable.begin(), + deferred_stable.end()); + deferred_stable.clear(); + } + kv_finalize_cond.notify_one(); + } + + l.lock(); + // previously deferred "done" are now "stable" by virtue of this + // commit cycle. + deferred_stable_queue.swap(deferred_done); + } + } + dout(10) << __func__ << " finish" << dendl; +} + +void BlueStore::_kv_finalize_thread() +{ + deque kv_committed; + deque deferred_stable; + dout(10) << __func__ << " start" << dendl; + std::unique_lock l(kv_finalize_lock); + while (true) { + assert(kv_committed.empty()); + assert(deferred_stable.empty()); + if (kv_committing_to_finalize.empty() && + deferred_stable_to_finalize.empty()) { + if (kv_stop) + break; + dout(20) << __func__ << " sleep" << dendl; + kv_finalize_cond.wait(l); + dout(20) << __func__ << " wake" << dendl; + } else { + kv_committed.swap(kv_committing_to_finalize); + deferred_stable.swap(deferred_stable_to_finalize); + l.unlock(); + dout(20) << __func__ << " kv_committed " << kv_committed << dendl; + dout(20) << __func__ << " deferred_stable " << deferred_stable << dendl; + + while (!kv_committed.empty()) { + TransContext *txc = kv_committed.front(); assert(txc->state == TransContext::STATE_KV_SUBMITTED); _txc_state_proc(txc); - kv_committing.pop_front(); + kv_committed.pop_front(); } for (auto b : deferred_stable) { auto p = b->txcs.begin(); @@ -8143,6 +8221,7 @@ void BlueStore::_kv_sync_thread() } delete b; } + deferred_stable.clear(); if (!deferred_aggressive) { std::lock_guard l(deferred_lock); @@ -8155,25 +8234,7 @@ void BlueStore::_kv_sync_thread() // this is as good a place as any ... _reap_collections(); - if (bluefs) { - if (!bluefs_gift_extents.empty()) { - _commit_bluefs_freespace(bluefs_gift_extents); - } - for (auto p = bluefs_extents_reclaiming.begin(); - p != bluefs_extents_reclaiming.end(); - ++p) { - dout(20) << __func__ << " releasing old bluefs 0x" << std::hex - << p.get_start() << "~" << p.get_len() << std::dec - << dendl; - alloc->release(p.get_start(), p.get_len()); - } - bluefs_extents_reclaiming.clear(); - } - l.lock(); - // previously deferred "done" are now "stable" by virtue of this - // commit cycle. - deferred_stable_queue.swap(deferred_done); } } dout(10) << __func__ << " finish" << dendl; diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h index 50b3df8d70c7..b2cc7996fd56 100644 --- a/src/os/bluestore/BlueStore.h +++ b/src/os/bluestore/BlueStore.h @@ -1729,6 +1729,14 @@ public: return NULL; } }; + struct KVFinalizeThread : public Thread { + BlueStore *store; + explicit KVFinalizeThread(BlueStore *s) : store(s) {} + void *entry() { + store->_kv_finalize_thread(); + return NULL; + } + }; struct DBHistogram { struct value_dist { @@ -1809,6 +1817,12 @@ private: deque deferred_done_queue; ///< deferred ios done deque deferred_stable_queue; ///< deferred ios done + stable + KVFinalizeThread kv_finalize_thread; + std::mutex kv_finalize_lock; + std::condition_variable kv_finalize_cond; + deque kv_committing_to_finalize; ///< pending finalization + deque deferred_stable_to_finalize; ///< pending finalization + PerfCounters *logger = nullptr; std::mutex reap_lock; @@ -1968,13 +1982,20 @@ private: void _osr_unregister_all(); void _kv_sync_thread(); + void _kv_finalize_thread(); void _kv_stop() { { std::lock_guard l(kv_lock); kv_stop = true; kv_cond.notify_all(); } + { + std::lock_guard l(kv_finalize_lock); + kv_finalize_cond.notify_all(); + } + kv_sync_thread.join(); + kv_finalize_thread.join(); { std::lock_guard l(kv_lock); kv_stop = false;