From d82af301c8627abe2077eabb3e9daed5e6740f25 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 31 May 2017 18:38:36 -0400 Subject: [PATCH] os/bluestore: submit deferred if txc cleanup is blocked If we have a single deferred write, and then a uniform workload with *no* deferred writes, we will never actually submit it. Meanwhile, the txc is stuck on the osr queue and nothing ever gets retired. The simple fix is to submit any deferred ops if the osr queue is blocked by a queued deferred write and the osr queue length is above some threshold. This prevents memory from being pinned indefinitely. Signed-off-by: Sage Weil --- src/common/config_opts.h | 1 + src/os/bluestore/BlueStore.cc | 10 ++++++++++ 2 files changed, 11 insertions(+) diff --git a/src/common/config_opts.h b/src/common/config_opts.h index 449e9a54beb..b5636883e37 100644 --- a/src/common/config_opts.h +++ b/src/common/config_opts.h @@ -1131,6 +1131,7 @@ OPTION(bluestore_allocator, OPT_STR, "bitmap") // stupid | bitmap OPTION(bluestore_freelist_blocks_per_key, OPT_INT, 128) OPTION(bluestore_bitmapallocator_blocks_per_zone, OPT_INT, 1024) // must be power of 2 aligned, e.g., 512, 1024, 2048... OPTION(bluestore_bitmapallocator_span_size, OPT_INT, 1024) // must be power of 2 aligned, e.g., 512, 1024, 2048... 
+OPTION(bluestore_max_deferred_txc, OPT_INT, 32) OPTION(bluestore_rocksdb_options, OPT_STR, "compression=kNoCompression,max_write_buffer_number=4,min_write_buffer_number_to_merge=1,recycle_log_file_num=4,write_buffer_size=268435456,writable_file_max_buffer_size=0,compaction_readahead_size=2097152") OPTION(bluestore_fsck_on_mount, OPT_BOOL, false) OPTION(bluestore_fsck_on_mount_deep, OPT_BOOL, true) diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index 13d1882e80f..56609a34138 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -7800,6 +7800,7 @@ void BlueStore::_txc_finish(TransContext *txc) OpSequencerRef osr = txc->osr; CollectionRef c; bool empty = false; + bool submit_deferred = false; OpSequencer::q_list_t releasing_txc; { std::lock_guard l(osr->qlock); @@ -7815,6 +7816,10 @@ void BlueStore::_txc_finish(TransContext *txc) // for _osr_drain_preceding() notify = true; } + if (txc->state == TransContext::STATE_DEFERRED_QUEUED && + osr->q.size() > g_conf->bluestore_max_deferred_txc) { + submit_deferred = true; + } break; } @@ -7848,6 +7853,11 @@ void BlueStore::_txc_finish(TransContext *txc) c->trim_cache(); } + if (submit_deferred) { + // we're pinning memory; flush! we could be more fine-grained here but + // i'm not sure it's worth the bother. + deferred_try_submit(); + } if (empty && osr->zombie) { dout(10) << __func__ << " reaping empty zombie osr " << osr << dendl; -- 2.39.5