git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
os/bluestore: submit deferred if txc cleanup is blocked
author Sage Weil <sage@redhat.com>
Wed, 31 May 2017 22:38:36 +0000 (18:38 -0400)
committer Sage Weil <sage@redhat.com>
Thu, 1 Jun 2017 16:28:43 +0000 (12:28 -0400)
If we have a single deferred write, followed by a uniform workload with *no*
deferred writes, we will never actually submit that deferred write.  Meanwhile,
its txc is stuck on the osr queue and nothing ever gets retired.

Simple fix is to submit any deferred ops if the osr queue is blocked by
a queued deferred write and the osr queue length is above some
threshold.  This prevents memory from being pinned indefinitely.

Signed-off-by: Sage Weil <sage@redhat.com>
src/common/config_opts.h
src/os/bluestore/BlueStore.cc

index 449e9a54beb8764118b44f2af70e6494df5c3797..b5636883e3762e643a423a28c425e880e5b6ab06 100644 (file)
@@ -1131,6 +1131,7 @@ OPTION(bluestore_allocator, OPT_STR, "bitmap")     // stupid | bitmap
 OPTION(bluestore_freelist_blocks_per_key, OPT_INT, 128)
 OPTION(bluestore_bitmapallocator_blocks_per_zone, OPT_INT, 1024) // must be power of 2 aligned, e.g., 512, 1024, 2048...
 OPTION(bluestore_bitmapallocator_span_size, OPT_INT, 1024) // must be power of 2 aligned, e.g., 512, 1024, 2048...
+OPTION(bluestore_max_deferred_txc, OPT_INT, 32)
 OPTION(bluestore_rocksdb_options, OPT_STR, "compression=kNoCompression,max_write_buffer_number=4,min_write_buffer_number_to_merge=1,recycle_log_file_num=4,write_buffer_size=268435456,writable_file_max_buffer_size=0,compaction_readahead_size=2097152")
 OPTION(bluestore_fsck_on_mount, OPT_BOOL, false)
 OPTION(bluestore_fsck_on_mount_deep, OPT_BOOL, true)
index 13d1882e80f64798441536db8bc89bccd7fb5e86..56609a34138dbcf22aff389245d8e66a0e1e263c 100644 (file)
@@ -7800,6 +7800,7 @@ void BlueStore::_txc_finish(TransContext *txc)
   OpSequencerRef osr = txc->osr;
   CollectionRef c;
   bool empty = false;
+  bool submit_deferred = false;
   OpSequencer::q_list_t releasing_txc;
   {
     std::lock_guard<std::mutex> l(osr->qlock);
@@ -7815,6 +7816,10 @@ void BlueStore::_txc_finish(TransContext *txc)
          // for _osr_drain_preceding()
           notify = true;
        }
+       if (txc->state == TransContext::STATE_DEFERRED_QUEUED &&
+           osr->q.size() > g_conf->bluestore_max_deferred_txc) {
+         submit_deferred = true;
+       }
         break;
       }
 
@@ -7848,6 +7853,11 @@ void BlueStore::_txc_finish(TransContext *txc)
     c->trim_cache();
   }
 
+  if (submit_deferred) {
+    // we're pinning memory; flush!  we could be more fine-grained here but
+    // i'm not sure it's worth the bother.
+    deferred_try_submit();
+  }
 
   if (empty && osr->zombie) {
     dout(10) << __func__ << " reaping empty zombie osr " << osr << dendl;