os/bluestore: batch up to bluestore_deferred_batch_ops before submitting
author     Sage Weil <sage@redhat.com>
           Thu, 9 Mar 2017 19:17:47 +0000 (14:17 -0500)
committer  Sage Weil <sage@redhat.com>
           Tue, 21 Mar 2017 18:56:29 +0000 (13:56 -0500)
Allow several deferred writes to accumulate before we submit them.  In
general we have no time pressure, and on HDD (and perhaps sometimes SSD)
it is beneficial to accumulate and batch these so that they result in
fewer seeks.  On HDD this is particularly true of seeks away from the
journal, and on sequential workloads batching can avoid seeks entirely.
It may even allow the block layer or SSD firmware to merge IOs and
perform fewer writes.

Signed-off-by: Sage Weil <sage@redhat.com>
src/common/config_opts.h
src/os/bluestore/BlueStore.cc
src/os/bluestore/BlueStore.h
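
At a glance, the new policy works like this: each deferred txc is queued
and counted; the kv sync thread submits a batch once
bluestore_deferred_batch_ops txcs have accumulated; and drain (the
"aggressive" path) submits immediately.  Below is a minimal standalone
sketch of that policy, not the real implementation: Batcher, Txc, and
try_submit() are invented names, and the per-OpSequencer structure of the
actual code is omitted.

// Illustrative sketch only -- not BlueStore code.
#include <deque>
#include <mutex>
#include <vector>

struct Txc { int id; };

class Batcher {
  std::mutex lock;                 // cf. deferred_lock
  std::deque<Txc> pending;         // cf. deferred_queue
  int queue_size = 0;              // cf. deferred_queue_size
  bool aggressive = false;         // cf. deferred_aggressive
  const int batch_ops;             // cf. bluestore_deferred_batch_ops

public:
  explicit Batcher(int n) : batch_ops(n) {}

  // Submit path (cf. _deferred_queue): enqueue and count; only kick
  // off IO immediately when someone is draining.
  void queue(Txc txc) {
    std::lock_guard<std::mutex> l(lock);
    pending.push_back(txc);
    ++queue_size;
    if (aggressive)
      try_submit();
  }

  // kv sync thread (cf. _kv_sync_thread): submit once a full batch
  // has accumulated; otherwise let writes keep piling up.
  void maybe_submit() {
    std::lock_guard<std::mutex> l(lock);
    if (queue_size >= batch_ops)
      try_submit();
  }

  // Drain (cf. _osr_drain_all): force out anything pending.
  void drain() {
    std::lock_guard<std::mutex> l(lock);
    aggressive = true;
    try_submit();
    aggressive = false;   // the real code also waits for IO to complete
  }

private:
  void try_submit() {     // caller holds lock
    std::vector<Txc> batch(pending.begin(), pending.end());
    pending.clear();
    queue_size = 0;
    // issue 'batch' as one set of IOs ...
    (void)batch;
  }
};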

diff --git a/src/common/config_opts.h b/src/common/config_opts.h
index 27f8be5bd51baf6f3ec6bd50b79e0802bc76c960..f29940b8a79ad2f8c4c122cf2353733ec3413b12 100644
--- a/src/common/config_opts.h
+++ b/src/common/config_opts.h
@@ -1108,6 +1108,7 @@ OPTION(bluestore_max_ops, OPT_U64, 512)
 OPTION(bluestore_max_bytes, OPT_U64, 64*1024*1024)
 OPTION(bluestore_deferred_max_ops, OPT_U64, 512)
 OPTION(bluestore_deferred_max_bytes, OPT_U64, 128*1024*1024)
+OPTION(bluestore_deferred_batch_ops, OPT_U64, 8)
 OPTION(bluestore_nid_prealloc, OPT_INT, 1024)
 OPTION(bluestore_blobid_prealloc, OPT_U64, 10240)
 OPTION(bluestore_clone_cow, OPT_BOOL, true)  // do copy-on-write for clones
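
The shipped default is 8 ops, per the OPTION line above.  Like any other
bluestore tunable it can be overridden; a purely illustrative ceph.conf
override (16 is just an example value, not a recommendation) would be:

[osd]
bluestore_deferred_batch_ops = 16

or injected at runtime with something like:

ceph tell osd.* injectargs '--bluestore_deferred_batch_ops 16'
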
diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc
index c47894fb65f863078a1f712d724aaa464a546722..1fa2ef1d1ed972566266095312819b92e4982456 100644
--- a/src/os/bluestore/BlueStore.cc
+++ b/src/os/bluestore/BlueStore.cc
@@ -7441,7 +7441,12 @@ void BlueStore::_osr_drain_all()
     s = osr_set;
   }
 
-  deferred_aggressive_cleanup = true;
+  deferred_aggressive = true;
+  {
+    // submit anything pending
+    std::lock_guard<std::mutex> l(deferred_lock);
+    _deferred_try_submit();
+  }
   {
     // wake up any previously finished deferred events
     std::lock_guard<std::mutex> l(kv_lock);
@@ -7451,7 +7456,7 @@ void BlueStore::_osr_drain_all()
     dout(20) << __func__ << " drain " << osr << dendl;
     osr->drain();
   }
-  deferred_aggressive_cleanup = false;
+  deferred_aggressive = false;
 
   dout(10) << __func__ << " done" << dendl;
 }
@@ -7609,6 +7614,13 @@ void BlueStore::_kv_sync_thread()
        deferred_cleaning.pop_front();
       }
 
+      if (!deferred_aggressive) {
+       std::lock_guard<std::mutex> l(deferred_lock);
+       if (deferred_queue_size >= (int)g_conf->bluestore_deferred_batch_ops) {
+         _deferred_try_submit();
+       }
+      }
+
       // this is as good a place as any ...
       _reap_collections();
 
@@ -7652,14 +7664,17 @@ void BlueStore::_deferred_queue(TransContext *txc)
     deferred_queue.push_back(*txc->osr);
   }
   txc->osr->deferred_pending.push_back(*txc);
-  if (txc->osr->deferred_running.empty()) {
+  ++deferred_queue_size;
+  if (deferred_aggressive &&
+      txc->osr->deferred_running.empty()) {
     _deferred_try_submit(txc->osr.get());
   }
 }
 
 void BlueStore::_deferred_try_submit()
 {
-  dout(20) << __func__ << " " << deferred_queue.size() << " osrs" << dendl;
+  dout(20) << __func__ << " " << deferred_queue.size() << " osrs, "
+          << deferred_queue_size << " txcs" << dendl;
   for (auto& osr : deferred_queue) {
     if (osr.deferred_running.empty()) {
       _deferred_try_submit(&osr);
@@ -7671,8 +7686,12 @@ void BlueStore::_deferred_try_submit(OpSequencer *osr)
 {
   dout(10) << __func__ << " osr " << osr << " " << osr->deferred_pending.size()
           << " pending " << dendl;
+  assert(!osr->deferred_pending.empty());
   assert(osr->deferred_running.empty());
-  osr->deferred_pending.swap(osr->deferred_running);
+
+  deferred_queue_size -= osr->deferred_pending.size();
+  assert(deferred_queue_size >= 0);
+  osr->deferred_running.swap(osr->deferred_pending);
 
   // attach all IO to the last in the batch
   TransContext *last = &osr->deferred_running.back();
@@ -7729,11 +7748,11 @@ int BlueStore::_deferred_finish(TransContext *txc)
     assert(txc->osr->deferred_txc == txc);
     txc->osr->deferred_blocks.clear();
     finished.swap(txc->osr->deferred_running);
-    if (!txc->osr->deferred_pending.empty()) {
-      _deferred_try_submit(txc->osr.get());
-    } else {
+    if (txc->osr->deferred_pending.empty()) {
       auto q = deferred_queue.iterator_to(*txc->osr);
       deferred_queue.erase(q);
+    } else if (deferred_aggressive) {
+      _deferred_try_submit(txc->osr.get());
     }
   }
 
@@ -7751,7 +7770,7 @@ int BlueStore::_deferred_finish(TransContext *txc)
 
   // in the normal case, do not bother waking up the kv thread; it will
   // catch us on the next commit anyway.
-  if (deferred_aggressive_cleanup) {
+  if (deferred_aggressive) {
     kv_cond.notify_one();
   }
   return 0;
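
One subtlety in _deferred_try_submit(OpSequencer*) above: the pending
list is swapped into deferred_running and all of the batch's IO is
attached to the last TransContext, so a single completion covers the
whole batch.  A rough sketch of that aggregation idea, with invented
names (BatchCompletion and io_finished() are hypothetical):

// Illustrative sketch of batch-completion aggregation.
#include <atomic>
#include <functional>

struct BatchCompletion {
  std::atomic<int> pending_ios;
  std::function<void()> on_done;    // e.g. schedule _deferred_finish

  BatchCompletion(int n, std::function<void()> fn)
    : pending_ios(n), on_done(std::move(fn)) {}

  // Called from each aio completion; the final one fires the callback
  // for the whole batch.
  void io_finished() {
    if (--pending_ios == 0)
      on_done();
  }
};
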
diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h
index 3f316082e0bd75809ec30af8c618e71164703a6f..8790e04bfc9e335fa939daa586c0f6d8bc21eac2 100644
--- a/src/os/bluestore/BlueStore.h
+++ b/src/os/bluestore/BlueStore.h
@@ -1684,8 +1684,9 @@ private:
 
   std::mutex deferred_lock;
   std::atomic<uint64_t> deferred_seq = {0};
-  deferred_osr_queue_t deferred_queue;      ///< osr's with deferred io pending
-  bool deferred_aggressive_cleanup = false; ///< aggressive wakeup of kv thread
+  deferred_osr_queue_t deferred_queue; ///< osr's with deferred io pending
+  int deferred_queue_size = 0;         ///< num txc's queued across all osrs
+  bool deferred_aggressive = false;    ///< aggressive wakeup of kv thread
 
   int m_finisher_num = 1;
   vector<Finisher*> finishers;