]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
Revert "os/bluestore: allow multiple DeferredBatches in flight at once" 16900/head
authorSage Weil <sage@redhat.com>
Tue, 8 Aug 2017 13:23:31 +0000 (09:23 -0400)
committerSage Weil <sage@redhat.com>
Tue, 8 Aug 2017 13:23:31 +0000 (09:23 -0400)
This reverts commit ca32d575eb2673737198a63643d5d1923151eba3.

If we have multiple batches in flight then we have to worry about writes
to the same blocks reordering.

Also, 3c6a6c46d5808d6c42ed4dcfb441bad64366686b is sufficient to avoid the
stall/deadlock in http://tracker.ceph.com/issues/20295.

# Conflicts:
# src/os/bluestore/BlueStore.cc

Fixes: http://tracker.ceph.com/issues/20925
Signed-off-by: Sage Weil <sage@redhat.com>
src/os/bluestore/BlueStore.cc
src/os/bluestore/BlueStore.h

index 0dbb2ca67537080ccd5d1026a578ab717c983170..e74ea82c1a79f0367d7a5229a69cd437acc8655e 100644 (file)
@@ -8073,7 +8073,7 @@ void BlueStore::_txc_finish(TransContext *txc)
   if (submit_deferred) {
     // we're pinning memory; flush!  we could be more fine-grained here but
     // i'm not sure it's worth the bother.
-    deferred_submit_all();
+    deferred_try_submit();
   }
 
   if (empty && osr->zombie) {
@@ -8136,7 +8136,7 @@ void BlueStore::_osr_drain_all()
   ++deferred_aggressive;
   {
     // submit anything pending
-    deferred_submit_all();
+    deferred_try_submit();
   }
   {
     // wake up any previously finished deferred events
@@ -8546,7 +8546,7 @@ void BlueStore::_kv_finalize_thread()
       if (!deferred_aggressive) {
        if (deferred_queue_size >= deferred_batch_ops.load() ||
            throttle_deferred_bytes.past_midpoint()) {
-         deferred_submit_all();
+         deferred_try_submit();
        }
       }
 
@@ -8575,7 +8575,7 @@ void BlueStore::_deferred_queue(TransContext *txc)
   dout(20) << __func__ << " txc " << txc << " osr " << txc->osr << dendl;
   deferred_lock.lock();
   if (!txc->osr->deferred_pending &&
-      txc->osr->deferred_running.empty()) {
+      !txc->osr->deferred_running) {
     deferred_queue.push_back(*txc->osr);
   }
   if (!txc->osr->deferred_pending) {
@@ -8593,14 +8593,15 @@ void BlueStore::_deferred_queue(TransContext *txc)
        cct, wt.seq, e.offset, e.length, p);
     }
   }
-  if (deferred_aggressive) {
+  if (deferred_aggressive &&
+      !txc->osr->deferred_running) {
     _deferred_submit_unlock(txc->osr.get());
   } else {
     deferred_lock.unlock();
   }
 }
 
-void BlueStore::deferred_submit_all()
+void BlueStore::deferred_try_submit()
 {
   dout(20) << __func__ << " " << deferred_queue.size() << " osrs, "
           << deferred_queue_size << " txcs" << dendl;
@@ -8611,7 +8612,7 @@ void BlueStore::deferred_submit_all()
     osrs.push_back(&osr);
   }
   for (auto& osr : osrs) {
-    if (osr->deferred_pending) {
+    if (osr->deferred_pending && !osr->deferred_running) {
       _deferred_submit_unlock(osr.get());
       deferred_lock.lock();
     }
@@ -8620,15 +8621,17 @@ void BlueStore::deferred_submit_all()
 
 void BlueStore::_deferred_submit_unlock(OpSequencer *osr)
 {
-  auto b = osr->deferred_pending;
-  dout(10) << __func__ << " osr " << osr << " b " << b
+  dout(10) << __func__ << " osr " << osr
           << " " << osr->deferred_pending->iomap.size() << " ios pending "
           << dendl;
-  assert(b);
+  assert(osr->deferred_pending);
+  assert(!osr->deferred_running);
+
+  auto b = osr->deferred_pending;
   deferred_queue_size -= b->seq_bytes.size();
   assert(deferred_queue_size >= 0);
 
-  osr->deferred_running.push_back(osr->deferred_pending);
+  osr->deferred_running = osr->deferred_pending;
   osr->deferred_pending = nullptr;
 
   uint64_t start = 0, pos = 0;
@@ -8671,21 +8674,24 @@ void BlueStore::_deferred_submit_unlock(OpSequencer *osr)
   bdev->aio_submit(&b->ioc);
 }
 
-void BlueStore::_deferred_aio_finish(DeferredBatch *b)
+void BlueStore::_deferred_aio_finish(OpSequencer *osr)
 {
-  OpSequencer *osr = b->osr;
-  dout(10) << __func__ << " osr " << osr << " b " << b << dendl;
+  dout(10) << __func__ << " osr " << osr << dendl;
+  assert(osr->deferred_running);
+  DeferredBatch *b = osr->deferred_running;
 
   {
     std::lock_guard<std::mutex> l(deferred_lock);
-    auto p = std::find(osr->deferred_running.begin(),
-                      osr->deferred_running.end(),
-                      b);
-    assert(p != osr->deferred_running.end());
-    osr->deferred_running.erase(p);
-    if (osr->deferred_running.empty() && !osr->deferred_pending) {
+    assert(osr->deferred_running == b);
+    osr->deferred_running = nullptr;
+    if (!osr->deferred_pending) {
       auto q = deferred_queue.iterator_to(*osr);
       deferred_queue.erase(q);
+    } else if (deferred_aggressive) {
+      dout(20) << __func__ << " queuing async deferred_try_submit" << dendl;
+      finishers[0]->queue(new FunctionContext([&](int) {
+           deferred_try_submit();
+         }));
     }
   }
 
@@ -8822,7 +8828,7 @@ int BlueStore::queue_transactions(
       dout(10) << __func__ << " failed get throttle_deferred_bytes, aggressive"
               << dendl;
       ++deferred_aggressive;
-      deferred_submit_all();
+      deferred_try_submit();
       throttle_deferred_bytes.get(txc->cost);
       --deferred_aggressive;
    }
index ca996759d81d9711eaef0e0eb779df122969a935..d7a1980320ebb16151784804f71124919f802977 100644 (file)
@@ -1629,7 +1629,7 @@ public:
                       bufferlist::const_iterator& p);
 
     void aio_finish(BlueStore *store) override {
-      store->_deferred_aio_finish(this);
+      store->_deferred_aio_finish(osr);
     }
   };
 
@@ -1647,7 +1647,7 @@ public:
 
     boost::intrusive::list_member_hook<> deferred_osr_queue_item;
 
-    deque<DeferredBatch*> deferred_running;
+    DeferredBatch *deferred_running = nullptr;
     DeferredBatch *deferred_pending = nullptr;
 
     Sequencer *parent;
@@ -2035,9 +2035,9 @@ private:
 
   bluestore_deferred_op_t *_get_deferred_op(TransContext *txc, OnodeRef o);
   void _deferred_queue(TransContext *txc);
-  void deferred_submit_all();
+  void deferred_try_submit();
   void _deferred_submit_unlock(OpSequencer *osr);
-  void _deferred_aio_finish(DeferredBatch *b);
+  void _deferred_aio_finish(OpSequencer *osr);
   int _deferred_replay();
 
 public: