git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
os/bluestore: wait for wal op that wrote cached tail before using
author Sage Weil <sage@redhat.com>
Mon, 21 Mar 2016 19:45:57 +0000 (15:45 -0400)
committer Sage Weil <sage@redhat.com>
Wed, 30 Mar 2016 15:23:14 +0000 (11:23 -0400)
If the cached tail is written by a WAL op, we need to wait for it to
apply before we use the cached tail.

Signed-off-by: Sage Weil <sage@redhat.com>
src/os/bluestore/BlueStore.cc
src/os/bluestore/BlueStore.h

index 16464cdf419ed7892b7f95f4f21372faa01a7ebf..9a18964dc65c98124676c2c07b40c60e5ceb1777 100644 (file)
@@ -4075,8 +4075,10 @@ int BlueStore::_wal_finish(TransContext *txc)
   bluestore_wal_transaction_t& wt = *txc->wal_txn;
   dout(20) << __func__ << " txc " << " seq " << wt.seq << txc << dendl;
 
+  std::lock_guard<std::mutex> l2(txc->osr->qlock);
   std::lock_guard<std::mutex> l(kv_lock);
   txc->state = TransContext::STATE_WAL_CLEANUP;
+  txc->osr->qcond.notify_all();
   wal_cleanup_queue.push_back(txc);
   kv_cond.notify_one();
   return 0;
@@ -5542,6 +5544,8 @@ int BlueStore::_do_write(
        (offset / block_size == (o->onode.size - 1) / block_size)) {
       dout(20) << __func__ << " using cached tail" << dendl;
       assert((offset & block_mask) == (o->onode.size & block_mask));
+      // wait for any related wal writes to commit
+      txc->osr->wait_for_wal_on_seq(o->tail_txc_seq);
       uint64_t tail_off = offset % block_size;
       if (tail_off >= o->tail_bl.length()) {
        bufferlist t;
index ffd48fc639cf5c737ce4e8382a9a410ba2430914..edbd58b08b13134f25179268f462e74cf6260257 100644 (file)
@@ -405,6 +405,28 @@ public:
       txc->oncommits.push_back(c);
       return false;
     }
+
+    /// if there is a wal on @seq, wait for it to apply
+    void wait_for_wal_on_seq(uint64_t seq) {  // seq: sequence number of the txc to look for in q
+      std::unique_lock<std::mutex> l(qlock);  // unique_lock (not lock_guard) so it can be handed to qcond.wait()
+      restart:  // the scan of q starts over from here after every wakeup
+      for (OpSequencer::q_list_t::reverse_iterator p = q.rbegin();  // walk q from its back toward its front
+          p != q.rend();
+          ++p) {
+       if (p->seq == seq) {  // found the txc carrying the requested seq
+         TransContext *txc = &(*p);
+         if (txc->wal_txn) {  // only a txc that has a wal_txn needs waiting on
+           while (txc->state < TransContext::STATE_WAL_CLEANUP) {  // behaves like an 'if': the body always jumps out
+             txc->osr->qcond.wait(l);  // block on the sequencer's qcond, passing the held lock
+             goto restart;  // txc may have gone away, so do not touch it again; rescan q instead
+           }
+         }
+         break;  // txc has no wal_txn, or its state is already >= STATE_WAL_CLEANUP
+       }
+       if (p->seq < seq)  // stop early; presumably q is in ascending seq order -- TODO confirm
+         break;
+      }
+    }
   };
 
   class WALWQ : public ThreadPool::WorkQueue<TransContext> {