From: Sage Weil
Date: Mon, 21 Mar 2016 19:45:57 +0000 (-0400)
Subject: os/bluestore: wait for wal op that wrote cached tail before using
X-Git-Tag: v10.1.1~28^2~19
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=3084be5defa7eb29cee868d63ca6eeccdc094c2a;p=ceph.git

os/bluestore: wait for wal op that wrote cached tail before using

If the cached tail is written by a WAL op, we need to wait for it to apply
before we use the cached tail.

Signed-off-by: Sage Weil
---

diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc
index 16464cdf419e..9a18964dc65c 100644
--- a/src/os/bluestore/BlueStore.cc
+++ b/src/os/bluestore/BlueStore.cc
@@ -4075,8 +4075,10 @@ int BlueStore::_wal_finish(TransContext *txc)
   bluestore_wal_transaction_t& wt = *txc->wal_txn;
   dout(20) << __func__ << " txc " << " seq " << wt.seq << txc << dendl;
 
+  std::lock_guard l2(txc->osr->qlock);
   std::lock_guard l(kv_lock);
   txc->state = TransContext::STATE_WAL_CLEANUP;
+  txc->osr->qcond.notify_all();
   wal_cleanup_queue.push_back(txc);
   kv_cond.notify_one();
   return 0;
@@ -5542,6 +5544,8 @@ int BlueStore::_do_write(
         (offset / block_size == (o->onode.size - 1) / block_size)) {
       dout(20) << __func__ << " using cached tail" << dendl;
       assert((offset & block_mask) == (o->onode.size & block_mask));
+      // wait for any related wal writes to commit
+      txc->osr->wait_for_wal_on_seq(o->tail_txc_seq);
       uint64_t tail_off = offset % block_size;
       if (tail_off >= o->tail_bl.length()) {
         bufferlist t;
diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h
index ffd48fc639cf..edbd58b08b13 100644
--- a/src/os/bluestore/BlueStore.h
+++ b/src/os/bluestore/BlueStore.h
@@ -405,6 +405,28 @@ public:
         txc->oncommits.push_back(c);
       return false;
     }
+
+    /// if there is a wal on @seq, wait for it to apply
+    void wait_for_wal_on_seq(uint64_t seq) {
+      std::unique_lock l(qlock);
+    restart:
+      for (OpSequencer::q_list_t::reverse_iterator p = q.rbegin();
+           p != q.rend();
+           ++p) {
+        if (p->seq == seq) {
+          TransContext *txc = &(*p);
+          if (txc->wal_txn) {
+            while (txc->state < TransContext::STATE_WAL_CLEANUP) {
+              txc->osr->qcond.wait(l);
+              goto restart;  // txc may have gone away
+            }
+          }
+          break;
+        }
+        if (p->seq < seq)
+          break;
+      }
+    }
   };
 
   class WALWQ : public ThreadPool::WorkQueue {