From 4a51626174297a5fa3e98d3ffe9d9fe9b20ee014 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 13 Aug 2018 12:10:31 -0500 Subject: [PATCH] os/bluestore: fix split vs finish_write race In _tcx_finish(), we were looking at the right Cache for the collection, and then calling finish_write with that Cache and taking the lock. This could race with a split_cache() such that after we got the lock the collection was not on a different cache. This would in turn lead to a failed assertion later on in _rm_buffer when the sharedblob was trimmed. Fixes: http://tracker.ceph.com/issues/24439 Signed-off-by: Sage Weil (cherry picked from commit f21f1f14f2d2a465ba072118bd8e32271bf8906e) Signed-off-by: Jonathan Brielmaier --- src/os/bluestore/BlueStore.cc | 23 +++++++++++++++++++---- src/os/bluestore/BlueStore.h | 4 +++- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index b326d39bcd2e..9b5fcface411 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -1370,10 +1370,8 @@ void BlueStore::BufferSpace::read( cache->logger->inc(l_bluestore_buffer_miss_bytes, miss_bytes); } -void BlueStore::BufferSpace::finish_write(Cache* cache, uint64_t seq) +void BlueStore::BufferSpace::_finish_write(Cache* cache, uint64_t seq) { - std::lock_guard l(cache->lock); - auto i = writing.begin(); while (i != writing.end()) { if (i->seq > seq) { @@ -1648,6 +1646,23 @@ void BlueStore::SharedBlob::put_ref(uint64_t offset, uint32_t length, } } +void BlueStore::SharedBlob::finish_write(uint64_t seq) +{ + while (true) { + Cache *cache = coll->cache; + std::lock_guard l(cache->lock); + if (coll->cache != cache) { + ldout(coll->store->cct, 20) << __func__ + << " raced with sb cache update, was " << cache + << ", now " << coll->cache << ", retrying" + << dendl; + continue; + } + bc._finish_write(cache, seq); + break; + } +} + // SharedBlobSet #undef dout_prefix @@ -8398,7 +8413,7 @@ void BlueStore::_txc_finish(TransContext *txc) assert(txc->state == TransContext::STATE_FINISHING); for (auto& sb : txc->shared_blobs_written) { - sb->bc.finish_write(sb->get_cache(), txc->seq); + sb->finish_write(txc->seq); } txc->shared_blobs_written.clear(); diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h index 6a14042df107..3c81ebc0b9c0 100644 --- a/src/os/bluestore/BlueStore.h +++ b/src/os/bluestore/BlueStore.h @@ -333,7 +333,7 @@ public: b->cache_private = _discard(cache, offset, bl.length()); _add_buffer(cache, b, (flags & Buffer::FLAG_NOCACHE) ? 0 : 1, nullptr); } - void finish_write(Cache* cache, uint64_t seq); + void _finish_write(Cache* cache, uint64_t seq); void did_read(Cache* cache, uint32_t offset, bufferlist& bl) { std::lock_guard l(cache->lock); Buffer *b = new Buffer(this, Buffer::STATE_CLEAN, 0, offset, bl); @@ -410,6 +410,8 @@ public: void put_ref(uint64_t offset, uint32_t length, PExtentVector *r, set *maybe_unshared_blobs); + void finish_write(uint64_t seq); + friend bool operator==(const SharedBlob &l, const SharedBlob &r) { return l.get_sbid() == r.get_sbid(); } -- 2.47.3