From ff8833d0a0d97ae3ce249f10df40efc4195fabc2 Mon Sep 17 00:00:00 2001 From: Radoslaw Zarzynski Date: Wed, 11 Jul 2018 01:14:54 +0200 Subject: [PATCH] os/bluestore: fix races on SharedBlob::coll in ~SharedBlob. Example scenario: ``` A: BlueStore::Collection::split_cache(src, dest) A: std::lock(src->cache->lock, dest->cache->lock) B: SharedBlob::~SharedBlob B: waits coll->cache->lock // coll == src A: sb->coll := dest A: unlocks both src and dest's cache mutexes C: // any locked operation on dest's cache C: acquires dest->cache->lock C: begins the op B: // with the src's cache mutex acquired B: BufferSpace::_clear(coll->cache) // coll == dest B: // oops, B operates on cache already locked by C ``` Fixes: http://tracker.ceph.com/issues/24859 Signed-off-by: Radoslaw Zarzynski (cherry picked from commit d1b16749c7d32eead6e326c492ef9edf0d03a2b8) --- src/os/bluestore/BlueStore.cc | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index 6fdd11b170b24..1da1b40bb08c0 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -1650,11 +1650,6 @@ BlueStore::SharedBlob::SharedBlob(uint64_t i, Collection *_coll) BlueStore::SharedBlob::~SharedBlob() { - if (get_cache()) { // the dummy instances have a nullptr - std::lock_guard l(get_cache()->lock); - bc._clear(get_cache()); - get_cache()->rm_blob(); - } if (loaded && persistent) { delete persistent; } @@ -1666,8 +1661,17 @@ void BlueStore::SharedBlob::put() ldout(coll->store->cct, 20) << __func__ << " " << this << " removing self from set " << get_parent() << dendl; - if (get_parent()) { - get_parent()->remove(this); + again: + auto coll_snap = coll; + if (coll_snap) { + std::lock_guard l(coll_snap->cache->lock); + if (coll_snap != coll) { + goto again; + } + coll_snap->shared_blob_set.remove(this); + + bc._clear(coll_snap->cache); + coll_snap->cache->rm_blob(); } delete this; } -- 2.39.5