From 8c8944b2c45ca9dc5b8fd4db1590e1d24206c0b3 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 21 May 2018 10:06:37 -0500 Subject: [PATCH] os/bluestore: simplify and fix SharedBlob::put() There is a narrow race possible: A: lookup foo A: put on foo A: foo --nref == 0 B: lookup foo B: put foo B: foo --nref == 0 B: try_remove() succeeds, removes A: try_remove() tries to remove foo again, probably crashes We could fix this by flagging the object in some way to indicate it was removed (maybe clearing parent?), but then we need to be careful about dereferencing foo to get parent from put(). Fix this by moving to a simpler model: make lookup fail if nref == 0. This eliminates the races around put() entirely because once nref reaches 0 it never goes up again. Fixes: http://tracker.ceph.com/issues/24211 Signed-off-by: Sage Weil --- src/os/bluestore/BlueStore.cc | 11 ++--------- src/os/bluestore/BlueStore.h | 14 ++++++-------- 2 files changed, 8 insertions(+), 17 deletions(-) diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index e8da12cb65513..0e847177d5c9c 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -1672,16 +1672,9 @@ void BlueStore::SharedBlob::put() << " removing self from set " << get_parent() << dendl; if (get_parent()) { - if (get_parent()->try_remove(this)) { - delete this; - } else { - ldout(coll->store->cct, 20) - << __func__ << " " << this << " lost race to remove myself from set" - << dendl; - } - } else { - delete this; + get_parent()->remove_last(this); } + delete this; } } diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h index 7903fec564ce4..46e9380eaf0af 100644 --- a/src/os/bluestore/BlueStore.h +++ b/src/os/bluestore/BlueStore.h @@ -437,7 +437,8 @@ public: SharedBlobRef lookup(uint64_t sbid) { std::lock_guard l(lock); auto p = sb_map.find(sbid); - if (p == sb_map.end()) { + if (p == sb_map.end() || + p->second->nref == 0) { return nullptr; } return p->second; @@ -449,14 +450,11 @@ public: sb->coll = coll; } - bool try_remove(SharedBlob *sb) { + void remove_last(SharedBlob *sb) { std::lock_guard l(lock); - if (sb->nref == 0) { - assert(sb->get_parent() == this); - sb_map.erase(sb->get_sbid()); - return true; - } - return false; + assert(sb->nref == 0); + assert(sb->get_parent() == this); + sb_map.erase(sb->get_sbid()); } void remove(SharedBlob *sb) { -- 2.39.5