From 36f8377f93cb18ef2775e293697b16d4cd97afa7 Mon Sep 17 00:00:00 2001 From: Igor Fedotov Date: Tue, 2 Nov 2021 15:03:39 +0300 Subject: [PATCH] os/bluestore: avoid premature onode release. This was observed when onode's removal is followed by reading and the latter causes object release before the removal is finalized. The root cause is an improper 'pinned' state assessment in Onode::get More detailed overview is: At some point Onode::get() might face the case when nref == 2 and pinned = true which means parallel incomplete put is running on the onode - ref count is decremented but pinned state is still unmodified (and even lock hasn't been acquired yet). This might finally result in two puts racing over the same onode with nref == 2 which finally results in a premature onode release: // nref =3, pinned = 1 // Thread 1 Thread 2 // o->put() o->get() // --nref(n = 2, pinned=1) // nref++ (n=3, pinned = 1) // return // ... // o->put() // --nref(n = 2) // pinned = 0, // --nref(n = 1) // ocs->_unpin_and_rm(o) -> o->put() // ... // --nref(n = 0) // release o // o->c->get_onode_cache() // FAULT! // The suggested fix is to introduce additional atomic counter tracking running put() functions. And permit onode release when both regular nref and put_nref are both equal to zero. Fixes: https://tracker.ceph.com/issues/53002 Signed-off-by: Igor Fedotov (cherry picked from commit 96f0efe6d5307a55bea32f7216ef9511da0c5a47) --- src/os/bluestore/BlueStore.cc | 10 +++++----- src/os/bluestore/BlueStore.h | 1 + 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index 12fed048f88e0..f534508fd9615 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -3577,6 +3577,7 @@ void BlueStore::Onode::get() { } } void BlueStore::Onode::put() { + ++put_nref; int n = --nref; if (n == 2) { OnodeCacheShard* ocs = c->get_onode_cache(); @@ -3596,19 +3597,18 @@ void BlueStore::Onode::put() { ocs->_unpin(this); } else { ocs->_unpin_and_rm(this); - // remove will also decrement nref and delete Onode + // remove will also decrement nref c->onode_map._remove(oid); } } // additional decrement for newly unpinned instance - // should be the last action since Onode can be released - // at any point after this decrement if (need_unpin) { - n = --nref; + --nref; } ocs->lock.unlock(); } - if (n == 0) { + auto pn = --put_nref; + if (nref == 0 && pn == 0) { delete this; } } diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h index 850dfdbdc0298..3c4f2b08f90a2 100644 --- a/src/os/bluestore/BlueStore.h +++ b/src/os/bluestore/BlueStore.h @@ -1058,6 +1058,7 @@ public: MEMPOOL_CLASS_HELPERS(); std::atomic_int nref; ///< reference count + std::atomic_int put_nref = {0}; Collection *c; ghobject_t oid; -- 2.39.5