From d4209863b7f136ea423969792c8d3bad2cf0145d Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 3 Nov 2016 10:33:23 -0400 Subject: [PATCH] os/bluestore: fix _split_collections race with osr_reap The SharedBlobSet may not yet be empty at split time because previous transactions may not have been reaped, leaving some Blobs still alive even after the cache refs are cleared. Drop them explicitly here (they will go away shortly). Signed-off-by: Sage Weil --- src/os/bluestore/BlueStore.cc | 23 +++++++++++++---- src/os/bluestore/BlueStore.h | 9 +++++++ src/test/objectstore/store_test.cc | 40 ++++++++++++++++++++++-------- 3 files changed, 57 insertions(+), 15 deletions(-) diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index 2212774c11864..0791b6a917739 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -1143,6 +1143,12 @@ void BlueStore::OnodeSpace::clear() onode_map.clear(); } +bool BlueStore::OnodeSpace::empty() +{ + std::lock_guard l(cache->lock); + return onode_map.empty(); +} + void BlueStore::OnodeSpace::rename(OnodeRef& oldo, const ghobject_t& old_oid, const ghobject_t& new_oid, @@ -8922,17 +8928,24 @@ int BlueStore::_split_collection(TransContext *txc, { dout(15) << __func__ << " " << c->cid << " to " << d->cid << " " << " bits " << bits << dendl; - int r; RWLock::WLocker l(c->lock); RWLock::WLocker l2(d->lock); - // blow away the caches. FIXME. + // blow away src cache c->onode_map.clear(); - d->onode_map.clear(); - assert(c->shared_blob_set.empty()); + // We have an awkward race here: previous pipelinex transactions may + // still reference blobs and their shared_blobs. They will be flushed + // shortly by _osr_reap_done, but it's awkward to block for that (and + // a waste of time). Instead, explicitly remove them from the shared blob + // map. + c->shared_blob_set.violently_clear(); + + // the destination should be empty. + assert(d->onode_map.empty()); assert(d->shared_blob_set.empty()); + // adjust bits c->cnode.bits = bits; assert(d->cnode.bits == bits); r = 0; @@ -8943,7 +8956,7 @@ int BlueStore::_split_collection(TransContext *txc, dout(10) << __func__ << " " << c->cid << " to " << d->cid << " " << " bits " << bits << " = " << r << dendl; - return r; + return 0; } diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h index ea2ce8d685b6c..53ea198a0c575 100644 --- a/src/os/bluestore/BlueStore.h +++ b/src/os/bluestore/BlueStore.h @@ -393,6 +393,14 @@ public: std::lock_guard l(lock); return sb_map.empty(); } + + void violently_clear() { + std::lock_guard l(lock); + for (auto& p : sb_map) { + p.second->parent_set = nullptr; + } + sb_map.clear(); + } }; /// in-memory blob metadata and associated cached buffers (if any) @@ -1004,6 +1012,7 @@ public: const ghobject_t& new_oid, const string& new_okey); void clear(); + bool empty(); /// return true if f true for any item bool map_any(std::function f); diff --git a/src/test/objectstore/store_test.cc b/src/test/objectstore/store_test.cc index d615355fc3def..0bf4b1585f020 100644 --- a/src/test/objectstore/store_test.cc +++ b/src/test/objectstore/store_test.cc @@ -5107,7 +5107,8 @@ TEST_P(StoreTest, XattrTest) { void colsplittest( ObjectStore *store, unsigned num_objects, - unsigned common_suffix_size + unsigned common_suffix_size, + bool clones ) { ObjectStore::Sequencer osr("test"); coll_t cid(spg_t(pg_t(0,52),shard_id_t::NO_SHARD)); @@ -5119,17 +5120,30 @@ void colsplittest( r = apply_transaction(store, &osr, std::move(t)); ASSERT_EQ(r, 0); } + bufferlist small; + small.append("small"); { ObjectStore::Transaction t; - for (uint32_t i = 0; i < 2*num_objects; ++i) { + for (uint32_t i = 0; i < (2 - (int)clones)*num_objects; ++i) { stringstream objname; objname << "obj" << i; - t.touch(cid, ghobject_t(hobject_t( - objname.str(), - "", - CEPH_NOSNAP, - i<