From: Sage Weil Date: Thu, 3 Nov 2016 14:33:23 +0000 (-0400) Subject: os/bluestore: fix _split_collections race with osr_reap X-Git-Tag: v11.1.0~434^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=d4209863b7f136ea423969792c8d3bad2cf0145d;p=ceph.git os/bluestore: fix _split_collections race with osr_reap The SharedBlobSet may not yet be empty at split time because previous transactions may not have been reaped, leaving some Blobs still alive even after the cache refs are cleared. Drop them explicitly here (they will go away shortly). Signed-off-by: Sage Weil --- diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index 2212774c1186..0791b6a91773 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -1143,6 +1143,12 @@ void BlueStore::OnodeSpace::clear() onode_map.clear(); } +bool BlueStore::OnodeSpace::empty() +{ + std::lock_guard l(cache->lock); + return onode_map.empty(); +} + void BlueStore::OnodeSpace::rename(OnodeRef& oldo, const ghobject_t& old_oid, const ghobject_t& new_oid, @@ -8922,17 +8928,24 @@ int BlueStore::_split_collection(TransContext *txc, { dout(15) << __func__ << " " << c->cid << " to " << d->cid << " " << " bits " << bits << dendl; - int r; RWLock::WLocker l(c->lock); RWLock::WLocker l2(d->lock); - // blow away the caches. FIXME. + // blow away src cache c->onode_map.clear(); - d->onode_map.clear(); - assert(c->shared_blob_set.empty()); + // We have an awkward race here: previous pipelinex transactions may + // still reference blobs and their shared_blobs. They will be flushed + // shortly by _osr_reap_done, but it's awkward to block for that (and + // a waste of time). Instead, explicitly remove them from the shared blob + // map. + c->shared_blob_set.violently_clear(); + + // the destination should be empty. + assert(d->onode_map.empty()); assert(d->shared_blob_set.empty()); + // adjust bits c->cnode.bits = bits; assert(d->cnode.bits == bits); r = 0; @@ -8943,7 +8956,7 @@ int BlueStore::_split_collection(TransContext *txc, dout(10) << __func__ << " " << c->cid << " to " << d->cid << " " << " bits " << bits << " = " << r << dendl; - return r; + return 0; } diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h index ea2ce8d685b6..53ea198a0c57 100644 --- a/src/os/bluestore/BlueStore.h +++ b/src/os/bluestore/BlueStore.h @@ -393,6 +393,14 @@ public: std::lock_guard l(lock); return sb_map.empty(); } + + void violently_clear() { + std::lock_guard l(lock); + for (auto& p : sb_map) { + p.second->parent_set = nullptr; + } + sb_map.clear(); + } }; /// in-memory blob metadata and associated cached buffers (if any) @@ -1004,6 +1012,7 @@ public: const ghobject_t& new_oid, const string& new_okey); void clear(); + bool empty(); /// return true if f true for any item bool map_any(std::function f); diff --git a/src/test/objectstore/store_test.cc b/src/test/objectstore/store_test.cc index d615355fc3de..0bf4b1585f02 100644 --- a/src/test/objectstore/store_test.cc +++ b/src/test/objectstore/store_test.cc @@ -5107,7 +5107,8 @@ TEST_P(StoreTest, XattrTest) { void colsplittest( ObjectStore *store, unsigned num_objects, - unsigned common_suffix_size + unsigned common_suffix_size, + bool clones ) { ObjectStore::Sequencer osr("test"); coll_t cid(spg_t(pg_t(0,52),shard_id_t::NO_SHARD)); @@ -5119,17 +5120,30 @@ void colsplittest( r = apply_transaction(store, &osr, std::move(t)); ASSERT_EQ(r, 0); } + bufferlist small; + small.append("small"); { ObjectStore::Transaction t; - for (uint32_t i = 0; i < 2*num_objects; ++i) { + for (uint32_t i = 0; i < (2 - (int)clones)*num_objects; ++i) { stringstream objname; objname << "obj" << i; - t.touch(cid, ghobject_t(hobject_t( - objname.str(), - "", - CEPH_NOSNAP, - i<