From: Sage Weil Date: Mon, 18 Jun 2018 12:32:08 +0000 (-0500) Subject: os/bluestore: reuse zombie OpSequencers by collection id X-Git-Tag: v14.0.1~1076^2~1 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=199a110c9e2a42be64bd7fd873b6b172ce1347a8;p=ceph-ci.git os/bluestore: reuse zombie OpSequencers by collection id We can get a sequence that deletes and then recreates a collection where the transaction removing the collection is delayed (due to pending IO on its sequencer) but colleciton create is not (new sequencer). Avoid any such reordering by recycling the old collection's sequencer if the zombie_osr has not been reaped yet. Fixes: http://tracker.ceph.com/issues/24550 Signed-off-by: Sage Weil --- diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index 47d0e99707a..04e65c01a5c 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -3215,16 +3215,28 @@ void BlueStore::DeferredBatch::_audit(CephContext *cct) #undef dout_prefix #define dout_prefix *_dout << "bluestore(" << store->path << ").collection(" << cid << " " << this << ") " -BlueStore::Collection::Collection(BlueStore *ns, Cache *c, coll_t cid) +BlueStore::Collection::Collection(BlueStore *store_, Cache *c, coll_t cid) : CollectionImpl(cid), - store(ns), - osr(new OpSequencer(store)), + store(store_), cache(c), lock("BlueStore::Collection::lock", true, false), exists(true), onode_map(c) { - osr->shard = cid.hash_to_shard(ns->m_finisher_num); + { + std::lock_guard l(store->zombie_osr_lock); + auto p = store->zombie_osr_set.find(cid); + if (p == store->zombie_osr_set.end()) { + osr = new OpSequencer(store, cid); + osr->shard = cid.hash_to_shard(store->m_finisher_num); + } else { + osr = p->second; + store->zombie_osr_set.erase(p); + ldout(store->cct, 10) << "resurrecting zombie osr " << osr << dendl; + osr->zombie = false; + assert(osr->shard == cid.hash_to_shard(store->m_finisher_num)); + } + } } bool BlueStore::Collection::flush_commit(Context *c) @@ -8692,12 +8704,11 @@ void BlueStore::_txc_finish(TransContext *txc) if (empty && osr->zombie) { std::lock_guard l(zombie_osr_lock); - auto p = zombie_osr_set.find(osr); - if (p != zombie_osr_set.end()) { + if (zombie_osr_set.erase(osr->cid)) { dout(10) << __func__ << " reaping empty zombie osr " << osr << dendl; - zombie_osr_set.erase(p); } else { - dout(10) << __func__ << " empty zombie osr " << osr << " already reaped" << dendl; + dout(10) << __func__ << " empty zombie osr " << osr << " already reaped" + << dendl; } } logger->set(l_bluestore_fragmentation, @@ -8734,9 +8745,10 @@ out: void BlueStore::_osr_register_zombie(OpSequencer *osr) { std::lock_guard l(zombie_osr_lock); - dout(10) << __func__ << " " << osr << dendl; + dout(10) << __func__ << " " << osr << " " << osr->cid << dendl; osr->zombie = true; - zombie_osr_set.insert(osr); + auto i = zombie_osr_set.emplace(osr->cid, osr); + assert(i.second); // this should be a new insertion } void BlueStore::_osr_drain_preceding(TransContext *txc) @@ -8778,8 +8790,8 @@ void BlueStore::_osr_drain_all() { std::lock_guard l(zombie_osr_lock); for (auto& i : zombie_osr_set) { - s.insert(i); - zombies.insert(i); + s.insert(i.second); + zombies.insert(i.second); } } dout(20) << __func__ << " osr_set " << s << dendl; @@ -8807,14 +8819,17 @@ void BlueStore::_osr_drain_all() { std::lock_guard l(zombie_osr_lock); for (auto& osr : zombies) { - auto p = zombie_osr_set.find(osr); - if (p != zombie_osr_set.end()) { + if (zombie_osr_set.erase(osr->cid)) { dout(10) << __func__ << " reaping empty zombie osr " << osr << dendl; - zombie_osr_set.erase(p); + assert(osr->q.empty()); + } else if (osr->zombie) { + dout(10) << __func__ << " empty zombie osr " << osr + << " already reaped" << dendl; + assert(osr->q.empty()); } else { - dout(10) << __func__ << " empty zombie osr " << osr << " already reaped" << dendl; + dout(10) << __func__ << " empty zombie osr " << osr + << " resurrected" << dendl; } - assert(osr->q.empty()); } } diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h index 78d89ee6d6f..3e9b0d26937 100644 --- a/src/os/bluestore/BlueStore.h +++ b/src/os/bluestore/BlueStore.h @@ -1682,6 +1682,7 @@ public: DeferredBatch *deferred_pending = nullptr; BlueStore *store; + coll_t cid; size_t shard; @@ -1695,9 +1696,9 @@ public: std::atomic_bool zombie = {false}; ///< in zombie_osr set (collection going away) - OpSequencer(BlueStore *store) + OpSequencer(BlueStore *store, const coll_t& c) : RefCountedObject(store->cct, 0), - store(store) { + store(store), cid(c) { } ~OpSequencer() { assert(q.empty()); @@ -1834,7 +1835,7 @@ private: vector cache_shards; std::mutex zombie_osr_lock; ///< protect zombie_osr_set - std::set zombie_osr_set; ///< set of OpSequencers for deleted collections + std::map zombie_osr_set; ///< set of OpSequencers for deleted collections std::atomic nid_last = {0}; std::atomic nid_max = {0};