From def17606fc46bf6dadf40828a419e5914c83e2a9 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Sat, 18 Mar 2017 13:51:08 -0400 Subject: [PATCH] os/bluestore: handle zombie OpSequencers It's possible for the Sequencer to go away while the OpSequencer still has txcs in flight. We were handling the case where the osr was on the deferred_queue, but it may be off the deferred_queue but waiting for the commit to happen, and we still need to wait for that. Fix this by introducing a 'zombie' state for the osr, in which we keep the osr in the osr_set. Clean up the OpSequencer methods and a few other method names. Signed-off-by: Sage Weil --- src/os/bluestore/BlueStore.cc | 44 ++++++++++++++++++++++++----------- src/os/bluestore/BlueStore.h | 31 +++++++++++++++++------- 2 files changed, 53 insertions(+), 22 deletions(-) diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index 76d24fa5eae..e5996af4d36 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -4866,8 +4866,7 @@ int BlueStore::umount() dout(1) << __func__ << dendl; _osr_drain_all(); - _osr_discard_all(); - assert(osr_set.empty()); // nobody should be creating sequencers! + _osr_unregister_all(); mempool_thread.shutdown(); @@ -7477,7 +7476,11 @@ void BlueStore::_txc_finish(TransContext *txc) txc->state = TransContext::STATE_DONE; } - _osr_reap_done(osr.get()); + bool empty = _osr_reap_done(osr.get()); + if (empty && osr->zombie) { + dout(10) << __func__ << " reaping empty zombie osr " << osr << dendl; + osr->_unregister(); + } } void BlueStore::_txc_release_alloc(TransContext *txc) @@ -7496,10 +7499,10 @@ void BlueStore::_txc_release_alloc(TransContext *txc) txc->released.clear(); } -void BlueStore::_osr_reap_done(OpSequencer *osr) +bool BlueStore::_osr_reap_done(OpSequencer *osr) { CollectionRef c; - + bool empty = false; { std::lock_guard l(osr->qlock); dout(20) << __func__ << " osr " << osr << dendl; @@ -7525,13 +7528,17 @@ void BlueStore::_osr_reap_done(OpSequencer *osr) delete txc; osr->qcond.notify_all(); } - if (osr->q.empty()) + if (osr->q.empty()) { dout(20) << __func__ << " osr " << osr << " q now empty" << dendl; + empty = true; + } } if (c) { c->trim_cache(); } + + return empty; } void BlueStore::_osr_drain_all() @@ -7543,16 +7550,12 @@ void BlueStore::_osr_drain_all() std::lock_guard l(osr_lock); s = osr_set; } + dout(20) << __func__ << " osr_set " << s << dendl; deferred_aggressive = true; { // submit anything pending std::lock_guard l(deferred_lock); - // include deferred osrs in our wait list; these may have been - // deregistered already! - for (auto& osr : deferred_queue) { - s.insert(&osr); - } _deferred_try_submit(); } { @@ -7569,16 +7572,29 @@ void BlueStore::_osr_drain_all() dout(10) << __func__ << " done" << dendl; } -void BlueStore::_osr_discard_all() +void BlueStore::_osr_unregister_all() { - dout(10) << __func__ << " " << osr_set << dendl; set s; { std::lock_guard l(osr_lock); s = osr_set; } + dout(10) << __func__ << " " << s << dendl; for (auto osr : s) { - osr->discard(); + osr->_unregister(); + + if (!osr->zombie) { + // break link from Sequencer to us so that this OpSequencer + // instance can die with this mount/umount cycle. note that + // we assume umount() will not race against ~Sequencer. + assert(osr->parent); + osr->parent->p.reset(); + } + } + // nobody should be creating sequencers during umount either. + { + std::lock_guard l(osr_lock); + assert(osr_set.empty()); } } diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h index a93ae37785b..04cb5536e5f 100644 --- a/src/os/bluestore/BlueStore.h +++ b/src/os/bluestore/BlueStore.h @@ -1550,25 +1550,40 @@ public: std::atomic_int kv_submitted_waiters = {0}; - std::mutex register_lock; - bool registered = true; + std::atomic_bool registered = {true}; ///< registered in BlueStore's osr_set + std::atomic_bool zombie = {false}; ///< owning Sequencer has gone away OpSequencer(CephContext* cct, BlueStore *store) : Sequencer_impl(cct), parent(NULL), store(store) { - std::lock_guard l(register_lock); store->register_osr(this); } ~OpSequencer() { assert(q.empty()); - discard(); + _unregister(); } void discard() override { - std::lock_guard l(register_lock); + // Note that we may have txc's in flight when the parent Sequencer + // goes away. Reflect this with zombie==registered==true and let + // _osr_reap_done or _osr_drain_all clean up later. + assert(!zombie); + zombie = true; + parent = nullptr; + bool empty; + { + std::lock_guard l(qlock); + empty = q.empty(); + } + if (empty) { + _unregister(); + } + } + + void _unregister() { if (registered) { - registered = false; store->unregister_osr(this); + registered = false; } } @@ -1850,9 +1865,9 @@ private: void _txc_finish(TransContext *txc); void _txc_release_alloc(TransContext *txc); - void _osr_reap_done(OpSequencer *osr); + bool _osr_reap_done(OpSequencer *osr); void _osr_drain_all(); - void _osr_discard_all(); + void _osr_unregister_all(); void _kv_sync_thread(); void _kv_stop() { -- 2.47.3