From: Sage Weil
Date: Mon, 9 Jul 2018 22:22:58 +0000 (-0500)
Subject: os/bluestore: fix osr_drain before merge
X-Git-Tag: v14.0.1~371^2~62
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=d39337fdf8dded82e3dba2e5739595fccdd8fb9c;p=ceph.git

os/bluestore: fix osr_drain before merge

We need to make sure the deferred writes on the source collection finish
before the merge so that ops ordered via the final target sequencer will
occur after those writes.

Signed-off-by: Sage Weil
---

diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc
index a96d163202d8..66d728888ff0 100644
--- a/src/os/bluestore/BlueStore.cc
+++ b/src/os/bluestore/BlueStore.cc
@@ -9081,6 +9081,29 @@ void BlueStore::_osr_drain_preceding(TransContext *txc)
   dout(10) << __func__ << " " << osr << " done" << dendl;
 }
 
+void BlueStore::_osr_drain(OpSequencer *osr)
+{
+  dout(10) << __func__ << " " << osr << dendl;
+  ++deferred_aggressive; // FIXME: maybe osr-local aggressive flag?
+  {
+    // submit anything pending
+    deferred_lock.lock();
+    if (osr->deferred_pending && !osr->deferred_running) {
+      _deferred_submit_unlock(osr);
+    } else {
+      deferred_lock.unlock();
+    }
+  }
+  {
+    // wake up any previously finished deferred events
+    std::lock_guard l(kv_lock);
+    kv_cond.notify_one();
+  }
+  osr->drain();
+  --deferred_aggressive;
+  dout(10) << __func__ << " " << osr << " done" << dendl;
+}
+
 void BlueStore::_osr_drain_all()
 {
   dout(10) << __func__ << dendl;
@@ -12263,12 +12286,11 @@ int BlueStore::_merge_collection(
 
   coll_t cid = (*c)->cid;
 
-  // flush all previous deferred writes on this sequencer. this is a bit
-  // heavyweight, but we need to make sure all deferred writes complete
-  // before we split as the new collection's sequencer may need to order
-  // this after those writes, and we don't bother with the complexity of
-  // moving those TransContexts over to the new osr.
-  _osr_drain_preceding(txc);
+  // flush all previous deferred writes on the source collection to ensure
+  // that all deferred writes complete before we merge as the target collection's
+  // sequencer may need to order new ops after those writes.
+
+  _osr_drain((*c)->osr.get());
 
   // move any cached items (onodes and referenced shared blobs) that will
   // belong to the child collection post-split. leave everything else behind.
diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h
index ff7dcff90409..e809edd34a7e 100644
--- a/src/os/bluestore/BlueStore.h
+++ b/src/os/bluestore/BlueStore.h
@@ -2194,6 +2194,7 @@ private:
 
   void _osr_attach(Collection *c);
   void _osr_register_zombie(OpSequencer *osr);
+  void _osr_drain(OpSequencer *osr);
   void _osr_drain_preceding(TransContext *txc);
   void _osr_drain_all();
 