From d39337fdf8dded82e3dba2e5739595fccdd8fb9c Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 9 Jul 2018 17:22:58 -0500 Subject: [PATCH] os/bluestore: fix osr_drain before merge We need to make sure the deferred writes on the source collection finish before the merge so that ops ordered via the final target sequencer will occur after those writes. Signed-off-by: Sage Weil --- src/os/bluestore/BlueStore.cc | 34 ++++++++++++++++++++++++++++------ src/os/bluestore/BlueStore.h | 1 + 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index a96d163202d8a..66d728888ff0b 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -9081,6 +9081,29 @@ void BlueStore::_osr_drain_preceding(TransContext *txc) dout(10) << __func__ << " " << osr << " done" << dendl; } +void BlueStore::_osr_drain(OpSequencer *osr) +{ + dout(10) << __func__ << " " << osr << dendl; + ++deferred_aggressive; // raised for the duration of the drain (decremented below); FIXME: maybe osr-local aggressive flag? + { + // submit anything pending on this osr unless osr->deferred_running is set; deferred_lock is dropped in the else branch and presumably inside _deferred_submit_unlock() (confirm) + deferred_lock.lock(); + if (osr->deferred_pending && !osr->deferred_running) { + _deferred_submit_unlock(osr); + } else { + deferred_lock.unlock(); + } + } + { + // notify kv_cond (while holding kv_lock) to wake up any previously finished deferred events + std::lock_guard l(kv_lock); + kv_cond.notify_one(); + } + osr->drain(); + --deferred_aggressive; + dout(10) << __func__ << " " << osr << " done" << dendl; +} + void BlueStore::_osr_drain_all() { dout(10) << __func__ << dendl; @@ -12263,12 +12286,11 @@ int BlueStore::_merge_collection( coll_t cid = (*c)->cid; - // flush all previous deferred writes on this sequencer. this is a bit - // heavyweight, but we need to make sure all deferred writes complete - // before we split as the new collection's sequencer may need to order - // this after those writes, and we don't bother with the complexity of - // moving those TransContexts over to the new osr. 
- _osr_drain_preceding(txc); + // flush all previous deferred writes on the source collection to ensure + // that all deferred writes complete before we merge as the target collection's + // sequencer may need to order new ops after those writes. + + _osr_drain((*c)->osr.get()); // move any cached items (onodes and referenced shared blobs) that will // belong to the child collection post-split. leave everything else behind. diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h index ff7dcff90409a..e809edd34a7e2 100644 --- a/src/os/bluestore/BlueStore.h +++ b/src/os/bluestore/BlueStore.h @@ -2194,6 +2194,7 @@ private: void _osr_attach(Collection *c); void _osr_register_zombie(OpSequencer *osr); + void _osr_drain(OpSequencer *osr); void _osr_drain_preceding(TransContext *txc); void _osr_drain_all(); -- 2.39.5