From: Jason Dillaman Date: Thu, 11 Feb 2016 00:06:02 +0000 (-0500) Subject: librbd: protect journal replay against overlapping writes X-Git-Tag: v10.1.0~395^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=refs%2Fpull%2F7583%2Fhead;p=ceph.git librbd: protect journal replay against overlapping writes If fsx issues back-to-back synchronous writes, these will be replayed as AIO writes. If object map is enabled, it's possible for the two writes to commit to disk out-of-order if the first write required an object map update. Signed-off-by: Jason Dillaman --- diff --git a/src/librbd/journal/Replay.cc b/src/librbd/journal/Replay.cc index e9a434eba4a..0db4bb3da6c 100644 --- a/src/librbd/journal/Replay.cc +++ b/src/librbd/journal/Replay.cc @@ -428,12 +428,16 @@ void Replay::handle_event(const journal::UnknownEvent &event, } template -void Replay::handle_aio_modify_complete(Context *on_safe, int r) { +void Replay::handle_aio_modify_complete(Context *on_ready, Context *on_safe, + int r) { Mutex::Locker locker(m_lock); CephContext *cct = m_image_ctx.cct; - ldout(cct, 20) << this << " " << __func__ << ": on_safe=" << on_safe << ", " - << "r=" << r << dendl; + ldout(cct, 20) << this << " " << __func__ << ": on_ready=" << on_ready << ", " + << "on_safe=" << on_safe << ", r=" << r << dendl; + if (on_ready != nullptr) { + on_ready->complete(0); + } if (r < 0) { lderr(cct) << "AIO modify op failed: " << cpp_strerror(r) << dendl; on_safe->complete(r); @@ -566,11 +570,6 @@ AioCompletion *Replay::create_aio_modify_completion(Context *on_ready, // completed by flushes-only so that we don't move the journal // commit position until safely on-disk - // when safe, the completion of the next flush will fire the on_safe - // callback - AioCompletion *aio_comp = AioCompletion::create( - new C_AioModifyComplete(this, on_safe)); - *flush_required = (m_aio_modify_unsafe_contexts.size() == IN_FLIGHT_IO_LOW_WATER_MARK); if (*flush_required) { @@ -586,11 +585,15 
@@ AioCompletion *Replay::create_aio_modify_completion(Context *on_ready, if (m_in_flight_aio_modify == IN_FLIGHT_IO_HIGH_WATER_MARK) { ldout(cct, 10) << "hit AIO replay high-water mark: pausing replay" << dendl; - m_on_aio_ready = on_ready; - } else { - on_ready->complete(0); + assert(m_on_aio_ready == nullptr); + std::swap(m_on_aio_ready, on_ready); } + // when the modification is ACKed by librbd, we can process the next + // event. when flushed, the completion of the next flush will fire the + // on_safe callback + AioCompletion *aio_comp = AioCompletion::create( + new C_AioModifyComplete(this, on_ready, on_safe)); return aio_comp; } diff --git a/src/librbd/journal/Replay.h b/src/librbd/journal/Replay.h index 6eca842563d..c8c58cbe2d2 100644 --- a/src/librbd/journal/Replay.h +++ b/src/librbd/journal/Replay.h @@ -66,12 +66,13 @@ private: struct C_AioModifyComplete : public Context { Replay *replay; + Context *on_ready; Context *on_safe; - C_AioModifyComplete(Replay *replay, Context *on_safe) - : replay(replay), on_safe(on_safe) { + C_AioModifyComplete(Replay *replay, Context *on_ready, Context *on_safe) + : replay(replay), on_ready(on_ready), on_safe(on_safe) { } virtual void finish(int r) { - replay->handle_aio_modify_complete(on_safe, r); + replay->handle_aio_modify_complete(on_ready, on_safe, r); } }; @@ -147,7 +148,7 @@ private: void handle_event(const UnknownEvent &event, Context *on_ready, Context *on_safe); - void handle_aio_modify_complete(Context *on_safe, int r); + void handle_aio_modify_complete(Context *on_ready, Context *on_safe, int r); void handle_aio_flush_complete(Context *on_flush_safe, Contexts &on_safe_ctxs, int r); diff --git a/src/test/librbd/journal/test_mock_Replay.cc b/src/test/librbd/journal/test_mock_Replay.cc index 5302b8f94ba..174f52bf88e 100644 --- a/src/test/librbd/journal/test_mock_Replay.cc +++ b/src/test/librbd/journal/test_mock_Replay.cc @@ -338,9 +338,9 @@ TEST_F(TestMockJournalReplay, SoftFlushIO) { 
when_process(mock_journal_replay, EventEntry{AioDiscardEvent(123, 456)}, &on_ready, &on_safes[i]); + when_complete(mock_image_ctx, aio_comp, 0); ASSERT_EQ(0, on_ready.wait()); - when_complete(mock_image_ctx, aio_comp, 0); if (flush_comp != nullptr) { when_complete(mock_image_ctx, flush_comp, 0); } @@ -378,16 +378,14 @@ TEST_F(TestMockJournalReplay, PauseIO) { when_process(mock_journal_replay, EventEntry{AioWriteEvent(123, 456, to_bl("test"))}, &on_ready, &on_safes[i]); + when_complete(mock_image_ctx, aio_comp, 0); if (i < io_count - 1) { ASSERT_EQ(0, on_ready.wait()); - } - - when_complete(mock_image_ctx, aio_comp, 0); - if (i == io_count - 1) { + } else { for (auto flush_comp : flush_comps) { when_complete(mock_image_ctx, flush_comp, 0); - ASSERT_EQ(0, on_ready.wait()); } + ASSERT_EQ(0, on_ready.wait()); } } for (auto &on_safe : on_safes) {