From 70bf7465ad7c75c9c7623a446218ab501f329bd3 Mon Sep 17 00:00:00 2001 From: Jason Dillaman Date: Mon, 20 Jun 2016 09:39:24 -0400 Subject: [PATCH] librbd: potential race when replaying journal ops Fixes: http://tracker.ceph.com/issues/16198 Signed-off-by: Jason Dillaman (cherry picked from commit 75b0f58e37b0ede5e8cf8dcaea980cf71a5ca908) --- src/librbd/journal/Replay.cc | 22 +++++++++++++--------- src/librbd/journal/Replay.h | 1 + 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/src/librbd/journal/Replay.cc b/src/librbd/journal/Replay.cc index c57202a0ba31b..0016b3b723d09 100644 --- a/src/librbd/journal/Replay.cc +++ b/src/librbd/journal/Replay.cc @@ -143,6 +143,7 @@ Replay::~Replay() { assert(m_aio_modify_unsafe_contexts.empty()); assert(m_aio_modify_safe_contexts.empty()); assert(m_op_events.empty()); + assert(m_in_flight_op_events == 0); } template @@ -208,7 +209,7 @@ void Replay::shut_down(bool cancel_ops, Context *on_finish) { } assert(m_flush_ctx == nullptr); - if (!m_op_events.empty() || flush_comp != nullptr) { + if (m_in_flight_op_events > 0 || flush_comp != nullptr) { std::swap(m_flush_ctx, on_finish); } } @@ -664,7 +665,7 @@ void Replay::handle_aio_flush_complete(Context *on_flush_safe, m_in_flight_aio_modify -= on_safe_ctxs.size(); std::swap(on_aio_ready, m_on_aio_ready); - if (m_op_events.empty() && + if (m_in_flight_op_events == 0 && (m_in_flight_aio_flush + m_in_flight_aio_modify) == 0) { on_flush = m_flush_ctx; } @@ -713,6 +714,7 @@ Context *Replay::create_op_context_callback(uint64_t op_tid, return nullptr; } + ++m_in_flight_op_events; *op_event = &m_op_events[op_tid]; (*op_event)->on_start_safe = on_safe; @@ -728,7 +730,6 @@ void Replay::handle_op_complete(uint64_t op_tid, int r) { << "r=" << r << dendl; OpEvent op_event; - Context *on_flush = nullptr; bool shutting_down = false; { Mutex::Locker locker(m_lock); @@ -739,10 +740,6 @@ void Replay::handle_op_complete(uint64_t op_tid, int r) { m_op_events.erase(op_it); shutting_down = (m_flush_ctx != nullptr); - if (m_op_events.empty() && - (m_in_flight_aio_flush + m_in_flight_aio_modify) == 0) { - on_flush = m_flush_ctx; - } } assert(op_event.on_start_ready == nullptr || (r < 0 && r != -ERESTART)); @@ -775,8 +772,15 @@ void Replay::handle_op_complete(uint64_t op_tid, int r) { if (op_event.on_finish_safe != nullptr) { op_event.on_finish_safe->complete(r); } - if (on_flush != nullptr) { - on_flush->complete(0); + + // shut down request might have occurred while lock was + // dropped -- handle if pending + Mutex::Locker locker(m_lock); + assert(m_in_flight_op_events > 0); + --m_in_flight_op_events; + if (m_flush_ctx != nullptr && m_in_flight_op_events == 0 && + (m_in_flight_aio_flush + m_in_flight_aio_modify) == 0) { + m_image_ctx.op_work_queue->queue(m_flush_ctx, 0); } } diff --git a/src/librbd/journal/Replay.h b/src/librbd/journal/Replay.h index aeca5ba26db68..26775df56c1f2 100644 --- a/src/librbd/journal/Replay.h +++ b/src/librbd/journal/Replay.h @@ -122,6 +122,7 @@ private: ContextSet m_aio_modify_safe_contexts; OpEvents m_op_events; + uint64_t m_in_flight_op_events = 0; Context *m_flush_ctx = nullptr; Context *m_on_aio_ready = nullptr; -- 2.39.5