From 75b0f58e37b0ede5e8cf8dcaea980cf71a5ca908 Mon Sep 17 00:00:00 2001 From: Jason Dillaman Date: Mon, 20 Jun 2016 09:39:24 -0400 Subject: [PATCH] librbd: potential race when replaying journal ops Fixes: http://tracker.ceph.com/issues/16198 Signed-off-by: Jason Dillaman --- src/librbd/journal/Replay.cc | 22 +++++++++++++--------- src/librbd/journal/Replay.h | 1 + 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/src/librbd/journal/Replay.cc b/src/librbd/journal/Replay.cc index 8576e2ab3c236..ac617f09a8cc6 100644 --- a/src/librbd/journal/Replay.cc +++ b/src/librbd/journal/Replay.cc @@ -147,6 +147,7 @@ Replay::~Replay() { assert(m_aio_modify_unsafe_contexts.empty()); assert(m_aio_modify_safe_contexts.empty()); assert(m_op_events.empty()); + assert(m_in_flight_op_events == 0); } template @@ -212,7 +213,7 @@ void Replay::shut_down(bool cancel_ops, Context *on_finish) { } assert(m_flush_ctx == nullptr); - if (!m_op_events.empty() || flush_comp != nullptr) { + if (m_in_flight_op_events > 0 || flush_comp != nullptr) { std::swap(m_flush_ctx, on_finish); } } @@ -691,7 +692,7 @@ void Replay::handle_aio_flush_complete(Context *on_flush_safe, m_in_flight_aio_modify -= on_safe_ctxs.size(); std::swap(on_aio_ready, m_on_aio_ready); - if (m_op_events.empty() && + if (m_in_flight_op_events == 0 && (m_in_flight_aio_flush + m_in_flight_aio_modify) == 0) { on_flush = m_flush_ctx; } @@ -740,6 +741,7 @@ Context *Replay::create_op_context_callback(uint64_t op_tid, return nullptr; } + ++m_in_flight_op_events; *op_event = &m_op_events[op_tid]; (*op_event)->on_start_safe = on_safe; @@ -755,7 +757,6 @@ void Replay::handle_op_complete(uint64_t op_tid, int r) { << "r=" << r << dendl; OpEvent op_event; - Context *on_flush = nullptr; bool shutting_down = false; { Mutex::Locker locker(m_lock); @@ -766,10 +767,6 @@ void Replay::handle_op_complete(uint64_t op_tid, int r) { m_op_events.erase(op_it); shutting_down = (m_flush_ctx != nullptr); - if (m_op_events.empty() && - (m_in_flight_aio_flush + m_in_flight_aio_modify) == 0) { - on_flush = m_flush_ctx; - } } assert(op_event.on_start_ready == nullptr || (r < 0 && r != -ERESTART)); @@ -802,8 +799,15 @@ void Replay::handle_op_complete(uint64_t op_tid, int r) { if (op_event.on_finish_safe != nullptr) { op_event.on_finish_safe->complete(r); } - if (on_flush != nullptr) { - on_flush->complete(0); + + // shut down request might have occurred while lock was + // dropped -- handle if pending + Mutex::Locker locker(m_lock); + assert(m_in_flight_op_events > 0); + --m_in_flight_op_events; + if (m_flush_ctx != nullptr && m_in_flight_op_events == 0 && + (m_in_flight_aio_flush + m_in_flight_aio_modify) == 0) { + m_image_ctx.op_work_queue->queue(m_flush_ctx, 0); } } diff --git a/src/librbd/journal/Replay.h b/src/librbd/journal/Replay.h index ceb67466b011c..bbea390f784b3 100644 --- a/src/librbd/journal/Replay.h +++ b/src/librbd/journal/Replay.h @@ -122,6 +122,7 @@ private: ContextSet m_aio_modify_safe_contexts; OpEvents m_op_events; + uint64_t m_in_flight_op_events = 0; Context *m_flush_ctx = nullptr; Context *m_on_aio_ready = nullptr; -- 2.39.5