From 94f9873718a82d2def8f268c1581fbf97fee0f49 Mon Sep 17 00:00:00 2001 From: Yin Congmin Date: Wed, 15 Sep 2021 11:23:43 +0000 Subject: [PATCH] librbd/cache/pwl: fix assert in _aio_stop() during shutdown The next_ctx passed to wait_for_ops(next_ctx) may run in aio_thread, in which case the rest of the shutdown sequence also runs on the aio thread. remove_pool_file() calls bdev->close(), which then calls _aio_stop() and executes aio_thread.join(), causing an assert: a thread can't join itself. Fix it by adding the close ctx to m_work_queue, so close() can run in a work queue thread. At the same time, correct the order of wait_for_ops(). flush_dirty_entries(next_ctx) may call wake_up() and start_op(), so moving wait_for_ops() after flush_dirty_entries(next_ctx) is more appropriate. Fixes: https://tracker.ceph.com/issues/52566 Signed-off-by: Yin Congmin --- src/librbd/cache/pwl/AbstractWriteLog.cc | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/librbd/cache/pwl/AbstractWriteLog.cc b/src/librbd/cache/pwl/AbstractWriteLog.cc index 5b7799e664b59..9f4e567bb1382 100644 --- a/src/librbd/cache/pwl/AbstractWriteLog.cc +++ b/src/librbd/cache/pwl/AbstractWriteLog.cc @@ -637,6 +637,14 @@ void AbstractWriteLog::shut_down(Context *on_finish) { } update_image_cache_state(next_ctx); }); + ctx = new LambdaContext( + [this, ctx](int r) { + Context *next_ctx = override_ctx(r, ctx); + ldout(m_image_ctx.cct, 6) << "waiting for in flight operations" << dendl; + // Wait for in progress IOs to complete + next_ctx = util::create_async_context_callback(&m_work_queue, next_ctx); + m_async_op_tracker.wait_for_ops(next_ctx); + }); ctx = new LambdaContext( [this, ctx](int r) { Context *next_ctx = override_ctx(r, ctx); @@ -653,14 +661,6 @@ void AbstractWriteLog::shut_down(Context *on_finish) { } flush_dirty_entries(next_ctx); }); - ctx = new LambdaContext( - [this, ctx](int r) { - Context *next_ctx = override_ctx(r, ctx); - ldout(m_image_ctx.cct, 6) << "waiting for in flight operations" << dendl; - // Wait for in 
progress IOs to complete - next_ctx = util::create_async_context_callback(m_image_ctx, next_ctx); - m_async_op_tracker.wait_for_ops(next_ctx); - }); ctx = new LambdaContext( [this, ctx](int r) { ldout(m_image_ctx.cct, 6) << "Done internal_flush in shutdown" << dendl; -- 2.39.5