From: Yin Congmin Date: Wed, 15 Sep 2021 11:23:43 +0000 (+0000) Subject: librbd/cache/pwl: fix assert in _aio_stop() during shutdown X-Git-Tag: v16.2.7~50^2~6 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=5bb4ae8f26e1dfc0fab7df9fe2cac0eb1a515ce0;p=ceph.git librbd/cache/pwl: fix assert in _aio_stop() during shutdown The next_ctx passed to wait_for_ops(next_ctx) may run in aio_thread, so the subsequent shutdown steps then run on the aio thread. remove_pool_file() calls bdev->close(), which then calls _aio_stop() and executes aio_thread.join(), causing an assert: a thread can't join itself. Fix it by adding the close ctx to m_work_queue, so close() can run in a work queue thread. At the same time, correct the order of wait_for_ops(). flush_dirty_entries(next_ctx) may call wake_up() and start_op(), so moving wait_for_ops() behind flush_dirty_entries(next_ctx) is more appropriate. Fixes: https://tracker.ceph.com/issues/52566 Signed-off-by: Yin Congmin (cherry picked from commit 94f9873718a82d2def8f268c1581fbf97fee0f49) --- diff --git a/src/librbd/cache/pwl/AbstractWriteLog.cc b/src/librbd/cache/pwl/AbstractWriteLog.cc index 3da2d9a82c2d..9415967611ed 100644 --- a/src/librbd/cache/pwl/AbstractWriteLog.cc +++ b/src/librbd/cache/pwl/AbstractWriteLog.cc @@ -636,6 +636,14 @@ void AbstractWriteLog::shut_down(Context *on_finish) { } update_image_cache_state(next_ctx); }); + ctx = new LambdaContext( + [this, ctx](int r) { + Context *next_ctx = override_ctx(r, ctx); + ldout(m_image_ctx.cct, 6) << "waiting for in flight operations" << dendl; + // Wait for in progress IOs to complete + next_ctx = util::create_async_context_callback(&m_work_queue, next_ctx); + m_async_op_tracker.wait_for_ops(next_ctx); + }); ctx = new LambdaContext( [this, ctx](int r) { Context *next_ctx = override_ctx(r, ctx); @@ -652,14 +660,6 @@ void AbstractWriteLog::shut_down(Context *on_finish) { } flush_dirty_entries(next_ctx); }); - ctx = new LambdaContext( - [this, ctx](int r) { - Context *next_ctx = 
override_ctx(r, ctx); - ldout(m_image_ctx.cct, 6) << "waiting for in flight operations" << dendl; - // Wait for in progress IOs to complete - next_ctx = util::create_async_context_callback(m_image_ctx, next_ctx); - m_async_op_tracker.wait_for_ops(next_ctx); - }); ctx = new LambdaContext( [this, ctx](int r) { ldout(m_image_ctx.cct, 6) << "Done internal_flush in shutdown" << dendl;