From: Yin Congmin Date: Fri, 27 Aug 2021 15:41:49 +0000 (+0000) Subject: librbd/cache/pwl/ssd: move finish_op() to the end of callback function X-Git-Tag: v17.1.0~526^2~1 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=c531768838e44ed8eb28e8b27594d7e03ca3ffcf;p=ceph.git librbd/cache/pwl/ssd: move finish_op() to the end of callback function In the ssd cache, finish_op() is not called at the end of the callback functions in append_op_log_entries(), and some operations after finish_op() still need to acquire m_lock. So, during shutdown, wait_for_ops() thinks all ops are finished and that no thread will acquire m_lock any more. In the subsequent shutdown steps, m_lock is acquired, and _aio_stop() in bdev->close() waits for all aio_write() and aio_submit() calls to finish while m_lock is held, but the callback function of aio_write() is itself waiting for m_lock, causing a deadlock. Move finish_op() to the end of the callbacks to fix the deadlock. Fixes: https://tracker.ceph.com/issues/52235 Signed-off-by: Yin Congmin --- diff --git a/src/librbd/cache/pwl/ssd/WriteLog.cc b/src/librbd/cache/pwl/ssd/WriteLog.cc index 550123924de0..e6bcd17c35e4 100644 --- a/src/librbd/cache/pwl/ssd/WriteLog.cc +++ b/src/librbd/cache/pwl/ssd/WriteLog.cc @@ -448,8 +448,6 @@ void WriteLog::append_op_log_entries(GenericLogOperations &ops) { Context *ctx = new LambdaContext([this, ops](int r) { assert(r == 0); ldout(m_image_ctx.cct, 20) << "Finished root update " << dendl; - this->m_async_update_superblock--; - this->m_async_op_tracker.finish_op(); auto captured_ops = std::move(ops); this->complete_op_log_entries(std::move(captured_ops), r); @@ -469,6 +467,8 @@ void WriteLog::append_op_log_entries(GenericLogOperations &ops) { if (need_finisher) { this->enlist_op_appender(); } + this->m_async_update_superblock--; + this->m_async_op_tracker.finish_op(); }); uint64_t *new_first_free_entry = new(uint64_t); Context *append_ctx = new LambdaContext( @@ -481,8 +481,6 @@ void 
WriteLog::append_op_log_entries(GenericLogOperations &ops) { for (auto &operation : ops) { operation->log_append_comp_time = now; } - this->m_async_append_ops--; - this->m_async_op_tracker.finish_op(); std::lock_guard locker(this->m_log_append_lock); std::lock_guard locker1(m_lock); @@ -494,6 +492,8 @@ void WriteLog::append_op_log_entries(GenericLogOperations &ops) { delete new_first_free_entry; schedule_update_root(new_root, ctx); } + this->m_async_append_ops--; + this->m_async_op_tracker.finish_op(); }); // Append logs and update first_free_update append_ops(ops, append_ctx, new_first_free_entry); @@ -756,12 +756,12 @@ bool WriteLog::retire_entries(const unsigned long int frees_per_tx) { this->m_alloc_failed_since_retire = false; this->wake_up(); - m_async_update_superblock--; - this->m_async_op_tracker.finish_op(); } this->dispatch_deferred_writes(); this->process_writeback_dirty_entries(); + m_async_update_superblock--; + this->m_async_op_tracker.finish_op(); }); std::lock_guard locker(m_lock);