From e0a3902432d00eeab956291e63984609e59d11ea Mon Sep 17 00:00:00 2001
From: Nitzan Mordechai
Date: Tue, 22 Apr 2025 16:23:16 +0000
Subject: [PATCH] msg: drain stack before stopping processors to avoid shutdown hang

`AsyncMessenger::shutdown()` called WorkerProcessor::stop() first,
killing the worker threads, then queued a C_drain callback via
stack->drain(). If a worker had already exited its event loop it never
processed the callback, so drain.wait() blocked forever and the monitor
shutdown hung for minutes.

Move stack->drain() ahead of the processors->stop() loop. With the new
order the workers are still alive to acknowledge the drain.

Fixes: https://tracker.ceph.com/issues/71303
Signed-off-by: Nitzan Mordechai
(cherry picked from commit 5fbb9c5e464e3a2227f0c4729b2e6a1bc2f6f9d6)
---
 src/msg/async/AsyncMessenger.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/msg/async/AsyncMessenger.cc b/src/msg/async/AsyncMessenger.cc
index 64868f93c01..3a8f9e0c189 100644
--- a/src/msg/async/AsyncMessenger.cc
+++ b/src/msg/async/AsyncMessenger.cc
@@ -481,6 +481,7 @@ int AsyncMessenger::shutdown()
 {
   ldout(cct,10) << __func__ << " " << get_myaddrs() << dendl;
 
+  stack->drain();
   // done! clean up.
   for (auto &&p : processors)
     p->stop();
@@ -493,7 +494,7 @@ int AsyncMessenger::shutdown()
   stop_cond.notify_all();
   stopped = true;
   lock.unlock();
-  stack->drain();
+
   return 0;
 }
 
-- 
2.39.5