git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
msg: drain stack before stopping processors to avoid shutdown hang
author Nitzan Mordechai <nmordech@redhat.com>
Tue, 22 Apr 2025 16:23:16 +0000 (16:23 +0000)
committer Nitzan Mordechai <nmordech@redhat.com>
Sun, 10 Aug 2025 06:50:12 +0000 (06:50 +0000)
`AsyncMessenger::shutdown()` called WorkerProcessor::stop() first,
killing the worker threads, then queued a C_drain callback via
stack->drain().  If a worker had already exited its event loop, it never
processed the callback, so drain.wait() blocked forever and the monitor
shutdown hung for minutes.

Move stack->drain() ahead of the processors->stop() loop.  With the new
order the workers are still alive to acknowledge the drain.

Fixes: https://tracker.ceph.com/issues/71303
Signed-off-by: Nitzan Mordechai <nmordec@redhat.com>
(cherry picked from commit 5fbb9c5e464e3a2227f0c4729b2e6a1bc2f6f9d6)

src/msg/async/AsyncMessenger.cc

index 64868f93c0137ed6b8ce8a15f71dea03f97b0d06..3a8f9e0c1894cfc67c7a6baffbd5f94da3081d44 100644 (file)
@@ -481,6 +481,7 @@ int AsyncMessenger::shutdown()
 {
   ldout(cct,10) << __func__ << " " << get_myaddrs() << dendl;
 
+  stack->drain();
   // done!  clean up.
   for (auto &&p : processors)
     p->stop();
@@ -493,7 +494,7 @@ int AsyncMessenger::shutdown()
   stop_cond.notify_all();
   stopped = true;
   lock.unlock();
-  stack->drain();
+
   return 0;
 }