git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
msg: drain stack before stopping processors to avoid shutdown hang
author Nitzan Mordechai <nmordech@redhat.com>
Tue, 22 Apr 2025 16:23:16 +0000 (16:23 +0000)
committer Nitzan Mordechai <nmordech@redhat.com>
Sun, 10 Aug 2025 06:48:49 +0000 (06:48 +0000)
`AsyncMessenger::shutdown()` called WorkerProcessor::stop() first,
killing the worker threads, then queued a C_drain callback via
stack->drain().  If a worker had already exited its event loop, it never
processed the callback, so drain.wait() blocked forever and the monitor
shutdown hung for minutes.

Move stack->drain() ahead of the processors->stop() loop.  With the new
order the workers are still alive to acknowledge the drain.

Fixes: https://tracker.ceph.com/issues/71303
Signed-off-by: Nitzan Mordechai <nmordec@redhat.com>
(cherry picked from commit 5fbb9c5e464e3a2227f0c4729b2e6a1bc2f6f9d6)

src/msg/async/AsyncMessenger.cc

index 6b3a8c3f6dcd8d515eadd3b189e4f45efbd8d00a..eedf22d53ddbffc8b6c593deb8ce881011236280 100644 (file)
@@ -341,6 +341,7 @@ int AsyncMessenger::shutdown()
 {
   ldout(cct,10) << __func__ << " " << get_myaddrs() << dendl;
 
+  stack->drain();
   // done!  clean up.
   for (auto &&p : processors)
     p->stop();
@@ -353,7 +354,7 @@ int AsyncMessenger::shutdown()
   stop_cond.notify_all();
   stopped = true;
   lock.unlock();
-  stack->drain();
+
   return 0;
 }