From 98997f3b2216aac07ee3702dc64dfd8fba278a27 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Sun, 3 Aug 2014 18:26:34 -0700 Subject: [PATCH] msg/SimpleMessenger: drop msgr lock when joining a Pipe Avoid this deadlock: - a fault - delay thread entry gets a fast dispatch message - drops delay_lock - calls into fast_dispatch - reaper tries to reap the pipe - pipe->join() - delay_thread->join() - blocks waiting for delay_thread to exit - delay thread / fast dispatch blocks on msgr->lock trying to mark_down The solution is to drop the msgr lock while joining the thread. This will allow the join() to complete. Adjust the reaper thread to recheck the exit condition since the lock may have been dropped. The other two callers do not care. Fixes: #8891 Signed-off-by: Sage Weil --- src/msg/SimpleMessenger.cc | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/msg/SimpleMessenger.cc b/src/msg/SimpleMessenger.cc index 19de92577490..16de54b48b97 100644 --- a/src/msg/SimpleMessenger.cc +++ b/src/msg/SimpleMessenger.cc @@ -204,7 +204,9 @@ void SimpleMessenger::reaper_entry() ldout(cct,10) << "reaper_entry start" << dendl; lock.Lock(); while (!reaper_stop) { - reaper(); + reaper(); // may drop and retake the lock + if (reaper_stop) + break; reaper_cond.Wait(lock); } lock.Unlock(); @@ -236,7 +238,14 @@ void SimpleMessenger::reaper() p->unregister_pipe(); assert(pipes.count(p)); pipes.erase(p); + + // drop msgr lock while joining thread; the delay thread could be + // trying to fast dispatch, preventing it from joining without + // blocking and deadlocking. + lock.Unlock(); p->join(); + lock.Lock(); + if (p->sd >= 0) ::close(p->sd); ldout(cct,10) << "reaper reaped pipe " << p << " " << p->get_peer_addr() << dendl; -- 2.47.3