git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
msg/SimpleMessenger: drop msgr lock when joining a Pipe 2192/head
author: Sage Weil <sage@redhat.com>
Mon, 4 Aug 2014 01:26:34 +0000 (18:26 -0700)
committer: Sage Weil <sage@redhat.com>
Mon, 4 Aug 2014 01:26:34 +0000 (18:26 -0700)
Avoid this deadlock:

- a fault
- delay thread entry gets a fast dispatch message
 - drops delay_lock
 - calls into fast_dispatch
- reaper tries to reap the pipe
 - pipe->join()
  - delay_thread->join()
   - blocks waiting for delay_thread to exit
- delay thread / fast dispatch blocks on msgr->lock trying to mark_down

The solution is to drop the msgr lock while joining the thread.  This will
allow the join() to complete.  Adjust the reaper thread to recheck the
exit condition since the lock may have been dropped.  The other two callers
do not care.

Fixes: #8891
Signed-off-by: Sage Weil <sage@redhat.com>
src/msg/SimpleMessenger.cc

index 19de925774909b31be8083668f6ae6b6355f75be..16de54b48b97534d7c6445d37d51ad7b59537413 100644 (file)
@@ -204,7 +204,9 @@ void SimpleMessenger::reaper_entry()
   ldout(cct,10) << "reaper_entry start" << dendl;
   lock.Lock();
   while (!reaper_stop) {
-    reaper();
+    reaper();  // may drop and retake the lock
+    if (reaper_stop)
+      break;
     reaper_cond.Wait(lock);
   }
   lock.Unlock();
@@ -236,7 +238,14 @@ void SimpleMessenger::reaper()
     p->unregister_pipe();
     assert(pipes.count(p));
     pipes.erase(p);
+
+    // drop msgr lock while joining thread; the delay thread could be
+    // trying to fast dispatch, preventing it from joining without
+    // blocking and deadlocking.
+    lock.Unlock();
     p->join();
+    lock.Lock();
+
     if (p->sd >= 0)
       ::close(p->sd);
     ldout(cct,10) << "reaper reaped pipe " << p << " " << p->get_peer_addr() << dendl;