git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
msg: fix deadlock when handling existing but closed v2 connection 46732/head
author: Radosław Zarzyński <rzarzyns@redhat.com>
Fri, 17 Jun 2022 12:17:25 +0000 (14:17 +0200)
committer: Radosław Zarzyński <rzarzyns@redhat.com>
Fri, 17 Jun 2022 12:27:50 +0000 (14:27 +0200)
The deadlock is best illustrated by the following snippet,
provided by Jianwei Zhang, who also analyzed the problem
(many thanks!).

```
thread-35
AsyncMessenger::shutdown_connections      hold  AsyncMessenger::lock   std::lock_guard l{lock}
AsyncConnection::stop                     wait  AsyncConnection::lock  lock.lock()

thread-3
ProtocolV2::handle_existing_connection    hold  AsyncConnection::lock  std::lock_guard<std::mutex> l(existing->lock)
AsyncMessenger::accept_conn               wait  AsyncMessenger::lock   std::lock_guard l{lock}
```

Fixes: https://tracker.ceph.com/issues/55355
Signed-off-by: Radosław Zarzyński <rzarzyns@redhat.com>
src/msg/async/ProtocolV2.cc

index 74d94a87e1861aa719fbf9d6c9dbbc3eb6d0a4c5..0cd5f96ac2b192b9e79daeaec595c6626e6b8eb3 100644 (file)
@@ -2603,7 +2603,7 @@ CtPtr ProtocolV2::handle_reconnect(ceph::bufferlist &payload)
 CtPtr ProtocolV2::handle_existing_connection(const AsyncConnectionRef& existing) {
   ldout(cct, 20) << __func__ << " existing=" << existing << dendl;
 
-  std::lock_guard<std::mutex> l(existing->lock);
+  std::unique_lock<std::mutex> l(existing->lock);
 
   ProtocolV2 *exproto = dynamic_cast<ProtocolV2 *>(existing->protocol.get());
   if (!exproto) {
@@ -2614,6 +2614,7 @@ CtPtr ProtocolV2::handle_existing_connection(const AsyncConnectionRef& existing)
   if (exproto->state == CLOSED) {
     ldout(cct, 1) << __func__ << " existing " << existing << " already closed."
                   << dendl;
+    l.unlock();
     return send_server_ident();
   }
 
@@ -2643,6 +2644,7 @@ CtPtr ProtocolV2::handle_existing_connection(const AsyncConnectionRef& existing)
         << dendl;
     existing->protocol->stop();
     existing->dispatch_queue->queue_reset(existing.get());
+    l.unlock();
     return send_server_ident();
   }