git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
msg: fix deadlock when handling existing but closed v2 connection 47930/head
author: Radosław Zarzyński <rzarzyns@redhat.com>
Fri, 17 Jun 2022 12:17:25 +0000 (14:17 +0200)
committer: Radoslaw Zarzynski <rzarzyns@redhat.com>
Fri, 2 Sep 2022 16:48:29 +0000 (16:48 +0000)
The deadlock is best illustrated by the following snippet,
provided by Jianwei Zhang, who also analyzed the problem
(many thanks!).

```
thread-35
AsyncMessenger::shutdown_connections      hold  AsyncMessenger::lock   std::lock_guard l{lock}
AsyncConnection::stop                     wait  AsyncConnection::lock  lock.lock()

thread-3
ProtocolV2::handle_existing_connection    hold  AsyncConnection::lock  std::lock_guard<std::mutex> l(existing->lock)
AsyncMessenger::accept_conn               wait  AsyncMessenger::lock   std::lock_guard l{lock}
```

Fixes: https://tracker.ceph.com/issues/55355
Signed-off-by: Radosław Zarzyński <rzarzyns@redhat.com>
(cherry picked from commit a6fcb1ccbc44e60416eb5f4e2c7291afe3a9d44d)

src/msg/async/ProtocolV2.cc

index a176fc2c808ac9eb8d465310781daf4eacc20b98..40ebbf01633420049e1f6f6017516ea0a473f442 100644 (file)
@@ -2596,7 +2596,7 @@ CtPtr ProtocolV2::handle_reconnect(ceph::bufferlist &payload)
 CtPtr ProtocolV2::handle_existing_connection(const AsyncConnectionRef& existing) {
   ldout(cct, 20) << __func__ << " existing=" << existing << dendl;
 
-  std::lock_guard<std::mutex> l(existing->lock);
+  std::unique_lock<std::mutex> l(existing->lock);
 
   ProtocolV2 *exproto = dynamic_cast<ProtocolV2 *>(existing->protocol.get());
   if (!exproto) {
@@ -2607,6 +2607,7 @@ CtPtr ProtocolV2::handle_existing_connection(const AsyncConnectionRef& existing)
   if (exproto->state == CLOSED) {
     ldout(cct, 1) << __func__ << " existing " << existing << " already closed."
                   << dendl;
+    l.unlock();
     return send_server_ident();
   }
 
@@ -2636,6 +2637,7 @@ CtPtr ProtocolV2::handle_existing_connection(const AsyncConnectionRef& existing)
         << dendl;
     existing->protocol->stop();
     existing->dispatch_queue->queue_reset(existing.get());
+    l.unlock();
     return send_server_ident();
   }