git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
msg/async/rdma: avoid enqueue_dead_qp deadlock 33852/head
author: Chunsong Feng <fengchunsong@huawei.com>
Mon, 17 Aug 2020 11:59:34 +0000 (19:59 +0800)
committer: luo rixin <luorixin@huawei.com>
Sat, 29 Aug 2020 06:26:26 +0000 (14:26 +0800)
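In RDMADispatcher::handle_async_event, when the
IBV_EVENT_QP_LAST_WQE_REACHED case is matched, enqueue_dead_qp tries to
acquire the lock, which results in a deadlock there. Add a lockless variant,
enqueue_dead_qp_lockless, and call it from that path; enqueue_dead_qp is
kept as a wrapper that takes the lock and then calls the lockless variant.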

Fixes: https://tracker.ceph.com/issues/44298
Signed-off-by: Chunsong Feng <fengchunsong@huawei.com>
Signed-off-by: luorixin <luorixin@huawei.com>
src/msg/async/rdma/RDMAStack.cc
src/msg/async/rdma/RDMAStack.h

index 5e88e7d01dd29925a61fa47063e3c8becf6e9752..9d2dd8027530bd3fbd21a17f69885d465c21a529 100644 (file)
@@ -178,8 +178,9 @@ void RDMADispatcher::handle_async_event()
           } else {
              conn->fault();
              if (qp) {
-                if (!cct->_conf->ms_async_rdma_cm)
-                enqueue_dead_qp(qpn);
+                if (!cct->_conf->ms_async_rdma_cm) {
+                  enqueue_dead_qp_lockless(qpn);
+                }
              }
           }
         }
@@ -413,9 +414,8 @@ Infiniband::QueuePair* RDMADispatcher::get_qp(uint32_t qp)
   return get_qp_lockless(qp);
 }
 
-void RDMADispatcher::enqueue_dead_qp(uint32_t qpn)
+void RDMADispatcher::enqueue_dead_qp_lockless(uint32_t qpn)
 {
-  std::lock_guard l{lock};
   auto it = qp_conns.find(qpn);
   if (it == qp_conns.end()) {
     lderr(cct) << __func__ << " QP [" << qpn << "] is not registered." << dendl;
@@ -427,6 +427,12 @@ void RDMADispatcher::enqueue_dead_qp(uint32_t qpn)
   --num_qp_conn;
 }
 
+void RDMADispatcher::enqueue_dead_qp(uint32_t qpn)
+{
+  std::lock_guard l{lock};
+  enqueue_dead_qp_lockless(qpn);
+}
+
 void RDMADispatcher::schedule_qp_destroy(uint32_t qpn)
 {
   std::lock_guard l{lock};
index 84434b00cb77dc361668c7f02dab1da31947687c..ac33fcae8e7877047fb6fbfb50963efa489c5948 100644 (file)
@@ -76,7 +76,8 @@ class RDMADispatcher {
     ceph::make_mutex("RDMADispatcher::for worker pending list");
   // fixme: lockfree
   std::list<RDMAWorker*> pending_workers;
-  void enqueue_dead_qp(uint32_t qp);
+  void enqueue_dead_qp_lockless(uint32_t qp);
+  void enqueue_dead_qp(uint32_t qpn);
 
  public:
   PerfCounters *perf_logger;