From: Chunsong Feng Date: Mon, 17 Aug 2020 11:59:34 +0000 (+0800) Subject: msg/async/rdma: avoid enqueue_dead_qp deadlock X-Git-Tag: v16.1.0~1278^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=8a9412ee883704e8e35207f40c0bd03ae2578cfa;p=ceph.git msg/async/rdma: avoid enqueue_dead_qp deadlock In RDMADispatcher::handle_async_event, when the IBV_EVENT_QP_LAST_WQE_REACHED case is matched, enqueue_dead_qp tries to acquire the lock, which results in a deadlock. Add a lockless variant, enqueue_dead_qp_lockless, and call it there instead; enqueue_dead_qp remains as a wrapper that takes the lock and then calls the lockless variant. Fixes: https://tracker.ceph.com/issues/44298 Signed-off-by: Chunsong Feng Signed-off-by: luorixin --- diff --git a/src/msg/async/rdma/RDMAStack.cc b/src/msg/async/rdma/RDMAStack.cc index 5e88e7d01dd2..9d2dd8027530 100644 --- a/src/msg/async/rdma/RDMAStack.cc +++ b/src/msg/async/rdma/RDMAStack.cc @@ -178,8 +178,9 @@ void RDMADispatcher::handle_async_event() } else { conn->fault(); if (qp) { - if (!cct->_conf->ms_async_rdma_cm) - enqueue_dead_qp(qpn); + if (!cct->_conf->ms_async_rdma_cm) { + enqueue_dead_qp_lockless(qpn); + } } } } @@ -413,9 +414,8 @@ Infiniband::QueuePair* RDMADispatcher::get_qp(uint32_t qp) return get_qp_lockless(qp); } -void RDMADispatcher::enqueue_dead_qp(uint32_t qpn) +void RDMADispatcher::enqueue_dead_qp_lockless(uint32_t qpn) { - std::lock_guard l{lock}; auto it = qp_conns.find(qpn); if (it == qp_conns.end()) { lderr(cct) << __func__ << " QP [" << qpn << "] is not registered."
<< dendl; @@ -427,6 +427,12 @@ void RDMADispatcher::enqueue_dead_qp(uint32_t qpn) --num_qp_conn; } +void RDMADispatcher::enqueue_dead_qp(uint32_t qpn) +{ + std::lock_guard l{lock}; + enqueue_dead_qp_lockless(qpn); +} + void RDMADispatcher::schedule_qp_destroy(uint32_t qpn) { std::lock_guard l{lock}; diff --git a/src/msg/async/rdma/RDMAStack.h b/src/msg/async/rdma/RDMAStack.h index 84434b00cb77..ac33fcae8e78 100644 --- a/src/msg/async/rdma/RDMAStack.h +++ b/src/msg/async/rdma/RDMAStack.h @@ -76,7 +76,8 @@ class RDMADispatcher { ceph::make_mutex("RDMADispatcher::for worker pending list"); // fixme: lockfree std::list pending_workers; - void enqueue_dead_qp(uint32_t qp); + void enqueue_dead_qp_lockless(uint32_t qp); + void enqueue_dead_qp(uint32_t qpn); public: PerfCounters *perf_logger;