From: tone.zhang Date: Thu, 20 Dec 2018 10:12:38 +0000 (+0800) Subject: bluestore/NVMEDevice.cc: fix NVMEManager thread halt X-Git-Tag: v14.1.0~476^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=refs%2Fpull%2F25646%2Fhead;p=ceph.git bluestore/NVMEDevice.cc: fix NVMEManager thread halt When SPDK is enabled in Ceph and a Ceph development cluster is started, the NVMEManager thread halts. On the aarch64 platform, the log is as follows: Starting SPDK v18.04.1 / DPDK 18.05.0 initialization... [ DPDK EAL parameters: nvme-device-manager -c 0x1 -m 2048 --file-prefix=spdk_pid16987 ] EAL: Detected 46 lcore(s) EAL: Detected 1 NUMA nodes EAL: Multi-process socket /var/run/dpdk/spdk_pid16987/mp_socket EAL: Probing VFIO support... EAL: VFIO support initialized EAL: PCI device 0000:01:00.0 on NUMA socket 0 EAL: probe driver: 8086:953 spdk_nvme EAL: using IOMMU type 1 (Type 1) ^C The reason is that pthread_cond_destroy() cannot destroy a condition variable that is still in use. Also, on x86 debug builds we get the following error messages because probe_queue_lock is still active during ~NVMEManager: /home/ubuntu/ceph/src/common/mutex_debug.h: 114: FAILED ceph_assert(r == 0) ceph version 14.0.1-1862-g403622b (403622be721a460f3dff2d84f6bfc628f5026704) nautilus (dev) This change fixes both issues: instead of detaching the DPDK thread, ~NVMEManager now sets a stopping flag, wakes the thread, and joins it. 
Fixes: http://tracker.ceph.com/issues/37720 Signed-off-by: tone.zhang Signed-off-by: Steve Capper --- diff --git a/src/os/bluestore/NVMEDevice.cc b/src/os/bluestore/NVMEDevice.cc index daec8fb6980a..acd9eb035465 100644 --- a/src/os/bluestore/NVMEDevice.cc +++ b/src/os/bluestore/NVMEDevice.cc @@ -477,7 +477,7 @@ class NVMEManager { private: ceph::mutex lock = ceph::make_mutex("NVMEManager::lock"); - bool init = false; + bool stopping = false; std::vector shared_driver_datas; std::thread dpdk_thread; ceph::mutex probe_queue_lock = ceph::make_mutex("NVMEManager::probe_queue_lock"); @@ -486,6 +486,17 @@ class NVMEManager { public: NVMEManager() {} + ~NVMEManager() { + if (!dpdk_thread.joinable()) + return; + { + std::lock_guard guard(probe_queue_lock); + stopping = true; + probe_queue_cond.notify_all(); + } + dpdk_thread.join(); + } + int try_get(const spdk_nvme_transport_id& trid, SharedDriverData **driver); void register_ctrlr(const spdk_nvme_transport_id& trid, spdk_nvme_ctrlr *c, SharedDriverData **driver) { ceph_assert(ceph_mutex_is_locked(lock)); @@ -569,9 +580,7 @@ int NVMEManager::try_get(const spdk_nvme_transport_id& trid, SharedDriverData ** uint32_t mem_size_arg = (uint32_t)g_conf().get_val("bluestore_spdk_mem"); - - if (!init) { - init = true; + if (!dpdk_thread.joinable()) { dpdk_thread = std::thread( [this, coremask_arg, m_core_arg, mem_size_arg]() { static struct spdk_env_opts opts; @@ -590,7 +599,7 @@ int NVMEManager::try_get(const spdk_nvme_transport_id& trid, SharedDriverData ** spdk_nvme_retry_count = SPDK_NVME_DEFAULT_RETRY_COUNT; std::unique_lock l(probe_queue_lock); - while (true) { + while (!stopping) { if (!probe_queue.empty()) { ProbeContext* ctxt = probe_queue.front(); probe_queue.pop_front(); @@ -605,9 +614,11 @@ int NVMEManager::try_get(const spdk_nvme_transport_id& trid, SharedDriverData ** probe_queue_cond.wait(l); } } + for (auto p : probe_queue) + p->done = true; + probe_queue_cond.notify_all(); } ); - dpdk_thread.detach(); } 
ProbeContext ctx{trid, this, nullptr, false};