git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
bluestore/NVMEDevice.cc: fix NVMEManager thread halt 25646/head
author tone.zhang <tone.zhang@arm.com>
Thu, 20 Dec 2018 10:12:38 +0000 (18:12 +0800)
committer tone.zhang <tone.zhang@arm.com>
Mon, 7 Jan 2019 03:14:43 +0000 (11:14 +0800)
When SPDK is enabled in Ceph and a Ceph development cluster is started,
the NVMEManager thread halts.

On the aarch64 platform, the log is as follows:

Starting SPDK v18.04.1 / DPDK 18.05.0 initialization...
[ DPDK EAL parameters: nvme-device-manager -c 0x1 -m 2048 --file-prefix=spdk_pid16987 ]
EAL: Detected 46 lcore(s)
EAL: Detected 1 NUMA nodes
EAL: Multi-process socket /var/run/dpdk/spdk_pid16987/mp_socket
EAL: Probing VFIO support...
EAL: VFIO support initialized
EAL: PCI device 0000:01:00.0 on NUMA socket 0
EAL:   probe driver: 8086:953 spdk_nvme
EAL:   using IOMMU type 1 (Type 1)
^C

The reason is that pthread_cond_destroy() cannot destroy a condition
variable that is still active, i.e. one that a thread is still waiting on.

Also on x86 debug builds we get the following error messages due to
probe_queue_lock still being active during ~NVMEManager:

/home/ubuntu/ceph/src/common/mutex_debug.h: 114: FAILED ceph_assert(r == 0)
 ceph version 14.0.1-1862-g403622b (403622be721a460f3dff2d84f6bfc628f5026704) nautilus (dev)

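This change fixes the issue: the DPDK thread is no longer detached;
~NVMEManager now sets a stopping flag, wakes the thread and joins it.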

Fixes: http://tracker.ceph.com/issues/37720
Signed-off-by: tone.zhang <tone.zhang@arm.com>
Signed-off-by: Steve Capper <steve.capper@arm.com>
src/os/bluestore/NVMEDevice.cc

index daec8fb6980a8941cd7e196761e09ae0a65a4199..acd9eb0354659ea7bc680654165296e5f8b11b7a 100644 (file)
@@ -477,7 +477,7 @@ class NVMEManager {
 
  private:
   ceph::mutex lock = ceph::make_mutex("NVMEManager::lock");
-  bool init = false;
+  bool stopping = false;
   std::vector<SharedDriverData*> shared_driver_datas;
   std::thread dpdk_thread;
   ceph::mutex probe_queue_lock = ceph::make_mutex("NVMEManager::probe_queue_lock");
@@ -486,6 +486,17 @@ class NVMEManager {
 
  public:
   NVMEManager() {}
+  ~NVMEManager() {
+    if (!dpdk_thread.joinable())
+      return;
+    {
+      std::lock_guard guard(probe_queue_lock);
+      stopping = true;
+      probe_queue_cond.notify_all();
+    }
+    dpdk_thread.join();
+  }
+
   int try_get(const spdk_nvme_transport_id& trid, SharedDriverData **driver);
   void register_ctrlr(const spdk_nvme_transport_id& trid, spdk_nvme_ctrlr *c, SharedDriverData **driver) {
     ceph_assert(ceph_mutex_is_locked(lock));
@@ -569,9 +580,7 @@ int NVMEManager::try_get(const spdk_nvme_transport_id& trid, SharedDriverData **
 
   uint32_t mem_size_arg = (uint32_t)g_conf().get_val<Option::size_t>("bluestore_spdk_mem");
 
-
-  if (!init) {
-    init = true;
+  if (!dpdk_thread.joinable()) {
     dpdk_thread = std::thread(
       [this, coremask_arg, m_core_arg, mem_size_arg]() {
         static struct spdk_env_opts opts;
@@ -590,7 +599,7 @@ int NVMEManager::try_get(const spdk_nvme_transport_id& trid, SharedDriverData **
           spdk_nvme_retry_count = SPDK_NVME_DEFAULT_RETRY_COUNT;
 
         std::unique_lock l(probe_queue_lock);
-        while (true) {
+        while (!stopping) {
           if (!probe_queue.empty()) {
             ProbeContext* ctxt = probe_queue.front();
             probe_queue.pop_front();
@@ -605,9 +614,11 @@ int NVMEManager::try_get(const spdk_nvme_transport_id& trid, SharedDriverData **
             probe_queue_cond.wait(l);
           }
         }
+        for (auto p : probe_queue)
+          p->done = true;
+        probe_queue_cond.notify_all();
       }
     );
-    dpdk_thread.detach();
   }
 
   ProbeContext ctx{trid, this, nullptr, false};