From 3d4a899d6c5f0c3c4c663d5d6dbb78285fbd7672 Mon Sep 17 00:00:00 2001 From: Joshua Baergen Date: Tue, 2 Jul 2024 13:57:15 -0600 Subject: [PATCH] blk/KernelDevice: Fix several issues with stopping discard threads 1. In _discard_stop(), the wait for !discard_threads.empty() was there from a prior implementation where there could theoretically be a race between _discard_start() and _discard_stop(). If that race does exist, this check won't help; not only is _discard_stop() not called if discard_threads is empty, but discard_threads won't be populated until _discard_start() runs and thus this won't detect such a race. 2. Calling _discard_stop() from handle_conf_change() is a guaranteed deadlock because discard_lock is already held, so don't do that. Use the same flow whether we're stopping a subset of threads or all threads. 3. Asking a subset of discard threads to stop was not guaranteed to take effect, since if they continued to find contents in discard_queued then they would continue to run indefinitely. Add additional logic to _discard_thread() to have threads stop if they have been requested to stop and other threads exist to continue draining discard_queued. 4. Make the flow of _discard_stop() and handle_conf_change() more similar. 
Fixes: https://tracker.ceph.com/issues/66817 Signed-off-by: Joshua Baergen --- src/blk/kernel/KernelDevice.cc | 37 ++++++++++++++++------------------ 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/src/blk/kernel/KernelDevice.cc b/src/blk/kernel/KernelDevice.cc index 057dd04cba898..4eb9702b6110b 100644 --- a/src/blk/kernel/KernelDevice.cc +++ b/src/blk/kernel/KernelDevice.cc @@ -562,21 +562,17 @@ void KernelDevice::_discard_stop() // Signal threads to stop, then wait for them to join { std::unique_lock l(discard_lock); - while (discard_threads.empty()) { - discard_cond.wait(l); - } for(auto &t : discard_threads) { t->stop = true; + t->detach(); } + discard_threads.clear(); discard_cond.notify_all(); } - // Threads are shared pointers and are cleaned up for us - for(auto &t : discard_threads) - t->join(); - discard_threads.clear(); + discard_drain(); dout(10) << __func__ << " stopped" << dendl; } @@ -761,6 +757,13 @@ void KernelDevice::_discard_thread(uint64_t tid) discard_cond.wait(l); dout(20) << __func__ << " wake" << dendl; } else { + // If there are non-stopped discard threads and we have been requested + // to stop, do so now. Otherwise, we need to proceed because + // discard_queued is non-empty and at least one thread is needed to + // drain it. + if (thr->stop && !discard_threads.empty()) + break; + // Limit local processing to MAX_LOCAL_DISCARD items. // This will allow threads to work in parallel // instead of a single thread taking over the whole discard_queued. @@ -1547,19 +1550,13 @@ void KernelDevice::handle_conf_change(const ConfigProxy& conf, } else if (newval < oldval) { dout(10) << __func__ << " stopping " << (oldval - newval) << " existing discard threads" << dendl; - // Decreasing to zero is exactly the same as disabling async discard. 
- // Signal all threads to stop - if(newval == 0) { - _discard_stop(); - } else { - // Signal the last threads to quit, and stop tracking them - for(uint64_t i = oldval - 1; i >= newval; i--) - { - // Also detach the thread so we no longer need to join - discard_threads[i]->stop = true; - discard_threads[i]->detach(); - discard_threads.erase(discard_threads.begin() + i); - } + // Signal the last threads to quit, and stop tracking them + for(uint64_t i = oldval - 1; i >= newval && i != UINT64_MAX; i--) + { + // Also detach the thread so we no longer need to join + discard_threads[i]->stop = true; + discard_threads[i]->detach(); + discard_threads.erase(discard_threads.begin() + i); } discard_cond.notify_all(); -- 2.39.5