From 94168dcd02ff0c2a771e504a531ee5b04e575d25 Mon Sep 17 00:00:00 2001
From: Jaya Prakash <jayaprakash@ibm.com>
Date: Thu, 9 Jan 2025 21:44:05 +0530
Subject: [PATCH] blk: Warning added for discard queue overflow

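Added a health warning mechanism to monitor the discard queue for potential overload.
Emits a <device>_DEVICE_DISCARD_QUEUE alert when the bytes accumulated in the discard queue
reach 80% of bdev_discard_max_bytes, or the number of queued items reaches 80% of
bdev_async_discard_max_pending.
Introduced a debugging option, bdev_debug_discard_sleep (in milliseconds), to simulate slow
discard operations by adding a configurable delay to the discard thread.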

Fixes: https://tracker.ceph.com/issues/69082

Signed-off-by: Jaya Prakash <jayaprakash@ibm.com>
---
 src/blk/BlockDevice.cc         | 21 +++++++++++++++++++++
 src/blk/BlockDevice.h          |  3 +++
 src/blk/kernel/KernelDevice.cc | 26 +++++++++++++++++++++++++-
 3 files changed, 49 insertions(+), 1 deletion(-)
diff --git a/src/blk/BlockDevice.cc b/src/blk/BlockDevice.cc
index 8c06256d25477..e7c0a8e7da285 100644
--- a/src/blk/BlockDevice.cc
+++ b/src/blk/BlockDevice.cc
@@ -226,5 +226,26 @@ void BlockDevice::collect_alerts(osd_alert_list_t& alerts, const std::string& de
       alerts.emplace(device_name + "_DEVICE_STALLED_READ_ALERT", ss.str());
     }
   }
+  if (support_discard && cct->_conf->bdev_enable_discard) {
+    size_t current_discarded_bytes = discard_queue_bytes.load();
+    uint64_t current_discard_queue_items = discard_queue_length.load();
+
+    size_t discard_bytes_warn_threshold = static_cast<size_t>(0.8 * cct->_conf->bdev_discard_max_bytes);
+    uint64_t discard_items_warn_threshold =
+      static_cast<uint64_t>(0.8 * cct->_conf->bdev_async_discard_max_pending);
+
+    bool discard_queue_overload =
+      (current_discarded_bytes >= discard_bytes_warn_threshold) ||
+      (cct->_conf->bdev_async_discard_max_pending > 0 &&
+       current_discard_queue_items >= discard_items_warn_threshold);
+
+    if (discard_queue_overload) {
+      std::ostringstream ss;
+      ss << "Slow discard on " << device_name
+         << ", queue: " << current_discard_queue_items
+	 << " items " << byte_u_t(current_discarded_bytes);
+      alerts.emplace(device_name + "_DEVICE_DISCARD_QUEUE", ss.str());
+    }
+  }
 }
 
diff --git a/src/blk/BlockDevice.h b/src/blk/BlockDevice.h
index cb795eaa5e5b5..4b46f8ddcf2d2 100644
--- a/src/blk/BlockDevice.h
+++ b/src/blk/BlockDevice.h
@@ -304,6 +304,9 @@ public:
 
   struct hugepaged_raw_marker_t {};
 
+  std::atomic<size_t> discard_queue_bytes = 0;
+  std::atomic<uint64_t> discard_queue_length = 0;
+
 protected:
   bool is_valid_io(uint64_t off, uint64_t len) const;
 };
diff --git a/src/blk/kernel/KernelDevice.cc b/src/blk/kernel/KernelDevice.cc
index db06d30528634..287aaa931d44d 100644
--- a/src/blk/kernel/KernelDevice.cc
+++ b/src/blk/kernel/KernelDevice.cc
@@ -20,6 +20,7 @@
 #include <fcntl.h>
 #include <sys/file.h>
 #include <sys/mman.h>
+#include <chrono>
 
 #include <boost/container/flat_map.hpp>
 #include <boost/lockfree/queue.hpp>
@@ -813,18 +814,25 @@ void KernelDevice::_discard_thread(uint64_t tid)
       if (thr->stop && !discard_threads.empty())
         break;
 
+      if (cct->_conf->bdev_debug_discard_sleep > 0)
+        std::this_thread::sleep_for(std::chrono::milliseconds(cct->_conf->bdev_debug_discard_sleep));
+
       // Limit local processing to MAX_LOCAL_DISCARD items.
       // This will allow threads to work in parallel
       //      instead of a single thread taking over the whole discard_queued.
       // It will also allow threads to finish in a timely manner.
       constexpr unsigned MAX_LOCAL_DISCARD = 32;
       unsigned count = 0;
+      size_t bytes_discarded = 0;
       for (auto it = discard_queued.begin();
            it != discard_queued.end() && count < MAX_LOCAL_DISCARD;
            ++count) {
         discard_processing.insert(it.get_start(), it.get_len());
+	bytes_discarded += it.get_len();
         it = discard_queued.erase(it);
       }
+      discard_queue_bytes -= bytes_discarded;
+      discard_queue_length = discard_queued.num_intervals();
 
       // there are multiple active threads -> must use a counter instead of a flag
       discard_running ++;
@@ -857,10 +865,26 @@ bool KernelDevice::_queue_discard(interval_set<uint64_t> &to_release)
 
   std::lock_guard l(discard_lock);
 
-  if (max_pending > 0 && discard_queued.num_intervals() >= max_pending)
+  if (max_pending > 0 && discard_queued.num_intervals() >= max_pending) {
+    discard_cond.notify_one();
     return false;
+  }
+
+  if(discard_queue_bytes >= cct->_conf->bdev_discard_max_bytes) {
+    discard_cond.notify_one();
+    return false;
+  }
 
   discard_queued.insert(to_release);
+
+  size_t discarded_bytes = 0;
+  for(auto p = to_release.begin(); p != to_release.end(); ++p){
+    discarded_bytes += p.get_len();
+  }
+  discard_queue_bytes += discarded_bytes;
+
+  discard_queue_length = discard_queued.num_intervals();
+
   discard_cond.notify_one();
   return true;
 }
-- 
2.39.5