blk: Warning added for discard queue overflow

author Jaya Prakash <jayaprakash@ibm.com>

Thu, 9 Jan 2025 16:14:05 +0000 (21:44 +0530)

committer Jaya Prakash <jayaprakash@ibm.com>

Thu, 3 Jul 2025 07:07:02 +0000 (07:07 +0000)
author Jaya Prakash <jayaprakash@ibm.com>
Thu, 9 Jan 2025 16:14:05 +0000 (21:44 +0530)
committer Jaya Prakash <jayaprakash@ibm.com>
Thu, 3 Jul 2025 07:07:02 +0000 (07:07 +0000)
diff --git a/src/blk/BlockDevice.cc b/src/blk/BlockDevice.cc

index 8c06256d25477b9a612f8defc2b69cbe632d2fd6..e7c0a8e7da285b1233bd1f3756a67d89adbe7442 100644 (file)
--- a/src/blk/BlockDevice.cc
+++ b/src/blk/BlockDevice.cc
@@ -226,5 +226,26 @@ void BlockDevice::collect_alerts(osd_alert_list_t& alerts, const std::string& de
        alerts.emplace(device_name + "_DEVICE_STALLED_READ_ALERT", ss.str());
      }
    }
+  if (support_discard && cct->_conf->bdev_enable_discard) {
+    size_t current_discarded_bytes = discard_queue_bytes.load();
+    uint64_t current_discard_queue_items = discard_queue_length.load();
+
+    size_t discard_bytes_warn_threshold = static_cast<size_t>(0.8 * cct->_conf->bdev_discard_max_bytes);
+    uint64_t discard_items_warn_threshold =
+      static_cast<uint64_t>(0.8 * cct->_conf->bdev_async_discard_max_pending);
+
+    bool discard_queue_overload =
+      (current_discarded_bytes >= discard_bytes_warn_threshold) ||
+      (cct->_conf->bdev_async_discard_max_pending > 0 &&
+       current_discard_queue_items >= discard_items_warn_threshold);
+
+    if (discard_queue_overload) {
+      std::ostringstream ss;
+      ss << "Slow discard on " << device_name
+         << ", queue: " << current_discard_queue_items
+        << " items " << byte_u_t(current_discarded_bytes);
+      alerts.emplace(device_name + "_DEVICE_DISCARD_QUEUE", ss.str());
+    }
+  }
  }
  
diff --git a/src/blk/BlockDevice.h b/src/blk/BlockDevice.h

index cb795eaa5e5b5122e9e01c943b50d6f2ef220f3a..4b46f8ddcf2d248b577901a14d9ba452fa705f43 100644 (file)
--- a/src/blk/BlockDevice.h
+++ b/src/blk/BlockDevice.h
@@ -304,6 +304,9 @@ public:
  
    struct hugepaged_raw_marker_t {};
  
+  std::atomic<size_t> discard_queue_bytes = 0;
+  std::atomic<uint64_t> discard_queue_length = 0;
+
  protected:
    bool is_valid_io(uint64_t off, uint64_t len) const;
  };
diff --git a/src/blk/kernel/KernelDevice.cc b/src/blk/kernel/KernelDevice.cc

index db06d3052863416b52e8db58a9337ec269f0856a..287aaa931d44d90a7ef19621881257606a1b56ea 100644 (file)
--- a/src/blk/kernel/KernelDevice.cc
+++ b/src/blk/kernel/KernelDevice.cc
@@ -20,6 +20,7 @@
  #include <fcntl.h>
  #include <sys/file.h>
  #include <sys/mman.h>
+#include <chrono>
  
  #include <boost/container/flat_map.hpp>
  #include <boost/lockfree/queue.hpp>
@@ -813,18 +814,25 @@ void KernelDevice::_discard_thread(uint64_t tid)
        if (thr->stop && !discard_threads.empty())
          break;
  
+      if (cct->_conf->bdev_debug_discard_sleep > 0)
+        std::this_thread::sleep_for(std::chrono::milliseconds(cct->_conf->bdev_debug_discard_sleep));
+
        // Limit local processing to MAX_LOCAL_DISCARD items.
        // This will allow threads to work in parallel
        //      instead of a single thread taking over the whole discard_queued.
        // It will also allow threads to finish in a timely manner.
        constexpr unsigned MAX_LOCAL_DISCARD = 32;
        unsigned count = 0;
+      size_t bytes_discarded = 0;
        for (auto it = discard_queued.begin();
             it != discard_queued.end() && count < MAX_LOCAL_DISCARD;
             ++count) {
          discard_processing.insert(it.get_start(), it.get_len());
+       bytes_discarded += it.get_len();
          it = discard_queued.erase(it);
        }
+      discard_queue_bytes -= bytes_discarded;
+      discard_queue_length = discard_queued.num_intervals();
  
        // there are multiple active threads -> must use a counter instead of a flag
        discard_running ++;
@@ -857,10 +865,26 @@ bool KernelDevice::_queue_discard(interval_set<uint64_t> &to_release)
  
    std::lock_guard l(discard_lock);
  
-  if (max_pending > 0 && discard_queued.num_intervals() >= max_pending)
+  if (max_pending > 0 && discard_queued.num_intervals() >= max_pending) {
+    discard_cond.notify_one();
      return false;
+  }
+
+  if(discard_queue_bytes >= cct->_conf->bdev_discard_max_bytes) {
+    discard_cond.notify_one();
+    return false;
+  }
  
    discard_queued.insert(to_release);
+
+  size_t discarded_bytes = 0;
+  for(auto p = to_release.begin(); p != to_release.end(); ++p){
+    discarded_bytes += p.get_len();
+  }
+  discard_queue_bytes += discarded_bytes;
+
+  discard_queue_length = discard_queued.num_intervals();
+
    discard_cond.notify_one();
    return true;
  }
author	Jaya Prakash <jayaprakash@ibm.com>
	Thu, 9 Jan 2025 16:14:05 +0000 (21:44 +0530)
committer	Jaya Prakash <jayaprakash@ibm.com>
	Thu, 3 Jul 2025 07:07:02 +0000 (07:07 +0000)
src/blk/BlockDevice.cc		patch \| blob \| history
src/blk/BlockDevice.h		patch \| blob \| history
src/blk/kernel/KernelDevice.cc		patch \| blob \| history