blk/KernelDevice: Introduce a cap on the number of pending discards

author Joshua Baergen <jbaergen@digitalocean.com>

Wed, 18 Dec 2024 17:27:58 +0000 (10:27 -0700)

committer Igor Fedotov <igor.fedotov@croit.io>

Tue, 11 Mar 2025 10:00:13 +0000 (13:00 +0300)
author Joshua Baergen <jbaergen@digitalocean.com>
Wed, 18 Dec 2024 17:27:58 +0000 (10:27 -0700)
committer Igor Fedotov <igor.fedotov@croit.io>
Tue, 11 Mar 2025 10:00:13 +0000 (13:00 +0300)
diff --git a/src/blk/kernel/KernelDevice.cc b/src/blk/kernel/KernelDevice.cc

index cb2b06a3de46eca832cc9ba15f371172326312c5..eeeab21082c8ba70116b230ef201f94afa9dd0d2 100644 (file)
--- a/src/blk/kernel/KernelDevice.cc
+++ b/src/blk/kernel/KernelDevice.cc
@@ -801,14 +801,21 @@ void KernelDevice::_discard_thread(uint64_t tid)
  
  // this is private and is expected that the caller checks that discard
  // threads are running via _discard_started()
-void KernelDevice::_queue_discard(interval_set<uint64_t> &to_release)
+bool KernelDevice::_queue_discard(interval_set<uint64_t> &to_release)
  {
    if (to_release.empty())
-    return;
+    return false;
+
+  auto max_pending = cct->_conf->bdev_async_discard_max_pending;
  
    std::lock_guard l(discard_lock);
+
+  if (max_pending > 0 && discard_queued.num_intervals() >= max_pending)
+    return false;
+
    discard_queued.insert(to_release);
    discard_cond.notify_one();
+  return true;
  }
  
  // return true only if discard was queued, so caller won't have to do
@@ -819,8 +826,7 @@ bool KernelDevice::try_discard(interval_set<uint64_t> &to_release, bool async)
      return false;
  
    if (async && _discard_started()) {
-    _queue_discard(to_release);
-    return true;
+    return _queue_discard(to_release);
    } else {
      for (auto p = to_release.begin(); p != to_release.end(); ++p) {
        _discard(p.get_start(), p.get_len());
diff --git a/src/blk/kernel/KernelDevice.h b/src/blk/kernel/KernelDevice.h

index 99098d7fe401a760941b3ce2a954526174e8c66f..f54e9cae4b92bb6258483daa20225a50ef75f3fd 100644 (file)
--- a/src/blk/kernel/KernelDevice.h
+++ b/src/blk/kernel/KernelDevice.h
@@ -87,7 +87,7 @@ private:
  
    void _aio_thread();
    void _discard_thread(uint64_t tid);
-  void _queue_discard(interval_set<uint64_t> &to_release);
+  bool _queue_discard(interval_set<uint64_t> &to_release);
    bool try_discard(interval_set<uint64_t> &to_release, bool async = true) override;
  
    int _aio_start();
diff --git a/src/common/options/global.yaml.in b/src/common/options/global.yaml.in

index 5955a3dfe3e4ccc630ca728c6179e2bed09285b3..d88a5f691fbcdd21c0c86c303a1dd7296e5facd1 100644 (file)
--- a/src/common/options/global.yaml.in
+++ b/src/common/options/global.yaml.in
@@ -4067,6 +4067,23 @@ options:
    - runtime
    see_also:
    - bdev_enable_discard
+  - bdev_async_discard_max_pending
+- name: bdev_async_discard_max_pending
+  desc: maximum number of pending discards
+  long_desc: The maximum number of pending async discards that can be queued and not claimed by an
+    async discard thread. Discards will not be issued once the queue is full and blocks will be
+    freed back to the allocator immediately instead. This is useful if you have a device with slow
+    discard performance that can't keep up with a consistently high write workload. 0 means
+    'unlimited'.
+  type: uint
+  level: advanced
+  default: 1000000
+  min: 0
+  with_legacy: true
+  flags:
+  - runtime
+  see_also:
+  - bdev_async_discard_threads
  - name: bdev_flock_retry_interval
    type: float
    level: advanced
author	Joshua Baergen <jbaergen@digitalocean.com>
	Wed, 18 Dec 2024 17:27:58 +0000 (10:27 -0700)
committer	Igor Fedotov <igor.fedotov@croit.io>
	Tue, 11 Mar 2025 10:00:13 +0000 (13:00 +0300)
src/blk/kernel/KernelDevice.cc		patch \| blob \| history
src/blk/kernel/KernelDevice.h		patch \| blob \| history
src/common/options/global.yaml.in		patch \| blob \| history