From cf861523cf42f034ce4f0b128cc48e100d1bf486 Mon Sep 17 00:00:00 2001 From: Gabriel BenHanokh Date: Sun, 7 Apr 2024 10:57:14 +0000 Subject: [PATCH] os/BlueStore: NCB fix for leaked space when bdev_async_discard is enabled Fix calls bdev->discard_drain() before calling store_allocator() to make sure all freed space is reflected in the allocator before destaging it The fix set a timeout for the drain call (500msec) and if expires will not store the allocator (forcing a recovery on the next startup) Fixes: https://tracker.ceph.com/issues/65298 Signed-off-by: Gabriel BenHanokh (cherry picked from commit 3aa891dbf6dafd3b1983fbe2efcbfd1d11d52b40) --- src/blk/BlockDevice.h | 2 +- src/blk/kernel/KernelDevice.cc | 24 +++++++++++++++++++++++- src/blk/kernel/KernelDevice.h | 2 +- src/os/bluestore/BlueStore.cc | 14 +++++++++++--- 4 files changed, 36 insertions(+), 6 deletions(-) diff --git a/src/blk/BlockDevice.h b/src/blk/BlockDevice.h index 6c55646fc76..7431b062dce 100644 --- a/src/blk/BlockDevice.h +++ b/src/blk/BlockDevice.h @@ -285,7 +285,7 @@ public: int write_hint = WRITE_LIFE_NOT_SET) = 0; virtual int flush() = 0; virtual bool try_discard(interval_set &to_release, bool async=true) { return false; } - virtual void discard_drain() { return; } + virtual int discard_drain(uint32_t timeout_msec = 0) { return 0; } // for managing buffered readers/writers virtual int invalidate_cache(uint64_t off, uint64_t len) = 0; diff --git a/src/blk/kernel/KernelDevice.cc b/src/blk/kernel/KernelDevice.cc index 6337292f5de..716004e6c89 100644 --- a/src/blk/kernel/KernelDevice.cc +++ b/src/blk/kernel/KernelDevice.cc @@ -587,13 +587,35 @@ bool KernelDevice::_discard_started() return !discard_threads.empty(); } -void KernelDevice::discard_drain() +int KernelDevice::discard_drain(uint32_t timeout_msec = 0) { dout(10) << __func__ << dendl; + bool check_timeout = false; + utime_t end_time; + if (timeout_msec) { + check_timeout = true; + uint32_t timeout_sec = 0; + if (timeout_msec >= 1000) { + timeout_sec = (timeout_msec / 1000); + timeout_msec = (timeout_msec % 1000); + } + end_time = ceph_clock_now(); + // add the timeout after converting from msec to nsec + end_time.tv.tv_nsec += (timeout_msec * (1000*1000)); + if (end_time.tv.tv_nsec > (1000*1000*1000)) { + end_time.tv.tv_nsec -= (1000*1000*1000); + end_time.tv.tv_sec += 1; + } + end_time.tv.tv_sec += timeout_sec; + } std::unique_lock l(discard_lock); while (!discard_queued.empty() || discard_running) { discard_cond.wait(l); + if (check_timeout && ceph_clock_now() > end_time) { + return -1; + } } + return 0; } static bool is_expected_ioerr(const int r) diff --git a/src/blk/kernel/KernelDevice.h b/src/blk/kernel/KernelDevice.h index 914f05e64c4..b7b0a7dc38b 100644 --- a/src/blk/kernel/KernelDevice.h +++ b/src/blk/kernel/KernelDevice.h @@ -123,7 +123,7 @@ public: ~KernelDevice(); void aio_submit(IOContext *ioc) override; - void discard_drain() override; + int discard_drain(uint32_t timeout_msec) override; int collect_metadata(const std::string& prefix, std::map *pm) const override; int get_devname(std::string *s) const override { diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index f8ab4720b64..108afc4038d 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -7754,9 +7754,17 @@ void BlueStore::_close_db() db = nullptr; if (do_destage && fm && fm->is_null_manager()) { - int ret = store_allocator(alloc); - if (ret != 0) { - derr << __func__ << "::NCB::store_allocator() failed (continue with bitmapFreelistManager)" << dendl; + // force all backgrounds discards to be committed before storing allocator + // set timeout to 500msec + int ret = bdev->discard_drain(500); + if (ret == 0) { + ret = store_allocator(alloc); + if (unlikely(ret != 0)) { + derr << __func__ << "::NCB::store_allocator() failed (we will need to rebuild it on startup)" << dendl; + } + } + else { + derr << __func__ << "::NCB::discard_drain() exceeded timeout (abort!)" << dendl; } } -- 2.39.5