From 1fe8412bedd2fdce3352a5e96b078d1a88d35cea Mon Sep 17 00:00:00 2001 From: skanta Date: Tue, 5 Nov 2024 16:19:39 +0530 Subject: [PATCH] Revert "On graceful shutdown we will wait for discard queue to drain before storing the allocator." This reverts commit d65eebc478d0a9bd366f3b5cc38fb4c84aa00dc8. The commit was merged prematurely without testing. Signed-off-by: Srinivasa Bharath Kanta --- src/blk/BlockDevice.h | 4 ++-- src/blk/kernel/KernelDevice.cc | 24 +++++++++++++++++++++++- src/blk/kernel/KernelDevice.h | 4 ++-- src/os/bluestore/BlueStore.cc | 25 +++++++++---------------- 4 files changed, 36 insertions(+), 21 deletions(-) diff --git a/src/blk/BlockDevice.h b/src/blk/BlockDevice.h index 7c3050d2e317a..3c93cbdeeb189 100644 --- a/src/blk/BlockDevice.h +++ b/src/blk/BlockDevice.h @@ -292,8 +292,8 @@ public: int write_hint = WRITE_LIFE_NOT_SET) = 0; virtual int flush() = 0; virtual bool try_discard(interval_set &to_release, bool async=true) { return false; } - virtual void discard_drain() { return; } - virtual const interval_set* get_discard_queued() { return nullptr;} + virtual int discard_drain(uint32_t timeout_msec = 0) { return 0; } + // for managing buffered readers/writers virtual int invalidate_cache(uint64_t off, uint64_t len) = 0; virtual int open(const std::string& path) = 0; diff --git a/src/blk/kernel/KernelDevice.cc b/src/blk/kernel/KernelDevice.cc index f6edaf98e8304..07927f237cce1 100644 --- a/src/blk/kernel/KernelDevice.cc +++ b/src/blk/kernel/KernelDevice.cc @@ -587,13 +587,35 @@ bool KernelDevice::_discard_started() return !discard_threads.empty(); } -void KernelDevice::discard_drain() +int KernelDevice::discard_drain(uint32_t timeout_msec = 0) { dout(10) << __func__ << dendl; + bool check_timeout = false; + utime_t end_time; + if (timeout_msec) { + check_timeout = true; + uint32_t timeout_sec = 0; + if (timeout_msec >= 1000) { + timeout_sec = (timeout_msec / 1000); + timeout_msec = (timeout_msec % 1000); + } + end_time = ceph_clock_now(); + // add the timeout after converting from msec to nsec + end_time.tv.tv_nsec += (timeout_msec * (1000*1000)); + if (end_time.tv.tv_nsec > (1000*1000*1000)) { + end_time.tv.tv_nsec -= (1000*1000*1000); + end_time.tv.tv_sec += 1; + } + end_time.tv.tv_sec += timeout_sec; + } std::unique_lock l(discard_lock); while (!discard_queued.empty() || discard_running) { discard_cond.wait(l); + if (check_timeout && ceph_clock_now() > end_time) { + return -1; + } } + return 0; } static bool is_expected_ioerr(const int r) diff --git a/src/blk/kernel/KernelDevice.h b/src/blk/kernel/KernelDevice.h index 2b3f3943d77b8..b7b0a7dc38bc1 100644 --- a/src/blk/kernel/KernelDevice.h +++ b/src/blk/kernel/KernelDevice.h @@ -123,8 +123,8 @@ public: ~KernelDevice(); void aio_submit(IOContext *ioc) override; - void discard_drain() override; - const interval_set* get_discard_queued() override { return &discard_queued;} + int discard_drain(uint32_t timeout_msec) override; + int collect_metadata(const std::string& prefix, std::map *pm) const override; int get_devname(std::string *s) const override { if (devname.empty()) { diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index b3e130fac7b81..bf92ccf6558a1 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -8038,24 +8038,17 @@ void BlueStore::_close_db() db = nullptr; if (do_destage && fm && fm->is_null_manager()) { - if (cct->_conf->osd_fast_shutdown == false) { - // graceful shutdown -> commit backgrounds discards before storing allocator - bdev->discard_drain(); - } - - auto discard_queued = bdev->get_discard_queued(); - if (discard_queued && (discard_queued->num_intervals() > 0)) { - dout(10) << __func__ << "::discard_drain: size=" << discard_queued->size() - << " num_intervals=" << discard_queued->num_intervals() << dendl; - // copy discard_queued to the allocator before storing it - for (auto p = discard_queued->begin(); p != discard_queued->end(); ++p) { - dout(20) << __func__ << "::discarded-extent=[" << p.get_start() << ", " << p.get_len() << "]" << dendl; - alloc->init_add_free(p.get_start(), p.get_len()); + // force all backgrounds discards to be committed before storing allocator + // set timeout to 500msec + int ret = bdev->discard_drain(500); + if (ret == 0) { + ret = store_allocator(alloc); + if (unlikely(ret != 0)) { + derr << __func__ << "::NCB::store_allocator() failed (we will need to rebuild it on startup)" << dendl; } } - int ret = store_allocator(alloc); - if (unlikely(ret != 0)) { - derr << __func__ << "::NCB::store_allocator() failed (we will need to rebuild it on startup)" << dendl; + else { + derr << __func__ << "::NCB::discard_drain() exceeded timeout (abort!)" << dendl; } } -- 2.39.5