From 4762ffaaa19ca2afe8864fa1a6fb67ceea3aa640 Mon Sep 17 00:00:00 2001 From: Gabriel BenHanokh Date: Mon, 8 Apr 2024 11:15:47 +0000 Subject: [PATCH] On graceful shutdown we will wait for discard queue to drain before storing the allocator. On fast shutdown we will simply copy the discard queue entries to the allocator. Signed-off-by: Gabriel BenHanokh --- src/blk/BlockDevice.h | 4 ++-- src/blk/kernel/KernelDevice.cc | 24 +----------------------- src/blk/kernel/KernelDevice.h | 4 ++-- src/os/bluestore/BlueStore.cc | 25 ++++++++++++++++--------- 4 files changed, 21 insertions(+), 36 deletions(-) diff --git a/src/blk/BlockDevice.h b/src/blk/BlockDevice.h index 7431b062dce..e46948ced34 100644 --- a/src/blk/BlockDevice.h +++ b/src/blk/BlockDevice.h @@ -285,8 +285,8 @@ public: int write_hint = WRITE_LIFE_NOT_SET) = 0; virtual int flush() = 0; virtual bool try_discard(interval_set &to_release, bool async=true) { return false; } - virtual int discard_drain(uint32_t timeout_msec = 0) { return 0; } - + virtual void discard_drain() { return; } + virtual const interval_set* get_discard_queued() { return nullptr;} // for managing buffered readers/writers virtual int invalidate_cache(uint64_t off, uint64_t len) = 0; virtual int open(const std::string& path) = 0; diff --git a/src/blk/kernel/KernelDevice.cc b/src/blk/kernel/KernelDevice.cc index 716004e6c89..6337292f5de 100644 --- a/src/blk/kernel/KernelDevice.cc +++ b/src/blk/kernel/KernelDevice.cc @@ -587,35 +587,13 @@ bool KernelDevice::_discard_started() return !discard_threads.empty(); } -int KernelDevice::discard_drain(uint32_t timeout_msec = 0) +void KernelDevice::discard_drain() { dout(10) << __func__ << dendl; - bool check_timeout = false; - utime_t end_time; - if (timeout_msec) { - check_timeout = true; - uint32_t timeout_sec = 0; - if (timeout_msec >= 1000) { - timeout_sec = (timeout_msec / 1000); - timeout_msec = (timeout_msec % 1000); - } - end_time = ceph_clock_now(); - // add the timeout after converting from msec to 
nsec - end_time.tv.tv_nsec += (timeout_msec * (1000*1000)); - if (end_time.tv.tv_nsec > (1000*1000*1000)) { - end_time.tv.tv_nsec -= (1000*1000*1000); - end_time.tv.tv_sec += 1; - } - end_time.tv.tv_sec += timeout_sec; - } std::unique_lock l(discard_lock); while (!discard_queued.empty() || discard_running) { discard_cond.wait(l); - if (check_timeout && ceph_clock_now() > end_time) { - return -1; - } } - return 0; } static bool is_expected_ioerr(const int r) diff --git a/src/blk/kernel/KernelDevice.h b/src/blk/kernel/KernelDevice.h index b7b0a7dc38b..2b3f3943d77 100644 --- a/src/blk/kernel/KernelDevice.h +++ b/src/blk/kernel/KernelDevice.h @@ -123,8 +123,8 @@ public: ~KernelDevice(); void aio_submit(IOContext *ioc) override; - int discard_drain(uint32_t timeout_msec) override; - + void discard_drain() override; + const interval_set* get_discard_queued() override { return &discard_queued;} int collect_metadata(const std::string& prefix, std::map *pm) const override; int get_devname(std::string *s) const override { if (devname.empty()) { diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index cd19ffbd180..4570e4353e9 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -7762,17 +7762,24 @@ void BlueStore::_close_db() db = nullptr; if (do_destage && fm && fm->is_null_manager()) { - // force all backgrounds discards to be committed before storing allocator - // set timeout to 500msec - int ret = bdev->discard_drain(500); - if (ret == 0) { - ret = store_allocator(alloc); - if (unlikely(ret != 0)) { - derr << __func__ << "::NCB::store_allocator() failed (we will need to rebuild it on startup)" << dendl; + if (cct->_conf->osd_fast_shutdown == false) { + // graceful shutdown -> commit backgrounds discards before storing allocator + bdev->discard_drain(); + } + + auto discard_queued = bdev->get_discard_queued(); + if (discard_queued && (discard_queued->num_intervals() > 0)) { + dout(10) << __func__ << "::discard_drain: 
size=" << discard_queued->size() + << " num_intervals=" << discard_queued->num_intervals() << dendl; + // copy discard_queued to the allocator before storing it + for (auto p = discard_queued->begin(); p != discard_queued->end(); ++p) { + dout(20) << __func__ << "::discarded-extent=[" << p.get_start() << ", " << p.get_len() << "]" << dendl; + alloc->init_add_free(p.get_start(), p.get_len()); } } - else { - derr << __func__ << "::NCB::discard_drain() exceeded timeout (abort!)" << dendl; + int ret = store_allocator(alloc); + if (unlikely(ret != 0)) { + derr << __func__ << "::NCB::store_allocator() failed (we will need to rebuild it on startup)" << dendl; } } -- 2.39.5