From: Gabriel BenHanokh Date: Mon, 8 Apr 2024 11:15:47 +0000 (+0000) Subject: On graceful shutdown we will wait for discard queue to drain before storing the alloc... X-Git-Tag: testing/wip-vshankar-testing-20241106.074359-squid-debug~19^2~6 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=d65eebc478d0a9bd366f3b5cc38fb4c84aa00dc8;p=ceph-ci.git On graceful shutdown we will wait for discard queue to drain before storing the allocator. ON fast shutdown we will simply copy the discard queue entries to the allocator Signed-off-by: Gabriel BenHanokh (cherry picked from commit 4762ffaaa19ca2afe8864fa1a6fb67ceea3aa640) --- diff --git a/src/blk/BlockDevice.h b/src/blk/BlockDevice.h index 7431b062dce..e46948ced34 100644 --- a/src/blk/BlockDevice.h +++ b/src/blk/BlockDevice.h @@ -285,8 +285,8 @@ public: int write_hint = WRITE_LIFE_NOT_SET) = 0; virtual int flush() = 0; virtual bool try_discard(interval_set &to_release, bool async=true) { return false; } - virtual int discard_drain(uint32_t timeout_msec = 0) { return 0; } - + virtual void discard_drain() { return; } + virtual const interval_set* get_discard_queued() { return nullptr;} // for managing buffered readers/writers virtual int invalidate_cache(uint64_t off, uint64_t len) = 0; virtual int open(const std::string& path) = 0; diff --git a/src/blk/kernel/KernelDevice.cc b/src/blk/kernel/KernelDevice.cc index 716004e6c89..6337292f5de 100644 --- a/src/blk/kernel/KernelDevice.cc +++ b/src/blk/kernel/KernelDevice.cc @@ -587,35 +587,13 @@ bool KernelDevice::_discard_started() return !discard_threads.empty(); } -int KernelDevice::discard_drain(uint32_t timeout_msec = 0) +void KernelDevice::discard_drain() { dout(10) << __func__ << dendl; - bool check_timeout = false; - utime_t end_time; - if (timeout_msec) { - check_timeout = true; - uint32_t timeout_sec = 0; - if (timeout_msec >= 1000) { - timeout_sec = (timeout_msec / 1000); - timeout_msec = (timeout_msec % 1000); - } - end_time = ceph_clock_now(); - // add the timeout after converting from msec to nsec - end_time.tv.tv_nsec += (timeout_msec * (1000*1000)); - if (end_time.tv.tv_nsec > (1000*1000*1000)) { - end_time.tv.tv_nsec -= (1000*1000*1000); - end_time.tv.tv_sec += 1; - } - end_time.tv.tv_sec += timeout_sec; - } std::unique_lock l(discard_lock); while (!discard_queued.empty() || discard_running) { discard_cond.wait(l); - if (check_timeout && ceph_clock_now() > end_time) { - return -1; - } } - return 0; } static bool is_expected_ioerr(const int r) diff --git a/src/blk/kernel/KernelDevice.h b/src/blk/kernel/KernelDevice.h index b7b0a7dc38b..2b3f3943d77 100644 --- a/src/blk/kernel/KernelDevice.h +++ b/src/blk/kernel/KernelDevice.h @@ -123,8 +123,8 @@ public: ~KernelDevice(); void aio_submit(IOContext *ioc) override; - int discard_drain(uint32_t timeout_msec) override; - + void discard_drain() override; + const interval_set* get_discard_queued() override { return &discard_queued;} int collect_metadata(const std::string& prefix, std::map *pm) const override; int get_devname(std::string *s) const override { if (devname.empty()) { diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index 108afc4038d..9d853120b7d 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -7754,17 +7754,24 @@ void BlueStore::_close_db() db = nullptr; if (do_destage && fm && fm->is_null_manager()) { - // force all backgrounds discards to be committed before storing allocator - // set timeout to 500msec - int ret = bdev->discard_drain(500); - if (ret == 0) { - ret = store_allocator(alloc); - if (unlikely(ret != 0)) { - derr << __func__ << "::NCB::store_allocator() failed (we will need to rebuild it on startup)" << dendl; + if (cct->_conf->osd_fast_shutdown == false) { + // graceful shutdown -> commit backgrounds discards before storing allocator + bdev->discard_drain(); + } + + auto discard_queued = bdev->get_discard_queued(); + if (discard_queued && (discard_queued->num_intervals() > 0)) { + dout(10) << __func__ << "::discard_drain: size=" << discard_queued->size() + << " num_intervals=" << discard_queued->num_intervals() << dendl; + // copy discard_queued to the allocator before storing it + for (auto p = discard_queued->begin(); p != discard_queued->end(); ++p) { + dout(20) << __func__ << "::discarded-extent=[" << p.get_start() << ", " << p.get_len() << "]" << dendl; + alloc->init_add_free(p.get_start(), p.get_len()); } } - else { - derr << __func__ << "::NCB::discard_drain() exceeded timeout (abort!)" << dendl; + int ret = store_allocator(alloc); + if (unlikely(ret != 0)) { + derr << __func__ << "::NCB::store_allocator() failed (we will need to rebuild it on startup)" << dendl; } }