From: Abutalib Aghayev
Date: Wed, 4 Nov 2020 20:35:17 +0000 (-0500)
Subject: os/bluestore: Introduce plumbing necessary for garbage collecting zones.
X-Git-Tag: v16.1.0~385^2
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=3895e3720b0165356c9cc9f2bcee855010d49eba;p=ceph.git

os/bluestore: Introduce plumbing necessary for garbage collecting zones.

Signed-off-by: Abutalib Aghayev
---

diff --git a/src/os/bluestore/Allocator.h b/src/os/bluestore/Allocator.h
index 2104c2cc1139..b3b76c53faf3 100644
--- a/src/os/bluestore/Allocator.h
+++ b/src/os/bluestore/Allocator.h
@@ -49,7 +49,11 @@ public:
   virtual void dump() = 0;
   virtual void dump(std::function<void(uint64_t offset, uint64_t length)> notify) = 0;
 
-  virtual void set_zone_states(std::vector<zone_state_t> &&_zone_states) {}
+  virtual void zoned_set_zone_states(std::vector<zone_state_t> &&_zone_states) {}
+  virtual bool zoned_get_zones_to_clean(std::deque<uint64_t> *zones_to_clean) {
+    return false;
+  }
+
   virtual void init_add_free(uint64_t offset, uint64_t length) = 0;
   virtual void init_rm_free(uint64_t offset, uint64_t length) = 0;
diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc
index 21f0325eeb48..5e60fa85cd1b 100644
--- a/src/os/bluestore/BlueStore.cc
+++ b/src/os/bluestore/BlueStore.cc
@@ -4373,6 +4373,7 @@ BlueStore::BlueStore(CephContext *cct,
     finisher(cct, "commit_finisher", "cfin"),
     kv_sync_thread(this),
     kv_finalize_thread(this),
+    zoned_cleaner_thread(this),
     min_alloc_size(_min_alloc_size),
     min_alloc_size_order(ctz(_min_alloc_size)),
     mempool_thread(this)
@@ -5301,7 +5302,7 @@ int BlueStore::_init_alloc()
   ceph_assert(shared_alloc.a != NULL);
 
   if (bdev->is_smr()) {
-    shared_alloc.a->set_zone_states(fm->get_zone_states(db));
+    shared_alloc.a->zoned_set_zone_states(fm->get_zone_states(db));
   }
 
   uint64_t num = 0, bytes = 0;
@@ -6840,6 +6841,10 @@ int BlueStore::_mount()
 
   _kv_start();
 
+  if (bdev->is_smr()) {
+    _zoned_cleaner_start();
+  }
+
   r = _deferred_replay();
   if (r < 0)
     goto out_stop;
@@ -6869,6 +6874,9 @@ int BlueStore::_mount()
   return 0;
 
  out_stop:
+  if (bdev->is_smr()) {
+    _zoned_cleaner_stop();
+  }
   _kv_stop();
  out_coll:
   _shutdown_cache();
@@ -6887,6 +6895,10 @@ int BlueStore::umount()
   mounted = false;
   if (!_kv_only) {
     mempool_thread.shutdown();
+    if (bdev->is_smr()) {
+      dout(20) << __func__ << " stopping zone cleaner thread" << dendl;
+      _zoned_cleaner_stop();
+    }
     dout(20) << __func__ << " stopping kv thread" << dendl;
     _kv_stop();
     _shutdown_cache();
@@ -11965,6 +11977,63 @@ void BlueStore::_kv_finalize_thread()
   kv_finalize_started = false;
 }
 
+void BlueStore::_zoned_cleaner_start() {
+  dout(10) << __func__ << dendl;
+
+  zoned_cleaner_thread.create("bstore_zcleaner");
+}
+
+void BlueStore::_zoned_cleaner_stop() {
+  dout(10) << __func__ << dendl;
+  {
+    std::unique_lock l{zoned_cleaner_lock};
+    while (!zoned_cleaner_started) {
+      zoned_cleaner_cond.wait(l);
+    }
+    zoned_cleaner_stop = true;
+    zoned_cleaner_cond.notify_all();
+  }
+  zoned_cleaner_thread.join();
+  {
+    std::lock_guard l{zoned_cleaner_lock};
+    zoned_cleaner_stop = false;
+  }
+  dout(10) << __func__ << " done" << dendl;
+}
+
+void BlueStore::_zoned_cleaner_thread() {
+  dout(10) << __func__ << " start" << dendl;
+  std::unique_lock l{zoned_cleaner_lock};
+  ceph_assert(!zoned_cleaner_started);
+  zoned_cleaner_started = true;
+  zoned_cleaner_cond.notify_all();
+  std::deque<uint64_t> zones_to_clean;
+  while (true) {
+    if (zoned_cleaner_queue.empty()) {
+      if (zoned_cleaner_stop) {
+        break;
+      }
+      dout(20) << __func__ << " sleep" << dendl;
+      zoned_cleaner_cond.wait(l);
+      dout(20) << __func__ << " wake" << dendl;
+    } else {
+      zones_to_clean.swap(zoned_cleaner_queue);
+      l.unlock();
+      while (!zones_to_clean.empty()) {
+        _zoned_clean_zone(zones_to_clean.front());
+        zones_to_clean.pop_front();
+      }
+      l.lock();
+    }
+  }
+  dout(10) << __func__ << " finish" << dendl;
+  zoned_cleaner_started = false;
+}
+
+void BlueStore::_zoned_clean_zone(uint64_t zone_num) {
+  dout(10) << __func__ << " cleaning zone " << zone_num << dendl;
+}
+
 bluestore_deferred_op_t *BlueStore::_get_deferred_op(
   TransContext *txc)
 {
@@ -13623,6 +13692,15 @@ int BlueStore::_do_alloc_write(
   }
 
   _collect_allocation_stats(need, min_alloc_size, prealloc.size());
 
+  if (bdev->is_smr()) {
+    std::deque<uint64_t> zones_to_clean;
+    if (shared_alloc.a->zoned_get_zones_to_clean(&zones_to_clean)) {
+      std::lock_guard l{zoned_cleaner_lock};
+      zoned_cleaner_queue.swap(zones_to_clean);
+      zoned_cleaner_cond.notify_one();
+    }
+  }
+
   dout(20) << __func__ << " prealloc " << prealloc << dendl;
   auto prealloc_pos = prealloc.begin();
diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h
index 4d8a669e0e98..b10b68373241 100644
--- a/src/os/bluestore/BlueStore.h
+++ b/src/os/bluestore/BlueStore.h
@@ -1989,11 +1989,19 @@ public:
   struct KVFinalizeThread : public Thread {
     BlueStore *store;
     explicit KVFinalizeThread(BlueStore *s) : store(s) {}
-    void *entry() {
+    void *entry() override {
       store->_kv_finalize_thread();
       return NULL;
     }
   };
+  struct ZonedCleanerThread : public Thread {
+    BlueStore *store;
+    explicit ZonedCleanerThread(BlueStore *s) : store(s) {}
+    void *entry() override {
+      store->_zoned_cleaner_thread();
+      return nullptr;
+    }
+  };
 
   struct DBHistogram {
     struct value_dist {
@@ -2108,6 +2116,13 @@ private:
   std::deque<DeferredBatch*> deferred_stable_to_finalize; ///< pending finalization
   bool kv_finalize_in_progress = false;
 
+  ZonedCleanerThread zoned_cleaner_thread;
+  ceph::mutex zoned_cleaner_lock = ceph::make_mutex("BlueStore::zoned_cleaner_lock");
+  ceph::condition_variable zoned_cleaner_cond;
+  bool zoned_cleaner_started = false;
+  bool zoned_cleaner_stop = false;
+  std::deque<uint64_t> zoned_cleaner_queue;
+
   PerfCounters *logger = nullptr;
 
   std::list<CollectionRef> removed_collections;
@@ -2465,6 +2480,11 @@ private:
   void _kv_sync_thread();
   void _kv_finalize_thread();
 
+  void _zoned_cleaner_start();
+  void _zoned_cleaner_stop();
+  void _zoned_cleaner_thread();
+  void _zoned_clean_zone(uint64_t zone_num);
+
   bluestore_deferred_op_t *_get_deferred_op(TransContext *txc);
   void _deferred_queue(TransContext *txc);
 public:
diff --git a/src/os/bluestore/ZonedAllocator.cc b/src/os/bluestore/ZonedAllocator.cc
index 3a80593f40c7..bac6f016997c 100644
--- a/src/os/bluestore/ZonedAllocator.cc
+++ b/src/os/bluestore/ZonedAllocator.cc
@@ -101,7 +101,6 @@ void ZonedAllocator::release(const interval_set<uint64_t>& release_set) {
 }
 
 uint64_t ZonedAllocator::get_free() {
-  std::lock_guard l(lock);
   return num_free;
 }
 
@@ -117,8 +116,7 @@ void ZonedAllocator::dump(std::function<void(uint64_t offset,
-void ZonedAllocator::set_zone_states(std::vector<zone_state_t> &&_zone_states) {
+bool ZonedAllocator::zoned_get_zones_to_clean(std::deque<uint64_t> *zones_to_clean) {
+  // TODO: make 0.25 tunable
+  if (static_cast<double>(num_free) / size > 0.25) {
+    return false;
+  }
+  {
+    std::lock_guard l(lock);
+    // TODO: populate |zones_to_clean| with the numbers of zones that should be
+    // cleaned.
+  }
+  return true;
+}
+
+void ZonedAllocator::zoned_set_zone_states(std::vector<zone_state_t> &&_zone_states) {
   std::lock_guard l(lock);
   ldout(cct, 10) << __func__ << dendl;
   zone_states = std::move(_zone_states);
diff --git a/src/os/bluestore/ZonedAllocator.h b/src/os/bluestore/ZonedAllocator.h
index 22b40221f7b8..4b03fe5e822b 100644
--- a/src/os/bluestore/ZonedAllocator.h
+++ b/src/os/bluestore/ZonedAllocator.h
@@ -30,7 +30,7 @@ class ZonedAllocator : public Allocator {
   // atomic_alloc_and_submit_lock will be removed.
   ceph::mutex lock = ceph::make_mutex("ZonedAllocator::lock");
 
-  int64_t num_free;     ///< total bytes in freelist
+  std::atomic<int64_t> num_free;     ///< total bytes in freelist
   uint64_t size;
   uint64_t block_size;
   uint64_t zone_size;
@@ -75,7 +75,9 @@ public:
 
   void dump(std::function<void(uint64_t offset, uint64_t length)> notify) override;
 
-  void set_zone_states(std::vector<zone_state_t> &&_zone_states) override;
+  void zoned_set_zone_states(std::vector<zone_state_t> &&_zone_states) override;
+  bool zoned_get_zones_to_clean(std::deque<uint64_t> *zones_to_clean) override;
+
   void init_add_free(uint64_t offset, uint64_t length) override;
   void init_rm_free(uint64_t offset, uint64_t length) override;