From: Igor Fedotov
Date: Fri, 14 Jul 2023 14:25:45 +0000 (+0300)
Subject: os/bluestore: make hybrid allocator implementation reusable
X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=71ced83e8829294762ebd9d4ddd5f1b3718bc9ce;p=ceph.git

os/bluestore: make hybrid allocator implementation reusable

Refactor the hybrid allocator so that alternative hybrid allocator
implementations can be built on the same code base.

Signed-off-by: Igor Fedotov
---

diff --git a/src/os/bluestore/Allocator.cc b/src/os/bluestore/Allocator.cc
index 1277b5762145e..748506ae25181 100644
--- a/src/os/bluestore/Allocator.cc
+++ b/src/os/bluestore/Allocator.cc
@@ -13,6 +13,7 @@
 #endif
 #include "common/debug.h"
 #include "common/admin_socket.h"
+
 #define dout_subsys ceph_subsys_bluestore
 
 using TOPNSPC::common::cmd_getval;
@@ -187,7 +188,7 @@ Allocator *Allocator::create(
   } else if (type == "btree") {
     return new BtreeAllocator(cct, size, block_size, name);
   } else if (type == "hybrid") {
-    return new HybridAllocator(cct, size, block_size,
+    return new HybridAvlAllocator(cct, size, block_size,
       cct->_conf.get_val("bluestore_hybrid_alloc_mem_cap"),
       name);
 #ifdef HAVE_LIBZBD
diff --git a/src/os/bluestore/AvlAllocator.cc b/src/os/bluestore/AvlAllocator.cc
index 564572bc7ff1a..34915cff7fd48 100644
--- a/src/os/bluestore/AvlAllocator.cc
+++ b/src/os/bluestore/AvlAllocator.cc
@@ -177,6 +177,11 @@ void AvlAllocator::_remove_from_tree(uint64_t start, uint64_t size)
   ceph_assert(size != 0);
   ceph_assert(size <= num_free);
 
+  //FIXME minor: technically this is wrong since find should return end()
+  // if an exactly matching offset isn't found, which might be the case when
+  // we're trying to remove a subchunk from the middle of an existing chunk.
+  // But it looks like avl containers tolerate this and return the chunk
+  // before the 'start' offset.
   auto rs = range_tree.find(range_t{start, end}, range_tree.key_comp());
   /* Make sure we completely overlap with someone */
   if (rs == range_tree.end() ||
@@ -360,6 +365,7 @@ AvlAllocator::AvlAllocator(CephContext* cct,
                            uint64_t max_mem,
                            std::string_view name) :
   Allocator(name, device_size, block_size),
+  cct(cct),
   range_size_alloc_threshold(
     cct->_conf.get_val("bluestore_avl_alloc_bf_threshold")),
   range_size_alloc_free_pct(
@@ -368,8 +374,7 @@ AvlAllocator::AvlAllocator(CephContext* cct,
     cct->_conf.get_val("bluestore_avl_alloc_ff_max_search_count")),
   max_search_bytes(
     cct->_conf.get_val("bluestore_avl_alloc_ff_max_search_bytes")),
-  range_count_cap(max_mem / sizeof(range_seg_t)),
-  cct(cct)
+  range_count_cap(max_mem / sizeof(range_seg_t))
 {
   ldout(cct, 10) << __func__ << " 0x" << std::hex << get_capacity() << "/"
                  << get_block_size() << std::dec << dendl;
diff --git a/src/os/bluestore/AvlAllocator.h b/src/os/bluestore/AvlAllocator.h
index 4ac0f6fb0c8a5..91bac9b0619ca 100644
--- a/src/os/bluestore/AvlAllocator.h
+++ b/src/os/bluestore/AvlAllocator.h
@@ -229,6 +229,21 @@ private:
     // i.e. (range_count_cap > 0)
     ceph_assert(false);
   }
+  // to be overridden by the Hybrid wrapper
+  virtual uint64_t _get_spilled_over() const {
+    return 0;
+  }
+  virtual uint64_t _spillover_allocate(uint64_t want,
+                                       uint64_t unit,
+                                       uint64_t max_alloc_size,
+                                       int64_t hint,
+                                       PExtentVector* extents) {
+    // this should be overridden when a range count cap is present,
+    // i.e. (range_count_cap > 0)
+    ceph_assert(false);
+    return 0;
+  }
+
 protected:
   // called when extent to be released/marked free
   virtual void _add_to_tree(uint64_t start, uint64_t size);
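The two virtual hooks above are the seam the hybrid wrapper plugs into: a
derived allocator reports how much free space has spilled over to its
fallback store and satisfies the remainder of a request from there. A rough
sketch of an override pair (the class and member names here are illustrative
only, not part of this change; constructor omitted for brevity):

    class ExampleHybridWrapper : public AvlAllocator {
      BitmapAllocator* fallback = nullptr;  // owns spilled-over ranges

      uint64_t _get_spilled_over() const override {
        // free space currently held by the fallback allocator
        return fallback ? fallback->get_free() : 0;
      }
      uint64_t _spillover_allocate(uint64_t want, uint64_t unit,
                                   uint64_t max_alloc_size, int64_t hint,
                                   PExtentVector* extents) override {
        if (!fallback) {
          return 0;
        }
        int64_t r = fallback->allocate(want, unit, max_alloc_size, hint, extents);
        // treat failures as "nothing allocated"; 'extents' stays untouched
        return r > 0 ? uint64_t(r) : 0;
      }
    };

HybridAllocatorBase below implements exactly this pattern on top of a
pluggable primary allocator.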
diff --git a/src/os/bluestore/HybridAllocator.cc b/src/os/bluestore/HybridAllocator.cc
index 373abd4eb5119..1cfab7e5ac3dc 100644
--- a/src/os/bluestore/HybridAllocator.cc
+++ b/src/os/bluestore/HybridAllocator.cc
@@ -3,208 +3,19 @@
 
 #include "HybridAllocator.h"
 
-#include <bit>
-#include <limits>
-
-#include "common/config_proxy.h"
-#include "common/debug.h"
-
-#define dout_context cct
+#define dout_context (T::get_context())
 #define dout_subsys ceph_subsys_bluestore
 #undef dout_prefix
-#define dout_prefix *_dout << "HybridAllocator "
-
-
-int64_t HybridAllocator::allocate(
-  uint64_t want,
-  uint64_t unit,
-  uint64_t max_alloc_size,
-  int64_t hint,
-  PExtentVector* extents)
-{
-  ldout(cct, 10) << __func__ << std::hex
-                 << " 0x" << want
-                 << "/" << unit
-                 << "," << max_alloc_size
-                 << "," << hint
-                 << std::dec << dendl;
-  ceph_assert(std::has_single_bit(unit));
-  ceph_assert(want % unit == 0);
-
-  if (max_alloc_size == 0) {
-    max_alloc_size = want;
-  }
-  if (constexpr auto cap = std::numeric_limits<decltype(bluestore_pextent_t::length)>::max();
-      max_alloc_size >= cap) {
-    max_alloc_size = p2align(uint64_t(cap), (uint64_t)get_block_size());
-  }
-
-  int64_t res;
-
-  typedef
-    std::function<int64_t(uint64_t, uint64_t, uint64_t, int64_t, PExtentVector*)>
-    alloc_fn;
-  alloc_fn priA = [&](uint64_t _want,
-                      uint64_t _unit,
-                      uint64_t _max_alloc_size,
-                      int64_t _hint,
-                      PExtentVector* _extents) {
-    return _allocate(_want, _unit, _max_alloc_size, _hint, _extents);
-  };
-  alloc_fn secA = [&](uint64_t _want,
-                      uint64_t _unit,
-                      uint64_t _max_alloc_size,
-                      int64_t _hint,
-                      PExtentVector* _extents) {
-    return bmap_alloc ?
-      bmap_alloc->allocate(_want, _unit, _max_alloc_size, _hint, _extents) :
-      0;
-  };
-
-  std::lock_guard l(lock);
-  // try bitmap first to avoid unneeded contiguous extents split if
-  // desired amount is less than shortes range in AVL
-  if (bmap_alloc && bmap_alloc->get_free() &&
-      want < _lowest_size_available()) {
-    std::swap(priA, secA);
-  }
-
-  {
-    auto orig_size = extents->size();
-    res = priA(want, unit, max_alloc_size, hint, extents);
-    if (res < 0) {
-      // allocator shouldn't return new extents on error
-      ceph_assert(orig_size == extents->size());
-      res = 0;
-    }
-  }
-  if ((uint64_t)res < want) {
-    auto orig_size = extents->size();
-    auto res2 = secA(want - res, unit, max_alloc_size, hint, extents);
-    if (res2 > 0) {
-      res += res2;
-    } else {
-      ceph_assert(orig_size == extents->size());
-    }
-  }
-  return res ? res : -ENOSPC;
-}
-
-void HybridAllocator::release(const interval_set<uint64_t>& release_set) {
-  std::lock_guard l(lock);
-  // this will attempt to put free ranges into AvlAllocator first and
-  // fallback to bitmap one via _try_insert_range call
-  _release(release_set);
-}
-
-uint64_t HybridAllocator::get_free()
-{
-  std::lock_guard l(lock);
-  return (bmap_alloc ? bmap_alloc->get_free() : 0) + _get_free();
-}
-
-double HybridAllocator::get_fragmentation()
-{
-  std::lock_guard l(lock);
-  auto f = AvlAllocator::_get_fragmentation();
-  auto bmap_free = bmap_alloc ? bmap_alloc->get_free() : 0;
-  if (bmap_free) {
-    auto _free = _get_free() + bmap_free;
-    auto bf = bmap_alloc->get_fragmentation();
-
-    f = f * _get_free() / _free + bf * bmap_free / _free;
-  }
-  return f;
-}
-
-void HybridAllocator::dump()
-{
-  std::lock_guard l(lock);
-  AvlAllocator::_dump();
-  if (bmap_alloc) {
-    bmap_alloc->dump();
-  }
-  ldout(cct, 0) << __func__
-                << " avl_free: " << _get_free()
-                << " bmap_free: " << (bmap_alloc ? bmap_alloc->get_free() : 0)
-                << dendl;
-}
-
-void HybridAllocator::foreach(
-  std::function<void(uint64_t offset, uint64_t length)> notify)
-{
-  std::lock_guard l(lock);
-  AvlAllocator::_foreach(notify);
-  if (bmap_alloc) {
-    bmap_alloc->foreach(notify);
-  }
-}
+#define dout_prefix *_dout << (std::string(this->get_type()) + "::").c_str()
 
-void HybridAllocator::init_rm_free(uint64_t offset, uint64_t length)
+/*
+ * class HybridAvlAllocator
+ *
+ *
+ */
+const char* HybridAvlAllocator::get_type() const
 {
-  if (!length)
-    return;
-  std::lock_guard l(lock);
-  ldout(cct, 10) << __func__ << std::hex
-                 << " offset 0x" << offset
-                 << " length 0x" << length
-                 << std::dec << dendl;
-  _try_remove_from_tree(offset, length,
-    [&](uint64_t o, uint64_t l, bool found) {
-      if (!found) {
-        if (bmap_alloc) {
-          bmap_alloc->init_rm_free(o, l);
-        } else {
-          lderr(cct) << "init_rm_free lambda " << std::hex
-                     << "Uexpected extent: "
-                     << " 0x" << o << "~" << l
-                     << std::dec << dendl;
-          ceph_assert(false);
-        }
-      }
-    });
+  return "hybrid";
 }
 
-void HybridAllocator::shutdown()
-{
-  std::lock_guard l(lock);
-  _shutdown();
-  if (bmap_alloc) {
-    bmap_alloc->shutdown();
-    delete bmap_alloc;
-    bmap_alloc = nullptr;
-  }
-}
-
-void HybridAllocator::_spillover_range(uint64_t start, uint64_t end)
-{
-  auto size = end - start;
-  dout(20) << __func__
-           << std::hex << " "
-           << start << "~" << size
-           << std::dec
-           << dendl;
-  ceph_assert(size);
-  if (!bmap_alloc) {
-    dout(1) << __func__
-            << " constructing fallback allocator"
-            << dendl;
-    bmap_alloc = new BitmapAllocator(cct,
-                                     get_capacity(),
-                                     get_block_size(),
-                                     get_name() + ".fallback");
-  }
-  bmap_alloc->init_add_free(start, size);
-}
-
-void HybridAllocator::_add_to_tree(uint64_t start, uint64_t size)
-{
-  if (bmap_alloc) {
-    uint64_t head = bmap_alloc->claim_free_to_left(start);
-    uint64_t tail = bmap_alloc->claim_free_to_right(start + size);
-    ceph_assert(head <= start);
-    start -= head;
-    size += head + tail;
-  }
-  AvlAllocator::_add_to_tree(start, size);
-}
+#include "HybridAllocator_impl.h"
diff --git a/src/os/bluestore/HybridAllocator.h b/src/os/bluestore/HybridAllocator.h
index a4cf1e2250c6e..bca6e7af9e5ed 100644
--- a/src/os/bluestore/HybridAllocator.h
+++ b/src/os/bluestore/HybridAllocator.h
@@ -8,17 +8,14 @@
 #include "AvlAllocator.h"
 #include "BitmapAllocator.h"
 
-class HybridAllocator : public AvlAllocator {
+template <typename PrimaryAllocator>
+class HybridAllocatorBase : public PrimaryAllocator {
   BitmapAllocator* bmap_alloc = nullptr;
 public:
-  HybridAllocator(CephContext* cct, int64_t device_size, int64_t _block_size,
-                  uint64_t max_mem,
-                  std::string_view name) :
-    AvlAllocator(cct, device_size, _block_size, max_mem, name) {
-  }
-  const char* get_type() const override
-  {
-    return "hybrid";
+  HybridAllocatorBase(CephContext* cct, int64_t device_size, int64_t _block_size,
+                      uint64_t max_mem,
+                      std::string_view name) :
+    PrimaryAllocator(cct, device_size, _block_size, max_mem, name) {
   }
   int64_t allocate(
     uint64_t want,
@@ -26,15 +23,46 @@ public:
     uint64_t max_alloc_size,
     int64_t hint,
     PExtentVector *extents) override;
-  void release(const interval_set<uint64_t>& release_set) override;
-  uint64_t get_free() override;
-  double get_fragmentation() override;
+  using PrimaryAllocator::release;
+  uint64_t get_free() override {
+    std::lock_guard l(PrimaryAllocator::get_lock());
+    return (bmap_alloc ? bmap_alloc->get_free() : 0) +
+      PrimaryAllocator::_get_free();
+  }
+
+  double get_fragmentation() override {
+    std::lock_guard l(PrimaryAllocator::get_lock());
+    auto f = PrimaryAllocator::_get_fragmentation();
+    auto bmap_free = bmap_alloc ? bmap_alloc->get_free() : 0;
+    if (bmap_free) {
+      auto _free = PrimaryAllocator::_get_free() + bmap_free;
+      auto bf = bmap_alloc->get_fragmentation();
+
+      f = f * PrimaryAllocator::_get_free() / _free + bf * bmap_free / _free;
+    }
+    return f;
+  }
   void dump() override;
+
   void foreach(
-    std::function<void(uint64_t offset, uint64_t length)> notify) override;
+    std::function<void(uint64_t offset, uint64_t length)> notify) override {
+    std::lock_guard l(PrimaryAllocator::get_lock());
+    PrimaryAllocator::_foreach(notify);
+    if (bmap_alloc) {
+      bmap_alloc->foreach(notify);
+    }
+  }
   void init_rm_free(uint64_t offset, uint64_t length) override;
-  void shutdown() override;
+  void shutdown() override {
+    std::lock_guard l(PrimaryAllocator::get_lock());
+    PrimaryAllocator::_shutdown();
+    if (bmap_alloc) {
+      bmap_alloc->shutdown();
+      delete bmap_alloc;
+      bmap_alloc = nullptr;
+    }
+  }
 
 protected:
   // intended primarily for UT
@@ -47,7 +75,50 @@ protected:
 
 private:
   void _spillover_range(uint64_t start, uint64_t end) override;
+  uint64_t _spillover_allocate(uint64_t want,
+                               uint64_t unit,
+                               uint64_t max_alloc_size,
+                               int64_t hint,
+                               PExtentVector* extents) override;
+
+  // Allocates up to 'want' bytes from the primary or secondary allocator.
+  // Returns:
+  //  0 if an error occurred or nothing has been allocated;
+  //    the 'extents' vector remains unmodified
+  //  amount of allocated bytes (<= want) if something has been allocated;
+  //    'extents' gets new extents, existing ones are preserved
+  uint64_t _allocate_or_rollback(bool primary,
+                                 uint64_t want,
+                                 uint64_t unit,
+                                 uint64_t max_alloc_size,
+                                 int64_t hint,
+                                 PExtentVector* extents);
+
+  uint64_t _get_spilled_over() const override {
+    return bmap_alloc ? bmap_alloc->get_free() : 0;
+  }
 
   // called when extent to be released/marked free
-  void _add_to_tree(uint64_t start, uint64_t size) override;
+  void _add_to_tree(uint64_t start, uint64_t size) override {
+    if (bmap_alloc) {
+      uint64_t head = bmap_alloc->claim_free_to_left(start);
+      uint64_t tail = bmap_alloc->claim_free_to_right(start + size);
+      ceph_assert(head <= start);
+      start -= head;
+      size += head + tail;
+    }
+    PrimaryAllocator::_add_to_tree(start, size);
+  }
+};
+
+class HybridAvlAllocator : public HybridAllocatorBase<AvlAllocator> {
+public:
+  HybridAvlAllocator(CephContext* cct, int64_t device_size,
+                     int64_t _block_size, uint64_t max_mem,
+                     std::string_view name) :
+    HybridAllocatorBase<AvlAllocator>(cct,
+      device_size, _block_size, max_mem, name) {
+  }
+  const char* get_type() const override;
 };
+
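With the base class templated on its primary allocator, spinning up an
alternative hybrid flavour amounts to a thin subclass like HybridAvlAllocator
above. Purely as a speculative sketch (this patch does not add such a class,
and BtreeAllocator would first need to expose the same protected hooks —
get_lock(), _allocate(), _release(), _add_to_tree() and the spillover
virtuals):

    // Hypothetical: not part of this change.
    class HybridBtreeAllocator : public HybridAllocatorBase<BtreeAllocator> {
    public:
      HybridBtreeAllocator(CephContext* cct, int64_t device_size,
                           int64_t _block_size, uint64_t max_mem,
                           std::string_view name)
        : HybridAllocatorBase<BtreeAllocator>(cct, device_size, _block_size,
                                              max_mem, name) {}
      const char* get_type() const override { return "hybrid_btree"; }
    };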
diff --git a/src/os/bluestore/HybridAllocator_impl.h b/src/os/bluestore/HybridAllocator_impl.h
new file mode 100644
index 0000000000000..4542319b07a67
--- /dev/null
+++ b/src/os/bluestore/HybridAllocator_impl.h
@@ -0,0 +1,184 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#pragma once
+
+#include "HybridAllocator.h"
+
+#define dout_context (T::get_context())
+#define dout_subsys ceph_subsys_bluestore
+#undef dout_prefix
+#define dout_prefix *_dout << (std::string(this->get_type()) + "::")
+
+template <typename T>
+int64_t HybridAllocatorBase<T>::allocate(
+  uint64_t want,
+  uint64_t unit,
+  uint64_t max_alloc_size,
+  int64_t hint,
+  PExtentVector* extents)
+{
+  dout(10) << __func__ << std::hex
+           << " 0x" << want
+           << "/" << unit
+           << "," << max_alloc_size
+           << "," << hint
+           << std::dec << dendl;
+  ceph_assert(std::has_single_bit(unit));
+  ceph_assert(want % unit == 0);
+
+  if (max_alloc_size == 0) {
+    max_alloc_size = want;
+  }
+  if (constexpr auto cap = std::numeric_limits<decltype(bluestore_pextent_t::length)>::max();
+      max_alloc_size >= cap) {
+    max_alloc_size = p2align(uint64_t(cap), (uint64_t)T::get_block_size());
+  }
+
+  std::lock_guard l(T::get_lock());
+
+  // try bitmap first to avoid unneeded contiguous extents split if
+  // the desired amount is less than the shortest range in the AVL tree
+  bool primary_first = !(bmap_alloc && bmap_alloc->get_free() &&
+                         want < T::_lowest_size_available());
+
+  int64_t res = _allocate_or_rollback(primary_first,
+    want, unit, max_alloc_size, hint, extents);
+  ceph_assert(res >= 0);
+  if ((uint64_t)res < want) {
+    auto orig_size = extents->size();
+    int64_t res2 = 0;
+    // try the alternate allocator
+    if (!primary_first) {
+      res2 = T::_allocate(want - res, unit, max_alloc_size, hint, extents);
+    } else if (bmap_alloc) {
+      res2 =
+        bmap_alloc->allocate(want - res, unit, max_alloc_size, hint, extents);
+    }
+    if (res2 >= 0) {
+      res += res2;
+    } else {
+      // allocator shouldn't return new extents on error
+      ceph_assert(orig_size == extents->size());
+    }
+  }
+  return res ? res : -ENOSPC;
+}
+
+template <typename T>
+void HybridAllocatorBase<T>::dump()
+{
+  std::lock_guard l(T::get_lock());
+  T::_dump();
+  if (bmap_alloc) {
+    bmap_alloc->dump();
+  }
+  dout(0) << __func__
+          << " avl_free: " << T::_get_free()
+          << " bmap_free: " << (bmap_alloc ? bmap_alloc->get_free() : 0)
+          << dendl;
+}
+
+template <typename T>
+void HybridAllocatorBase<T>::init_rm_free(uint64_t offset, uint64_t length)
+{
+  if (!length)
+    return;
+  std::lock_guard l(T::get_lock());
+  dout(10) << __func__ << std::hex
+           << " offset 0x" << offset
+           << " length 0x" << length
+           << std::dec << dendl;
+  T::_try_remove_from_tree(offset, length,
+    [&](uint64_t o, uint64_t l, bool found) {
+      if (!found) {
+        if (bmap_alloc) {
+          bmap_alloc->init_rm_free(o, l);
+        } else {
+          derr << __func__ << " lambda " << std::hex
+               << "Unexpected extent:"
+               << " 0x" << o << "~" << l
+               << std::dec << dendl;
+          ceph_assert(false);
+        }
+      }
+    });
+}
+
+template <typename T>
+void HybridAllocatorBase<T>::_spillover_range(uint64_t start, uint64_t end)
+{
+  auto size = end - start;
+  dout(20) << __func__
+           << std::hex << " "
+           << start << "~" << size
+           << std::dec
+           << dendl;
+  ceph_assert(size);
+  if (!bmap_alloc) {
+    dout(1) << __func__
+            << " constructing fallback allocator"
+            << dendl;
+    bmap_alloc = new BitmapAllocator(T::get_context(),
+                                     T::get_capacity(),
+                                     T::get_block_size(),
+                                     T::get_name() + ".fallback");
+  }
+  bmap_alloc->init_add_free(start, size);
+}
+
+template <typename T>
+uint64_t HybridAllocatorBase<T>::_spillover_allocate(uint64_t want,
+                                                     uint64_t unit,
+                                                     uint64_t max_alloc_size,
+                                                     int64_t hint,
+                                                     PExtentVector* extents)
+{
+  return _allocate_or_rollback(false,
+    want,
+    unit,
+    max_alloc_size,
+    hint,
+    extents);
+}
+
+template <typename PrimaryAllocator>
+uint64_t HybridAllocatorBase<PrimaryAllocator>::_allocate_or_rollback(
+  bool primary,
+  uint64_t want,
+  uint64_t unit,
+  uint64_t max_alloc_size,
+  int64_t hint,
+  PExtentVector* extents)
+{
+  int64_t res = 0;
+  ceph_assert(extents);
+  // preserve the original 'extents' vector state
+  auto orig_size = extents->size();
+  if (primary) {
+    res = PrimaryAllocator::_allocate(want, unit, max_alloc_size, hint, extents);
+  } else if (bmap_alloc) {
+    res = bmap_alloc->allocate(want, unit, max_alloc_size, hint, extents);
+  }
+  if (res < 0) {
+    // got a failure, release whatever has already been allocated
+    PExtentVector local_extents;
+    PExtentVector* e = extents;
+    if (orig_size) {
+      local_extents.insert(
+        local_extents.end(), extents->begin() + orig_size, extents->end());
+      e = &local_extents;
+    }
+
+    if (e->size()) {
+      if (primary) {
+        PrimaryAllocator::_release(*e);
+      } else if (bmap_alloc) {
+        bmap_alloc->release(*e);
+      }
+    }
+    extents->resize(orig_size);
+    res = 0;
+  }
+  return (uint64_t)res;
+}
diff --git a/src/test/objectstore/hybrid_allocator_test.cc b/src/test/objectstore/hybrid_allocator_test.cc
index c7ecfde021d31..f103b3644d68f 100755
--- a/src/test/objectstore/hybrid_allocator_test.cc
+++ b/src/test/objectstore/hybrid_allocator_test.cc
@@ -6,14 +6,14 @@
 
 #include "os/bluestore/HybridAllocator.h"
 
-class TestHybridAllocator : public HybridAllocator {
+class TestHybridAllocator : public HybridAvlAllocator {
 public:
   TestHybridAllocator(CephContext* cct,
                       int64_t device_size,
                       int64_t _block_size,
                       uint64_t max_entries,
                       const std::string& name) :
-    HybridAllocator(cct, device_size, _block_size,
+    HybridAvlAllocator(cct, device_size, _block_size,
                     max_entries, name) {
   }
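For orientation, the renamed class is exercised the same way as the old
HybridAllocator; a minimal usage sketch in the spirit of the unit test above
(the sizes and the cct handle are placeholders, not taken from the test):

    #include "os/bluestore/HybridAllocator.h"

    void smoke_test(CephContext* cct) {
      const int64_t capacity = 1024 * 4096;  // 1024 blocks of 4 KiB
      const int64_t block = 4096;
      HybridAvlAllocator alloc(cct, capacity, block, 4096 /* max_mem */, "test");
      alloc.init_add_free(0, capacity);      // whole device starts free

      PExtentVector extents;
      int64_t got = alloc.allocate(2 * block, block, 0 /* max_alloc_size */,
                                   0 /* hint */, &extents);
      if (got > 0) {
        interval_set<uint64_t> to_release;
        for (auto& e : extents) {
          to_release.insert(e.offset, e.length);
        }
        alloc.release(to_release);           // ranges go back to AVL first
      }
    }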