From a7d7e3e65c96abcaa8a619a2470d25e78ab00911 Mon Sep 17 00:00:00 2001
From: Igor Fedotov
Date: Wed, 12 Feb 2020 15:55:03 +0300
Subject: [PATCH] os/bluestore: introduce spillover mechanics to AvlAllocator.

This is to be utilized in AvlAllocator's descendant class(es) to be able
to limit the number of entries the allocator tracks, hence putting a cap
on the RAM used by the allocator.

Signed-off-by: Igor Fedotov
(cherry picked from commit efedcc077c7ed4e1e5ea26ae989fa3d6cbd287d4)
---
 src/os/bluestore/AvlAllocator.cc | 68 +++++++++++++++-----------
 src/os/bluestore/AvlAllocator.h  | 83 +++++++++++++++++++++++++++++++-
 2 files changed, 121 insertions(+), 30 deletions(-)

diff --git a/src/os/bluestore/AvlAllocator.cc b/src/os/bluestore/AvlAllocator.cc
index c2108210327f1..69a6b24d1e11e 100755
--- a/src/os/bluestore/AvlAllocator.cc
+++ b/src/os/bluestore/AvlAllocator.cc
@@ -55,15 +55,6 @@ uint64_t AvlAllocator::_block_picker(const Tree& t,
   return _block_picker(t, cursor, size, align);
 }
 
-namespace {
-  struct dispose_rs {
-    void operator()(range_seg_t* p)
-    {
-      delete p;
-    }
-  };
-}
-
 void AvlAllocator::_add_to_tree(uint64_t start, uint64_t size)
 {
   assert(size != 0);
@@ -83,25 +74,22 @@ void AvlAllocator::_add_to_tree(uint64_t start, uint64_t size)
   bool merge_after = (rs_after != range_tree.end() && rs_after->start == end);
 
   if (merge_before && merge_after) {
-    range_size_tree.erase(*rs_before);
-    range_size_tree.erase(*rs_after);
+    _range_size_tree_rm(*rs_before);
+    _range_size_tree_rm(*rs_after);
     rs_after->start = rs_before->start;
     range_tree.erase_and_dispose(rs_before, dispose_rs{});
-    range_size_tree.insert(*rs_after);
+    _range_size_tree_try_insert(*rs_after);
   } else if (merge_before) {
-    range_size_tree.erase(*rs_before);
+    _range_size_tree_rm(*rs_before);
     rs_before->end = end;
-    range_size_tree.insert(*rs_before);
+    _range_size_tree_try_insert(*rs_before);
   } else if (merge_after) {
-    range_size_tree.erase(*rs_after);
+    _range_size_tree_rm(*rs_after);
     rs_after->start = start;
-    range_size_tree.insert(*rs_after);
+    _range_size_tree_try_insert(*rs_after);
   } else {
-    auto new_rs = new range_seg_t{start, end};
-    range_tree.insert_before(rs_after, *new_rs);
-    range_size_tree.insert(*new_rs);
+    _try_insert_range(start, end, &rs_after);
   }
-  num_free += size;
 }
 
 void AvlAllocator::_remove_from_tree(uint64_t start, uint64_t size)
@@ -120,25 +108,31 @@ void AvlAllocator::_remove_from_tree(uint64_t start, uint64_t size)
   bool left_over = (rs->start != start);
   bool right_over = (rs->end != end);
 
-  range_size_tree.erase(*rs);
+  _range_size_tree_rm(*rs);
 
   if (left_over && right_over) {
-    auto new_seg = new range_seg_t{end, rs->end};
+    auto old_right_end = rs->end;
+    auto insert_pos = rs;
+    ceph_assert(insert_pos != range_tree.end());
+    ++insert_pos;
     rs->end = start;
-    range_tree.insert(rs, *new_seg);
-    range_size_tree.insert(*new_seg);
-    range_size_tree.insert(*rs);
+
+    // Insert the tail first to be sure insert_pos hasn't been disposed.
+    // This wouldn't dispose rs though, since it's out of range_size_tree.
+    // Don't care about the small chance of a 'not-the-best-choice-for-removal'
+    // case which might happen if rs has the lowest size.
+    _try_insert_range(end, old_right_end, &insert_pos);
+    _range_size_tree_try_insert(*rs);
+
   } else if (left_over) {
     rs->end = start;
-    range_size_tree.insert(*rs);
+    _range_size_tree_try_insert(*rs);
   } else if (right_over) {
     rs->start = end;
-    range_size_tree.insert(*rs);
+    _range_size_tree_try_insert(*rs);
   } else {
     range_tree.erase_and_dispose(rs, dispose_rs{});
   }
-  assert(num_free >= size);
-  num_free -= size;
 }
 
 int AvlAllocator::_allocate(
@@ -198,6 +192,22 @@ int AvlAllocator::_allocate(
   return 0;
 }
 
+AvlAllocator::AvlAllocator(CephContext* cct,
+                           int64_t device_size,
+                           int64_t block_size,
+                           uint64_t max_entries,
+                           const std::string& name) :
+  Allocator(name),
+  num_total(device_size),
+  block_size(block_size),
+  range_size_alloc_threshold(
+    cct->_conf.get_val<uint64_t>("bluestore_avl_alloc_bf_threshold")),
+  range_size_alloc_free_pct(
+    cct->_conf.get_val<uint64_t>("bluestore_avl_alloc_bf_free_pct")),
+  range_count_cap(max_entries),
+  cct(cct)
+{}
+
 AvlAllocator::AvlAllocator(CephContext* cct,
                            int64_t device_size,
                            int64_t block_size,
diff --git a/src/os/bluestore/AvlAllocator.h b/src/os/bluestore/AvlAllocator.h
index cb6ebbce58478..a1c4dff4e5a0d 100755
--- a/src/os/bluestore/AvlAllocator.h
+++ b/src/os/bluestore/AvlAllocator.h
@@ -43,10 +43,30 @@ struct range_seg_t {
       }
     }
   };
+  inline uint64_t length() const {
+    return end - start;
+  }
   boost::intrusive::avl_set_member_hook<> size_hook;
 };
 
 class AvlAllocator final : public Allocator {
+  struct dispose_rs {
+    void operator()(range_seg_t* p)
+    {
+      delete p;
+    }
+  };
+
+protected:
+  /*
+   * ctor intended for use by descendant class(es) which
+   * provide handling for spilled-over entries
+   * (when entry count >= max_entries)
+   */
+  AvlAllocator(CephContext* cct, int64_t device_size, int64_t block_size,
+               uint64_t max_entries,
+               const std::string& name);
+
 public:
   AvlAllocator(CephContext* cct, int64_t device_size, int64_t block_size,
                const std::string& name);
@@ -101,7 +121,8 @@ private:
     boost::intrusive::member_hook<
       range_seg_t,
       boost::intrusive::avl_set_member_hook<>,
-      &range_seg_t::size_hook>>;
+      &range_seg_t::size_hook>,
+      boost::intrusive::constant_time_size<true>>;
   range_size_tree_t range_size_tree;
 
   const int64_t num_total;  ///< device size
@@ -132,6 +153,66 @@ private:
    */
  int range_size_alloc_free_pct = 0;
 
+  /*
+   * Max number of range entries allowed. 0 - unlimited
+   */
+  uint64_t range_count_cap = 0;
+
   CephContext* cct;
   std::mutex lock;
+
+  uint64_t _lowest_size_available() {
+    auto rs = range_size_tree.begin();
+    return rs != range_size_tree.end() ? rs->length() : 0;
+  }
+  void _range_size_tree_rm(range_seg_t& r) {
+    ceph_assert(num_free >= r.length());
+    num_free -= r.length();
+    range_size_tree.erase(r);
+
+  }
+  void _range_size_tree_try_insert(range_seg_t& r) {
+    if (_try_insert_range(r.start, r.end)) {
+      range_size_tree.insert(r);
+      num_free += r.length();
+    } else {
+      range_tree.erase_and_dispose(r, dispose_rs{});
+    }
+  }
+  bool _try_insert_range(uint64_t start,
+                         uint64_t end,
+                         range_tree_t::iterator* insert_pos = nullptr) {
+    bool res = !range_count_cap || range_size_tree.size() < range_count_cap;
+    bool remove_lowest = false;
+    if (!res) {
+      if (end - start > _lowest_size_available()) {
+        remove_lowest = true;
+        res = true;
+      }
+    }
+    if (!res) {
+      _spillover_range(start, end);
+    } else {
+      // NB: we should do the insertion before the following removal
+      // to avoid potential disposal of an iterator the insertion might depend on.
+      if (insert_pos) {
+        auto new_rs = new range_seg_t{ start, end };
+        range_tree.insert_before(*insert_pos, *new_rs);
+        range_size_tree.insert(*new_rs);
+        num_free += new_rs->length();
+      }
+      if (remove_lowest) {
+        auto r = range_size_tree.begin();
+        _range_size_tree_rm(*r);
+        _spillover_range(r->start, r->end);
+        range_tree.erase_and_dispose(*r, dispose_rs{});
+      }
+    }
+    return res;
+  }
+  virtual void _spillover_range(uint64_t start, uint64_t end) {
+    // this should be overridden when a range count cap is present,
+    // i.e. (range_count_cap > 0)
+    ceph_assert(false);
+  }
 };
-- 
2.39.5
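
For illustration only (not part of the patch): the capping behaviour requires a
descendant class that passes max_entries to the new protected constructor and
overrides the private virtual _spillover_range(), which _try_insert_range() calls
with whichever extent no longer fits under range_count_cap: either the incoming
range (when it is not larger than the smallest tracked one) or the evicted
smallest segment. A minimal, hypothetical sketch of such a descendant follows; the
class name CappedAvlAllocator, the std::map fallback store, and the include path
are assumptions, and the final specifier still visible on class AvlAllocator in
the header context above would have to be dropped before any descendant compiles.

// Hypothetical sketch under the assumptions named above; not part of this patch.
#include <map>
#include <string>
#include "os/bluestore/AvlAllocator.h"

class CappedAvlAllocator : public AvlAllocator {
public:
  CappedAvlAllocator(CephContext* cct, int64_t device_size, int64_t block_size,
                     uint64_t max_entries, const std::string& name)
    : AvlAllocator(cct, device_size, block_size, max_entries, name) {}

private:
  // Invoked by the base class when an extent cannot be kept under
  // range_count_cap; the extent is being dropped from the AVL trees,
  // so it has to be remembered here to stay usable.
  void _spillover_range(uint64_t start, uint64_t end) override {
    // Placeholder store; a real descendant would track spilled extents in a
    // denser structure (e.g. a bitmap-style allocator) and allocate from it.
    spilled[start] = end;
  }

  std::map<uint64_t, uint64_t> spilled;  // start -> end of spilled extents
};

Note that a spilled extent is also no longer counted in num_free, so a real
descendant would need to account for that space when reporting free space and
when allocating; the base class only enforces the eviction policy (the smallest
segments are the ones spilled) and the resulting cap on range_seg_t entries.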