From c27490e3d641f2b8ea65759fd79640e5f6dd398c Mon Sep 17 00:00:00 2001
From: Igor Fedotov
Date: Wed, 25 Mar 2020 17:19:12 +0300
Subject: [PATCH] os/bluestore: improve adjacent extents merge in hybrid allocator

The bitmap counterpart is inspected for adjacent free extents when
releasing an extent and trying to add it to the AVL tree.

Signed-off-by: Igor Fedotov
---
 src/os/bluestore/AvlAllocator.h               |   5 +-
 src/os/bluestore/BitmapAllocator.h            |   1 -
 src/os/bluestore/HybridAllocator.cc           |  15 +-
 src/os/bluestore/HybridAllocator.h            |   3 +
 src/os/bluestore/fastbmap_allocator_impl.cc   |  83 ++++++++++++
 src/os/bluestore/fastbmap_allocator_impl.h    |  55 ++++++++
 .../objectstore/fastbmap_allocator_test.cc    | 128 ++++++++++++++++++
 src/test/objectstore/hybrid_allocator_test.cc |  38 +++---
 8 files changed, 306 insertions(+), 22 deletions(-)

diff --git a/src/os/bluestore/AvlAllocator.h b/src/os/bluestore/AvlAllocator.h
index a68d4c190ac02..571784a6c155b 100644
--- a/src/os/bluestore/AvlAllocator.h
+++ b/src/os/bluestore/AvlAllocator.h
@@ -212,6 +212,10 @@ private:
     // i.e. (range_count_cap > 0)
     ceph_assert(false);
   }
+protected:
+  // called when an extent is to be released/marked free
+  virtual void _add_to_tree(uint64_t start, uint64_t size);
+
 protected:
   CephContext* cct;
   std::mutex lock;
@@ -241,7 +245,6 @@ protected:
   void _release(const PExtentVector& release_set);
   void _shutdown();
 
-  void _add_to_tree(uint64_t start, uint64_t size);
   void _process_range_removal(uint64_t start, uint64_t end, range_tree_t::iterator& rs);
   void _remove_from_tree(uint64_t start, uint64_t size);
   void _try_remove_from_tree(uint64_t start, uint64_t size,
diff --git a/src/os/bluestore/BitmapAllocator.h b/src/os/bluestore/BitmapAllocator.h
index cbf8c007591a5..51ebaa4208c84 100644
--- a/src/os/bluestore/BitmapAllocator.h
+++ b/src/os/bluestore/BitmapAllocator.h
@@ -22,7 +22,6 @@ public:
   {
   }
 
-
   int64_t allocate(
     uint64_t want_size, uint64_t alloc_unit, uint64_t max_alloc_size,
     int64_t hint, PExtentVector *extents) override;
diff --git a/src/os/bluestore/HybridAllocator.cc b/src/os/bluestore/HybridAllocator.cc
index f3fdca158af53..484a664bf0d44 100644
--- a/src/os/bluestore/HybridAllocator.cc
+++ b/src/os/bluestore/HybridAllocator.cc
@@ -187,7 +187,8 @@ void HybridAllocator::shutdown()
   }
 }
 
-void HybridAllocator::_spillover_range(uint64_t start, uint64_t end) {
+void HybridAllocator::_spillover_range(uint64_t start, uint64_t end)
+{
   auto size = end - start;
   dout(20) << __func__
            << std::hex << " "
@@ -207,3 +208,15 @@ void HybridAllocator::_spillover_range(uint64_t start, uint64_t end) {
   }
   bmap_alloc->init_add_free(start, size);
 }
+
+void HybridAllocator::_add_to_tree(uint64_t start, uint64_t size)
+{
+  if (bmap_alloc) {
+    uint64_t head = bmap_alloc->claim_free_to_left(start);
+    uint64_t tail = bmap_alloc->claim_free_to_right(start + size);
+    ceph_assert(head <= start);
+    start -= head;
+    size += head + tail;
+  }
+  AvlAllocator::_add_to_tree(start, size);
+}
diff --git a/src/os/bluestore/HybridAllocator.h b/src/os/bluestore/HybridAllocator.h
index 2b9af918f7c1e..e8246cf4dfcb2 100644
--- a/src/os/bluestore/HybridAllocator.h
+++ b/src/os/bluestore/HybridAllocator.h
@@ -42,4 +42,7 @@ protected:
 
 private:
   void _spillover_range(uint64_t start, uint64_t end) override;
+
+  // called when an extent is to be released/marked free
+  void _add_to_tree(uint64_t start, uint64_t size) override;
 };
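The _add_to_tree override above is the behavioral core of the patch: before a
released extent is inserted into the AVL tree, the bitmap allocator is asked to
surrender any free space bordering it on either side, so the tree receives one
merged extent instead of a run left split across the two allocators. Below is a
minimal, self-contained sketch of this claim-and-merge pattern; the containers
and names (ToyBitmap, claim_left, release) are illustrative stand-ins, not the
patch's actual classes.

    #include <cassert>
    #include <cstdint>
    #include <map>
    #include <set>

    constexpr uint64_t AU = 0x1000;  // allocation unit (4 KiB)

    // Toy bitmap: a set of free allocation-unit offsets. claim_left/claim_right
    // mimic the role of claim_free_to_left/claim_free_to_right in this patch.
    struct ToyBitmap {
      std::set<uint64_t> free_aus;

      // Claim free space that ends exactly at 'offs'; return claimed bytes.
      uint64_t claim_left(uint64_t offs) {
        uint64_t claimed = 0;
        while (offs >= AU && free_aus.erase(offs - AU)) {
          offs -= AU;
          claimed += AU;
        }
        return claimed;
      }
      // Claim free space that starts exactly at 'offs'; return claimed bytes.
      uint64_t claim_right(uint64_t offs) {
        uint64_t claimed = 0;
        while (free_aus.erase(offs)) {
          offs += AU;
          claimed += AU;
        }
        return claimed;
      }
    };

    // Mirror of HybridAllocator::_add_to_tree: absorb bordering bitmap space,
    // then hand one widened extent to the tree (a std::map stands in for AVL).
    void release(ToyBitmap& bmap, std::map<uint64_t, uint64_t>& tree,
                 uint64_t start, uint64_t size) {
      uint64_t head = bmap.claim_left(start);
      uint64_t tail = bmap.claim_right(start + size);
      assert(head <= start);
      start -= head;
      size += head + tail;
      tree[start] = size;  // stands in for AvlAllocator::_add_to_tree
    }

Releasing 17M~1M while the toy bitmap holds 16M~1M and 18M~2M leaves a single
16M~4M entry in the tree, which is exactly the scenario the reworked
hybrid_allocator_test at the end of this patch asserts.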
diff --git a/src/os/bluestore/fastbmap_allocator_impl.cc b/src/os/bluestore/fastbmap_allocator_impl.cc
index 441a185d5c721..bb8bdf0499a4c 100644
--- a/src/os/bluestore/fastbmap_allocator_impl.cc
+++ b/src/os/bluestore/fastbmap_allocator_impl.cc
@@ -629,3 +629,86 @@ void AllocatorLevel01Loose::dump(
   notify(off, len);
 }
 
+uint64_t AllocatorLevel01Loose::_claim_free_to_left_l0(int64_t l0_pos_start)
+{
+  int64_t d0 = L0_ENTRIES_PER_SLOT;
+
+  int64_t pos = l0_pos_start - 1;
+  slot_t bits = (slot_t)1 << (pos % d0);
+  int64_t idx = pos / d0;
+  slot_t* val_s = l0.data() + idx;
+
+  int64_t pos_e = p2align(pos, d0);
+
+  while (pos >= pos_e) {
+    if (0 == ((*val_s) & bits))
+      return pos + 1;
+    (*val_s) &= ~bits;
+    bits >>= 1;
+    --pos;
+  }
+  --idx;
+  val_s = l0.data() + idx;
+  while (idx >= 0 && (*val_s) == all_slot_set) {
+    *val_s = all_slot_clear;
+    --idx;
+    pos -= d0;
+    val_s = l0.data() + idx;
+  }
+
+  if (idx >= 0 &&
+      (*val_s) != all_slot_set && (*val_s) != all_slot_clear) {
+    int64_t pos_e = p2align(pos, d0);
+    slot_t bits = (slot_t)1 << (pos % d0);
+    while (pos >= pos_e) {
+      if (0 == ((*val_s) & bits))
+        return pos + 1;
+      (*val_s) &= ~bits;
+      bits >>= 1;
+      --pos;
+    }
+  }
+  return pos + 1;
+}
+
+uint64_t AllocatorLevel01Loose::_claim_free_to_right_l0(int64_t l0_pos_start)
+{
+  auto d0 = L0_ENTRIES_PER_SLOT;
+
+  int64_t pos = l0_pos_start;
+  slot_t bits = (slot_t)1 << (pos % d0);
+  size_t idx = pos / d0;
+  slot_t* val_s = l0.data() + idx;
+
+  int64_t pos_e = p2roundup(pos + 1, d0);
+
+  while (pos < pos_e) {
+    if (0 == ((*val_s) & bits))
+      return pos;
+    (*val_s) &= ~bits;
+    bits <<= 1;
+    ++pos;
+  }
+  ++idx;
+  val_s = l0.data() + idx;
+  while (idx < l0.size() && (*val_s) == all_slot_set) {
+    *val_s = all_slot_clear;
+    ++idx;
+    pos += d0;
+    val_s = l0.data() + idx;
+  }
+
+  if (idx < l0.size() &&
+      (*val_s) != all_slot_set && (*val_s) != all_slot_clear) {
+    int64_t pos_e = p2roundup(pos + 1, d0);
+    slot_t bits = (slot_t)1 << (pos % d0);
+    while (pos < pos_e) {
+      if (0 == ((*val_s) & bits))
+        return pos;
+      (*val_s) &= ~bits;
+      bits <<= 1;
+      ++pos;
+    }
+  }
+  return pos;
+}
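Both _claim_free_to_*_l0 helpers above scan the L0 bitmap bit by bit away from
the starting position, clearing each free bit they claim, stop at the first
allocated bit, and take a whole-slot shortcut while entire slots read
all_slot_set. A condensed sketch of the leftward walk under simplified
assumptions (64-bit slots, a set bit means free, one loop instead of the
partial-slot/full-slot/partial-slot split used by the real code):

    #include <cstdint>
    #include <vector>

    using slot_t = uint64_t;
    constexpr int64_t BITS = 64;            // stands in for L0_ENTRIES_PER_SLOT
    constexpr slot_t ALL_SET = ~slot_t(0);  // all_slot_set analogue: fully free

    // Walk left from 'pos_start' (exclusive), claiming (clearing) free bits
    // until an allocated bit is met; return the left boundary of the claimed
    // run (== pos_start when nothing adjacent was free).
    int64_t claim_left(std::vector<slot_t>& l0, int64_t pos_start) {
      int64_t pos = pos_start - 1;
      while (pos >= 0) {
        int64_t idx = pos / BITS;
        // fast path: a fully free slot is wiped in a single step
        if (pos % BITS == BITS - 1 && l0[idx] == ALL_SET) {
          l0[idx] = 0;
          pos -= BITS;
          continue;
        }
        slot_t bit = slot_t(1) << (pos % BITS);
        if (!(l0[idx] & bit))
          break;          // allocated unit: the free run ends here
        l0[idx] &= ~bit;  // claim it
        --pos;
      }
      return pos + 1;
    }

The rightward variant is symmetric: it shifts the probe bit left instead of
right and bounds the walk by l0.size() instead of zero.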
diff --git a/src/os/bluestore/fastbmap_allocator_impl.h b/src/os/bluestore/fastbmap_allocator_impl.h
index 7dcbec30ec54f..ed2d4c8cc98ce 100644
--- a/src/os/bluestore/fastbmap_allocator_impl.h
+++ b/src/os/bluestore/fastbmap_allocator_impl.h
@@ -322,6 +322,9 @@ protected:
   void _mark_l1_on_l0(int64_t l0_pos, int64_t l0_pos_end);
   void _mark_alloc_l0(int64_t l0_pos_start, int64_t l0_pos_end);
 
+  uint64_t _claim_free_to_left_l0(int64_t l0_pos_start);
+  uint64_t _claim_free_to_right_l0(int64_t l0_pos_start);
+
   void _mark_alloc_l1_l0(int64_t l0_pos_start, int64_t l0_pos_end)
   {
@@ -424,6 +427,34 @@ protected:
     return l0_granularity * (l0_pos_end - l0_pos_start);
   }
 
+  uint64_t claim_free_to_left_l1(uint64_t offs)
+  {
+    uint64_t l0_pos_end = offs / l0_granularity;
+    uint64_t l0_pos_start = _claim_free_to_left_l0(l0_pos_end);
+    if (l0_pos_start < l0_pos_end) {
+      _mark_l1_on_l0(
+        p2align(l0_pos_start, uint64_t(bits_per_slotset)),
+        p2roundup(l0_pos_end, uint64_t(bits_per_slotset)));
+      return l0_granularity * (l0_pos_end - l0_pos_start);
+    }
+    return 0;
+  }
+
+  uint64_t claim_free_to_right_l1(uint64_t offs)
+  {
+    uint64_t l0_pos_start = offs / l0_granularity;
+    uint64_t l0_pos_end = _claim_free_to_right_l0(l0_pos_start);
+
+    if (l0_pos_start < l0_pos_end) {
+      _mark_l1_on_l0(
+        p2align(l0_pos_start, uint64_t(bits_per_slotset)),
+        p2roundup(l0_pos_end, uint64_t(bits_per_slotset)));
+      return l0_granularity * (l0_pos_end - l0_pos_start);
+    }
+    return 0;
+  }
+
+
 public:
   uint64_t debug_get_allocated(uint64_t pos0 = 0, uint64_t pos1 = 0)
   {
@@ -522,7 +553,31 @@ public:
     std::lock_guard l(lock);
     l1.collect_stats(bins_overall);
   }
+  uint64_t claim_free_to_left(uint64_t offset) {
+    std::lock_guard l(lock);
+    auto allocated = l1.claim_free_to_left_l1(offset);
+    ceph_assert(available >= allocated);
+    available -= allocated;
+
+    uint64_t l2_pos = (offset - allocated) / l2_granularity;
+    uint64_t l2_pos_end =
+      p2roundup(int64_t(offset), int64_t(l2_granularity)) / l2_granularity;
+    _mark_l2_on_l1(l2_pos, l2_pos_end);
+    return allocated;
+  }
+
+  uint64_t claim_free_to_right(uint64_t offset) {
+    std::lock_guard l(lock);
+    auto allocated = l1.claim_free_to_right_l1(offset);
+    ceph_assert(available >= allocated);
+    available -= allocated;
+
+    uint64_t l2_pos = (offset) / l2_granularity;
+    int64_t end = offset + allocated;
+    uint64_t l2_pos_end = p2roundup(end, int64_t(l2_granularity)) / l2_granularity;
+    _mark_l2_on_l1(l2_pos, l2_pos_end);
+    return allocated;
+  }
 protected:
   ceph::mutex lock = ceph::make_mutex("AllocatorLevel02::lock");
   L1 l1;
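At level 2, claim_free_to_left/right are thin transactional wrappers: take the
lock, let L1 claim adjacent space and report the byte count, charge it against
'available', then refresh the L2 summary slots spanning the claimed range via
_mark_l2_on_l1. The only subtle part is the granularity arithmetic; a worked
example follows, with assumed numbers (l2_granularity is derived from the
instance configuration, 0x80000 here is purely illustrative):

    #include <cassert>
    #include <cstdint>

    // Semantics of Ceph's p2roundup, reproduced so the example stands alone
    // (alignment must be a power of two).
    template <typename T>
    T p2roundup(T v, T a) { return (v + a - 1) & ~(a - 1); }

    int main() {
      const int64_t l2_granularity = 0x80000;  // assumed, for illustration
      int64_t offset = 0x430000;               // claim_free_to_right(offset)
      int64_t allocated = 0x50000;             // bytes L1 reported as claimed

      int64_t l2_pos = offset / l2_granularity;                          // 8
      int64_t l2_pos_end =
        p2roundup(offset + allocated, l2_granularity) / l2_granularity;  // 9
      assert(l2_pos == 8 && l2_pos_end == 9);
      // _mark_l2_on_l1(8, 9): only the single L2 slot covering
      // [0x400000, 0x480000) has its free/used summary recomputed.
      return 0;
    }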
diff --git a/src/test/objectstore/fastbmap_allocator_test.cc b/src/test/objectstore/fastbmap_allocator_test.cc
index 14752422b29b3..c3af73706569c 100644
--- a/src/test/objectstore/fastbmap_allocator_test.cc
+++ b/src/test/objectstore/fastbmap_allocator_test.cc
@@ -44,6 +44,14 @@ public:
   {
     _free_l2(r);
   }
+  void mark_free(uint64_t o, uint64_t len)
+  {
+    _mark_free(o, len);
+  }
+  void mark_allocated(uint64_t o, uint64_t len)
+  {
+    _mark_allocated(o, len);
+  }
 };
 
 const uint64_t _1m = 1024 * 1024;
@@ -931,3 +939,123 @@ TEST(TestAllocatorLevel01, test_4G_alloc_bug3)
     ASSERT_EQ(a4[1].length, 2048ull * _1m);
   }
 }
+
+TEST(TestAllocatorLevel01, test_claim_free_l2)
+{
+  TestAllocatorLevel02 al2;
+  uint64_t num_l2_entries = 64;// *512;
+  uint64_t capacity = num_l2_entries * 256 * 512 * 4096;
+  al2.init(capacity, 0x1000);
+  std::cout << "Init L2" << std::endl;
+
+  uint64_t max_available = 0x20000;
+  al2.mark_allocated(max_available, capacity - max_available);
+
+  uint64_t allocated1 = 0;
+  interval_vector_t a1;
+  al2.allocate_l2(0x2000, 0x2000, &allocated1, &a1);
+  ASSERT_EQ(allocated1, 0x2000u);
+  ASSERT_EQ(a1[0].offset, 0u);
+  ASSERT_EQ(a1[0].length, 0x2000u);
+
+  uint64_t allocated2 = 0;
+  interval_vector_t a2;
+  al2.allocate_l2(0x2000, 0x2000, &allocated2, &a2);
+  ASSERT_EQ(allocated2, 0x2000u);
+  ASSERT_EQ(a2[0].offset, 0x2000u);
+  ASSERT_EQ(a2[0].length, 0x2000u);
+
+  uint64_t allocated3 = 0;
+  interval_vector_t a3;
+  al2.allocate_l2(0x3000, 0x3000, &allocated3, &a3);
+  ASSERT_EQ(allocated3, 0x3000u);
+  ASSERT_EQ(a3[0].offset, 0x4000u);
+  ASSERT_EQ(a3[0].length, 0x3000u);
+
+  al2.free_l2(a1);
+  al2.free_l2(a3);
+  ASSERT_EQ(max_available - 0x2000, al2.debug_get_free());
+
+  auto claimed = al2.claim_free_to_right(0x4000);
+  ASSERT_EQ(max_available - 0x4000u, claimed);
+  ASSERT_EQ(0x2000, al2.debug_get_free());
+
+  claimed = al2.claim_free_to_right(0x4000);
+  ASSERT_EQ(0, claimed);
+  ASSERT_EQ(0x2000, al2.debug_get_free());
+
+  claimed = al2.claim_free_to_left(0x2000);
+  ASSERT_EQ(0x2000u, claimed);
+  ASSERT_EQ(0, al2.debug_get_free());
+
+  claimed = al2.claim_free_to_left(0x2000);
+  ASSERT_EQ(0, claimed);
+  ASSERT_EQ(0, al2.debug_get_free());
+
+
+  al2.mark_free(0x3000, 0x4000);
+  ASSERT_EQ(0x4000, al2.debug_get_free());
+
+  claimed = al2.claim_free_to_right(0x7000);
+  ASSERT_EQ(0, claimed);
+  ASSERT_EQ(0x4000, al2.debug_get_free());
+
+  claimed = al2.claim_free_to_right(0x6000);
+  ASSERT_EQ(0x1000, claimed);
+  ASSERT_EQ(0x3000, al2.debug_get_free());
+
+  claimed = al2.claim_free_to_right(0x6000);
+  ASSERT_EQ(0, claimed);
+  ASSERT_EQ(0x3000, al2.debug_get_free());
+
+  claimed = al2.claim_free_to_left(0x3000);
+  ASSERT_EQ(0u, claimed);
+  ASSERT_EQ(0x3000, al2.debug_get_free());
+
+  claimed = al2.claim_free_to_left(0x4000);
+  ASSERT_EQ(0x1000, claimed);
+  ASSERT_EQ(0x2000, al2.debug_get_free());
+
+  // extend allocator space up to 64M
+  auto max_available2 = 64 * 1024 * 1024;
+  al2.mark_free(max_available, max_available2 - max_available);
+  ASSERT_EQ(max_available2 - max_available + 0x2000, al2.debug_get_free());
+
+  // pin some allocations
+  al2.mark_allocated(0x400000 + 0x2000, 1000);
+  al2.mark_allocated(0x400000 + 0x5000, 1000);
+  al2.mark_allocated(0x400000 + 0x20000, 1000);
+  ASSERT_EQ(max_available2 - max_available - 0x1000, al2.debug_get_free());
+
+  claimed = al2.claim_free_to_left(0x403000);
+  ASSERT_EQ(0x0, claimed);
+
+  claimed = al2.claim_free_to_left(0x404000);
+  ASSERT_EQ(0x1000, claimed);
+  ASSERT_EQ(max_available2 - max_available - 0x2000, al2.debug_get_free());
+
+  claimed = al2.claim_free_to_left(max_available);
+  ASSERT_EQ(0, claimed);
+
+  claimed = al2.claim_free_to_left(0x400000);
+  ASSERT_EQ(0x3e0000, claimed);
+  ASSERT_EQ(max_available2 - max_available - 0x3e2000, al2.get_available());
+  ASSERT_EQ(max_available2 - max_available - 0x3e2000, al2.debug_get_free());
+
+  claimed = al2.claim_free_to_right(0x407000);
+  ASSERT_EQ(0x19000, claimed);
+  ASSERT_EQ(max_available2 - max_available - 0x3e2000 - 0x19000,
+    al2.get_available());
+  ASSERT_EQ(max_available2 - max_available - 0x3e2000 - 0x19000,
+    al2.debug_get_free());
+
+  claimed = al2.claim_free_to_right(0x407000);
+  ASSERT_EQ(0, claimed);
+
+  claimed = al2.claim_free_to_right(0x430000);
+  ASSERT_EQ(max_available2 - 0x430000, claimed);
+  ASSERT_EQ(0x15000,
+    al2.get_available());
+  ASSERT_EQ(0x15000,
+    al2.debug_get_free());
+}
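The hybrid allocator test below reworks its expectations around the new
merge-on-release behavior. The accounting behind the first updated stanza,
restated as compile-time checks (MiB units, values taken from the test's own
comments):

    // Before the release: avl  = 2M~2M + 8M~7M          =  9M free,
    //                     bmap = 0~1M + 16M~1M + 18M~2M =  4M free.
    // init_add_free(17M, 1M) now claims both bitmap neighbors (16M~1M and
    // 18M~2M), so one merged 16M~4M extent lands in the AVL tree:
    static_assert(9 + 1 + 3 == 13, "MiB free in avl after the merge");
    static_assert(4 - 3 == 1, "MiB left in bmap after the merge");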
diff --git a/src/test/objectstore/hybrid_allocator_test.cc b/src/test/objectstore/hybrid_allocator_test.cc
index 5643d47d9b798..e43d28b28442f 100755
--- a/src/test/objectstore/hybrid_allocator_test.cc
+++ b/src/test/objectstore/hybrid_allocator_test.cc
@@ -149,35 +149,35 @@ TEST(HybridAllocator, basic)
     // we have at avl: 2M~2M, 8M~7M
     // and at bmap: 0~1M, 16M~1M, 18M~2M
 
-    // this will go to avl making 16M~4M span broken between both allocators
+    // this will be merged with neighbors from bmap and go to avl
     ha.init_add_free(17 * _1m, _1m);
-    ASSERT_EQ(_1m * 10, ha.get_avl_free());
-    ASSERT_EQ(_1m * 4, ha.get_bmap_free());
+    ASSERT_EQ(_1m * 1, ha.get_bmap_free());
+    ASSERT_EQ(_1m * 13, ha.get_avl_free());
 
-    // we have at avl: 2M~2M, 8M~7M, 17M~1M
-    // and at bmap: 0~1M, 16M~1M, 18M~2M
+    // we have at avl: 2M~2M, 8M~7M, 16M~4M
+    // and at bmap: 0~1M
 
-    // and now do some cutoffs from this "broken" 16M~4M span
+    // and now do some cutoffs from 0~1M span
 
     //cut off 4K from bmap
-    ha.init_rm_free(16 * _1m, 0x1000);
-    ASSERT_EQ(_1m * 10, ha.get_avl_free());
-    ASSERT_EQ(_1m * 4 - 0x1000, ha.get_bmap_free());
+    ha.init_rm_free(0 * _1m, 0x1000);
+    ASSERT_EQ(_1m * 13, ha.get_avl_free());
+    ASSERT_EQ(_1m * 1 - 0x1000, ha.get_bmap_free());
 
-    //cut off 1M-4K from bmap and 4K from avl
-    ha.init_rm_free(16 * _1m + 0x1000, _1m);
-    ASSERT_EQ(_1m * 10 - 0x1000, ha.get_avl_free());
-    ASSERT_EQ(_1m * 3, ha.get_bmap_free());
+    //cut off 1M-4K from bmap
+    ha.init_rm_free(0 * _1m + 0x1000, _1m - 0x1000);
+    ASSERT_EQ(_1m * 13, ha.get_avl_free());
+    ASSERT_EQ(0, ha.get_bmap_free());
 
     //cut off 512K avl
     ha.init_rm_free(17 * _1m + 0x1000, _1m / 2);
-    ASSERT_EQ(_1m * 10 - 0x1000 - _1m / 2, ha.get_avl_free());
-    ASSERT_EQ(_1m * 3, ha.get_bmap_free());
+    ASSERT_EQ(_1m * 13 - _1m / 2, ha.get_avl_free());
+    ASSERT_EQ(0, ha.get_bmap_free());
 
-    //cut off the rest from avl and 8K from bmap
-    ha.init_rm_free(17 * _1m + 0x1000 + _1m / 2, _1m / 2 + 0x1000);
-    ASSERT_EQ(_1m * 9, ha.get_avl_free());
-    ASSERT_EQ(_1m * 3 - 0x2000, ha.get_bmap_free());
+    //cut off the rest from avl
+    ha.init_rm_free(17 * _1m + 0x1000 + _1m / 2, _1m / 2);
+    ASSERT_EQ(_1m * 12, ha.get_avl_free());
+    ASSERT_EQ(0, ha.get_bmap_free());
   }
   {
-- 
2.39.5