From 25b7d0ea9e6a8c67a6212e325a8559a17500d2e1 Mon Sep 17 00:00:00 2001
From: Igor Fedotov
Date: Fri, 1 Jun 2018 21:59:15 +0300
Subject: [PATCH] os/bluestore: perform allocations aligned with min_length in
 new bitmap allocator

Signed-off-by: Igor Fedotov
(cherry picked from commit 51974c48e38e7080eb12c6028f95f4da666fbe76)
---
 src/os/bluestore/fastbmap_allocator_impl.cc   | 213 ++++++++++++++----
 src/os/bluestore/fastbmap_allocator_impl.h    |  86 +------
 .../objectstore/fastbmap_allocator_test.cc    | 181 +++++++++++++++
 3 files changed, 353 insertions(+), 127 deletions(-)

diff --git a/src/os/bluestore/fastbmap_allocator_impl.cc b/src/os/bluestore/fastbmap_allocator_impl.cc
index 9737c6539f4d8..6309c97a15596 100755
--- a/src/os/bluestore/fastbmap_allocator_impl.cc
+++ b/src/os/bluestore/fastbmap_allocator_impl.cc
@@ -15,6 +15,117 @@ uint64_t AllocatorLevel::alloc_fragments = 0;
 uint64_t AllocatorLevel::alloc_fragments_fast = 0;
 uint64_t AllocatorLevel::l2_allocs = 0;
 
+inline interval_t _align2units(uint64_t offset, uint64_t len, uint64_t min_length)
+{
+  interval_t res;
+  if (len >= min_length) {
+    res.offset = p2roundup(offset, min_length);
+    auto delta_off = res.offset - offset;
+    if (len > delta_off) {
+      res.length = len - delta_off;
+      res.length = p2align(res.length, min_length);
+      if (res.length) {
+        return res;
+      }
+    }
+  }
+  return interval_t();
+}
+
+
+interval_t AllocatorLevel01Loose::_get_longest_from_l0(uint64_t pos0,
+  uint64_t pos1, uint64_t min_length, interval_t* tail) const
+{
+  interval_t res;
+  if (pos0 >= pos1) {
+    return res;
+  }
+  auto pos = pos0;
+
+  interval_t res_candidate;
+  if (tail->length != 0) {
+    assert((tail->offset % l0_granularity) == 0);
+    assert((tail->length % l0_granularity) == 0);
+    res_candidate.offset = tail->offset / l0_granularity;
+    res_candidate.length = tail->length / l0_granularity;
+  }
+  *tail = interval_t();
+
+  auto d = bits_per_slot;
+  slot_t bits = l0[pos / d];
+  bits >>= pos % d;
+  bool end_loop = false;
+  auto min_granules = min_length / l0_granularity;
+
+  do {
+    if ((pos % d) == 0) {
+      bits = l0[pos / d];
+      if (pos1 - pos >= d) {
+        switch(bits) {
+          case all_slot_set:
+            // slot is totally free
+            if (!res_candidate.length) {
+              res_candidate.offset = pos;
+            }
+            res_candidate.length += d;
+            pos += d;
+            end_loop = pos >= pos1;
+            if (end_loop) {
+              *tail = res_candidate;
+              res_candidate = _align2units(res_candidate.offset,
+                res_candidate.length, min_granules);
+              if (res.length < res_candidate.length) {
+                res = res_candidate;
+              }
+            }
+            continue;
+          case all_slot_clear:
+            // slot is totally allocated
+            res_candidate = _align2units(res_candidate.offset,
+              res_candidate.length, min_granules);
+            if (res.length < res_candidate.length) {
+              res = res_candidate;
+            }
+            res_candidate = interval_t();
+            pos += d;
+            end_loop = pos >= pos1;
+            continue;
+        }
+      }
+    } //if ((pos % d) == 0)
+
+    end_loop = ++pos >= pos1;
+    if (bits & 1) {
+      // item is free
+      if (!res_candidate.length) {
+        res_candidate.offset = pos - 1;
+      }
+      ++res_candidate.length;
+      if (end_loop) {
+        *tail = res_candidate;
+        res_candidate = _align2units(res_candidate.offset,
+          res_candidate.length, min_granules);
+        if (res.length < res_candidate.length) {
+          res = res_candidate;
+        }
+      }
+    } else {
+      res_candidate = _align2units(res_candidate.offset,
+        res_candidate.length, min_granules);
+      if (res.length < res_candidate.length) {
+        res = res_candidate;
+      }
+      res_candidate = interval_t();
+    }
+    bits >>= 1;
+  } while (!end_loop);
+  res.offset *= l0_granularity;
+  res.length *= l0_granularity;
+  tail->offset *= l0_granularity;
+  tail->length *= l0_granularity;
+  return res;
+}
+
 void AllocatorLevel01Loose::_analyze_partials(uint64_t pos_start,
   uint64_t pos_end, uint64_t length, uint64_t min_length, int mode,
   search_ctx_t* ctx)
@@ -26,7 +137,9 @@ void AllocatorLevel01Loose::_analyze_partials(uint64_t pos_start,
   uint64_t l0_w = slotset_width * CHILD_PER_SLOT_L0;
 
   uint64_t l1_pos = pos_start;
-  bool prev_pos_partial = false;
+  const interval_t empty_tail;
+  interval_t prev_tail;
+
   uint64_t next_free_l1_pos = 0;
   for (auto pos = pos_start / d; pos < pos_end / d; ++pos) {
     slot_t slot_val = l1[pos];
@@ -36,12 +149,14 @@ void AllocatorLevel01Loose::_analyze_partials(uint64_t pos_start,
     for (auto c = 0; c < d; c++) {
       switch (slot_val & L1_ENTRY_MASK) {
       case L1_ENTRY_FREE:
-        prev_pos_partial = false;
+        prev_tail = empty_tail;
         if (!ctx->free_count) {
           ctx->free_l1_pos = l1_pos;
         } else if (l1_pos != next_free_l1_pos){
-          // check if already found extent fits min_length
-          if (ctx->free_count * l1_granularity >= min_length) {
+          auto o = ctx->free_l1_pos * l1_granularity;
+          auto l = ctx->free_count * l1_granularity;
+          // check if already found extent fits min_length after alignment
+          if (_align2units(o, l, min_length).length >= min_length) {
             break;
           }
           // if not - proceed with the next one
@@ -55,33 +170,28 @@ void AllocatorLevel01Loose::_analyze_partials(uint64_t pos_start,
         }
         break;
       case L1_ENTRY_FULL:
-        prev_pos_partial = false;
+        prev_tail = empty_tail;
         break;
      case L1_ENTRY_PARTIAL:
-        uint64_t l;
-        uint64_t p0 = 0;
+        interval_t longest;
        ++ctx->partial_count;
 
-        if (!prev_pos_partial) {
-          l = _get_longest_from_l0(l1_pos * l0_w, (l1_pos + 1) * l0_w, &p0);
-          prev_pos_partial = true;
-        } else {
-          l = _get_longest_from_l0((l1_pos - 1) * l0_w, (l1_pos + 1) * l0_w, &p0);
-        }
-        if (l >= length) {
+        longest = _get_longest_from_l0(l1_pos * l0_w, (l1_pos + 1) * l0_w, min_length, &prev_tail);
+
+        if (longest.length >= length) {
           if ((ctx->affordable_len == 0) ||
               ((ctx->affordable_len != 0) &&
-                (l < ctx->affordable_len))) {
-            ctx->affordable_len = l;
-            ctx->affordable_l0_pos_start = p0;
+                (longest.length < ctx->affordable_len))) {
+            ctx->affordable_len = longest.length;
+            ctx->affordable_offs = longest.offset;
           }
         }
-        if (l >= min_length &&
+        if (longest.length >= min_length &&
            (ctx->min_affordable_len == 0 ||
-             (l < ctx->min_affordable_len))) {
+             (longest.length < ctx->min_affordable_len))) {
 
-          ctx->min_affordable_len = p2align(l, min_length);
-          ctx->min_affordable_l0_pos_start = p0;
+          ctx->min_affordable_len = p2align(longest.length, min_length);
+          ctx->min_affordable_offs = longest.offset;
         }
         if (mode == STOP_ON_PARTIAL) {
           return;
@@ -201,9 +311,9 @@ interval_t AllocatorLevel01Loose::_allocate_l1_contiguous(uint64_t length,
     if (ctx.affordable_len) {
       // allocate as specified
       assert(ctx.affordable_len >= length);
-      auto pos_end = ctx.affordable_l0_pos_start + 1;
-      _mark_alloc_l1_l0(ctx.affordable_l0_pos_start, pos_end);
-      res = interval_t(ctx.affordable_l0_pos_start * l0_granularity, length);
+      auto pos = ctx.affordable_offs / l0_granularity;
+      _mark_alloc_l1_l0(pos, pos + 1);
+      res = interval_t(ctx.affordable_offs, length);
       return res;
     }
 
@@ -242,15 +352,17 @@ interval_t AllocatorLevel01Loose::_allocate_l1_contiguous(uint64_t length,
     if (ctx.affordable_len) {
       assert(ctx.affordable_len >= length);
       assert((length % l0_granularity) == 0);
-      auto pos_end = ctx.affordable_l0_pos_start + length / l0_granularity;
-      _mark_alloc_l1_l0(ctx.affordable_l0_pos_start, pos_end);
-      res = interval_t(ctx.affordable_l0_pos_start * l0_granularity, length);
+      auto pos_start = ctx.affordable_offs / l0_granularity;
+      auto pos_end = (ctx.affordable_offs + length) / l0_granularity;
+      _mark_alloc_l1_l0(pos_start, pos_end);
+      res = interval_t(ctx.affordable_offs, length);
       return res;
     }
     if (ctx.min_affordable_len) {
-      auto pos0 = ctx.min_affordable_l0_pos_start;
-      _mark_alloc_l1_l0(pos0, pos0 + ctx.min_affordable_len / l0_granularity);
-      return interval_t(pos0 * l0_granularity, ctx.min_affordable_len);
+      auto pos_start = ctx.min_affordable_offs / l0_granularity;
+      auto pos_end = (ctx.min_affordable_offs + ctx.min_affordable_len) / l0_granularity;
+      _mark_alloc_l1_l0(pos_start, pos_end);
+      return interval_t(ctx.min_affordable_offs, ctx.min_affordable_len);
     }
   } else {
     search_ctx_t ctx;
@@ -261,27 +373,36 @@ interval_t AllocatorLevel01Loose::_allocate_l1_contiguous(uint64_t length,
     if (ctx.affordable_len) {
       assert(ctx.affordable_len >= length);
       assert((length % l0_granularity) == 0);
-      auto pos_end = ctx.affordable_l0_pos_start + length / l0_granularity;
-      _mark_alloc_l1_l0(ctx.affordable_l0_pos_start, pos_end);
-      res = interval_t(ctx.affordable_l0_pos_start * l0_granularity, length);
+      auto pos_start = ctx.affordable_offs / l0_granularity;
+      auto pos_end = (ctx.affordable_offs + length) / l0_granularity;
+      _mark_alloc_l1_l0(pos_start, pos_end);
+      res = interval_t(ctx.affordable_offs, length);
       return res;
     }
     // allocate using contiguous extent found at l1 if affordable
-    if (ctx.free_count && ctx.free_count * l1_granularity >= min_length) {
-
-      auto l = p2align(std::min(length, ctx.free_count * l1_granularity),
-        min_length);
-      assert((l % l0_granularity) == 0);
-      auto pos_end = ctx.free_l1_pos * l0_w + l / l0_granularity;
-
-      _mark_alloc_l1_l0(ctx.free_l1_pos * l0_w, pos_end);
-      res = interval_t(ctx.free_l1_pos * l1_granularity, l);
-      return res;
+    // align allocated extent with min_length
+    if (ctx.free_count) {
+      auto o = ctx.free_l1_pos * l1_granularity;
+      auto l = ctx.free_count * l1_granularity;
+      interval_t aligned_extent = _align2units(o, l, min_length);
+      if (aligned_extent.length > 0) {
+        aligned_extent.length = std::min(length,
+          uint64_t(aligned_extent.length));
+        assert((aligned_extent.offset % l0_granularity) == 0);
+        assert((aligned_extent.length % l0_granularity) == 0);
+
+        auto pos_start = aligned_extent.offset / l0_granularity;
+        auto pos_end = (aligned_extent.offset + aligned_extent.length) / l0_granularity;
+
+        _mark_alloc_l1_l0(pos_start, pos_end);
+        return aligned_extent;
+      }
     }
     if (ctx.min_affordable_len) {
-      auto pos0 = ctx.min_affordable_l0_pos_start;
-      _mark_alloc_l1_l0(pos0, pos0 + ctx.min_affordable_len / l0_granularity);
-      return interval_t(pos0 * l0_granularity, ctx.min_affordable_len);
+      auto pos_start = ctx.min_affordable_offs / l0_granularity;
+      auto pos_end = (ctx.min_affordable_offs + ctx.min_affordable_len) / l0_granularity;
+      _mark_alloc_l1_l0(pos_start, pos_end);
+      return interval_t(ctx.min_affordable_offs, ctx.min_affordable_len);
     }
   }
   return res;
diff --git a/src/os/bluestore/fastbmap_allocator_impl.h b/src/os/bluestore/fastbmap_allocator_impl.h
index 804ea0c91225d..179a22c80777f 100755
--- a/src/os/bluestore/fastbmap_allocator_impl.h
+++ b/src/os/bluestore/fastbmap_allocator_impl.h
@@ -131,84 +131,8 @@ class AllocatorLevel01Loose : public AllocatorLevel01
     return CHILD_PER_SLOT;
   }
 
-  uint64_t _get_longest_from_l0(uint64_t pos0, uint64_t pos1,
-    uint64_t* pos0_res) const
-  {
-    uint64_t res = 0;
-    if (pos0 >= pos1) {
-      return res;
-    }
-
-    uint64_t res_candidate = 0;
-
-    auto pos = pos0;
-    uint64_t pos_free_start = 0;
-    bool was_free = false;
-    auto d = slotset_width * 8;
-    slot_t bits = l0[pos / d];
-    bits >>= pos % d;
-    bool end_loop = false;
-    do {
-      if ((pos % d) == 0) {
-        bits = l0[pos / d];
-        if (pos1 - pos >= d) {
-          if (bits == all_slot_set) {
-            // slot is totally free
-            if (was_free) {
-              res_candidate += d;
-            } else {
-              was_free = true;
-              res_candidate = d;
-              pos_free_start = pos;
-            }
-            pos += d;
-            end_loop = pos >= pos1;
-            if (end_loop && res < res_candidate) {
-              *pos0_res = pos_free_start;
-              res = res_candidate;
-              res_candidate = 0;
-            }
-            continue;
-          }
-          if (bits == all_slot_clear) {
-            // slot is totally allocated
-            if (was_free) {
-              if (res < res_candidate) {
-                *pos0_res = pos_free_start;
-                res = res_candidate;
-                res_candidate = 0;
-              }
-              was_free = false;
-            }
-            pos += d;
-            end_loop = pos >= pos1;
-            continue;
-          }
-        }
-      } //if ((pos % d) == 0)
-
-      if ((bits & 1) == 1) {
-        // item is free
-        ++res_candidate;
-        if (!was_free) {
-          pos_free_start = pos;
-          was_free = true;
-        }
-      }
-      end_loop = ++pos >= pos1;
-      if (was_free && (end_loop || !(bits & 1))) {
-        if (res < res_candidate) {
-          *pos0_res = pos_free_start;
-          res = res_candidate;
-        }
-        res_candidate = 0;
-        was_free = false;
-      }
-      bits >>= 1;
-    } while (!end_loop);
-    res *= l0_granularity;
-    return res;
-  }
+  interval_t _get_longest_from_l0(uint64_t pos0, uint64_t pos1,
+    uint64_t min_length, interval_t* tail) const;
 
   inline void _fragment_and_emplace(uint64_t max_length, uint64_t offset,
     uint64_t len,
@@ -358,9 +282,9 @@ protected:
     uint64_t free_l1_pos = 0;
 
     uint64_t min_affordable_len = 0;
-    uint64_t min_affordable_l0_pos_start = 0;
+    uint64_t min_affordable_offs = 0;
     uint64_t affordable_len = 0;
-    uint64_t affordable_l0_pos_start = 0;
+    uint64_t affordable_offs = 0;
 
     bool fully_processed = false;
 
@@ -675,7 +599,7 @@ protected:
     uint64_t prev_allocated = *allocated;
     uint64_t d = CHILD_PER_SLOT;
     assert(isp2(min_length));
-    assert(min_length <= l1._level_granularity());
+    assert(min_length <= l2_granularity);
     assert(max_length == 0 || max_length >= min_length);
     assert(max_length == 0 || (max_length % min_length) == 0);
     assert(length >= min_length);
diff --git a/src/test/objectstore/fastbmap_allocator_test.cc b/src/test/objectstore/fastbmap_allocator_test.cc
index e3d5432575390..78343d5cf8a32 100755
--- a/src/test/objectstore/fastbmap_allocator_test.cc
+++ b/src/test/objectstore/fastbmap_allocator_test.cc
@@ -582,3 +582,184 @@ TEST(TestAllocatorLevel01, test_l2_unaligned)
 
   std::cout << "Done L2 Unaligned" << std::endl;
 }
+
+TEST(TestAllocatorLevel01, test_l2_contiguous_alignment)
+{
+  {
+    TestAllocatorLevel02 al2;
+    uint64_t num_l2_entries = 3;
+    uint64_t capacity = num_l2_entries * 256 * 512 * 4096; // 3x512 MB
+    al2.init(capacity, 0x1000);
+    std::cout << "Init L2 cont aligned" << std::endl;
+    for (uint64_t i = 0; i < capacity / 2; i += _1m) {
+      uint64_t allocated4 = 0;
+      interval_vector_t a4;
+      al2.allocate_l2(_1m, _1m, &allocated4, &a4);
+      ASSERT_EQ(a4.size(), 1);
+      ASSERT_EQ(allocated4, _1m);
+      ASSERT_EQ(a4[0].offset, i);
+      ASSERT_EQ(a4[0].length, _1m);
+    }
+    ASSERT_EQ(capacity / 2, al2.debug_get_free());
+
+    {
+      // release 2M + 4K at the beginning
+      interval_vector_t r;
+      r.emplace_back(0, 2 * _1m + 0x1000);
+      al2.free_l2(r);
+    }
+    {
+      // allocate 4K within the deallocated range
+      uint64_t allocated4 = 0;
+      interval_vector_t a4;
+      al2.allocate_l2(0x1000, 0x1000, &allocated4, &a4);
+      ASSERT_EQ(a4.size(), 1);
+      ASSERT_EQ(allocated4, 0x1000);
+      ASSERT_EQ(a4[0].offset, 0);
+      ASSERT_EQ(a4[0].length, 0x1000);
+    }
+    {
+      // allocate 1M - should go to the second 1M chunk
+      uint64_t allocated4 = 0;
+      interval_vector_t a4;
+      al2.allocate_l2(_1m, _1m, &allocated4, &a4);
+      ASSERT_EQ(a4.size(), 1);
+      ASSERT_EQ(allocated4, _1m);
+      ASSERT_EQ(a4[0].offset, _1m);
+      ASSERT_EQ(a4[0].length, _1m);
+    }
+    {
+      // and allocate yet another 8K within the deallocated range
+      uint64_t allocated4 = 0;
+      interval_vector_t a4;
+      al2.allocate_l2(0x2000, 0x1000, &allocated4, &a4);
+      ASSERT_EQ(a4.size(), 1);
+      ASSERT_EQ(allocated4, 0x2000);
+      ASSERT_EQ(a4[0].offset, 0x1000);
+      ASSERT_EQ(a4[0].length, 0x2000);
+    }
+    {
+      // release just allocated 1M
+      interval_vector_t r;
+      r.emplace_back(_1m, _1m);
+      al2.free_l2(r);
+    }
+    {
+      // allocate 3M - should go to the second 1M chunk and @capacity/2
+      uint64_t allocated4 = 0;
+      interval_vector_t a4;
+      al2.allocate_l2(3 * _1m, _1m, &allocated4, &a4);
+      ASSERT_EQ(a4.size(), 2);
+      ASSERT_EQ(allocated4, 3 * _1m);
+      ASSERT_EQ(a4[0].offset, _1m);
+      ASSERT_EQ(a4[0].length, _1m);
+      ASSERT_EQ(a4[1].offset, capacity / 2);
+      ASSERT_EQ(a4[1].length, 2 * _1m);
+    }
+    {
+      // release allocated 1M in the second meg chunk except
+      // the first 4K chunk
+      interval_vector_t r;
+      r.emplace_back(_1m + 0x1000, _1m);
+      al2.free_l2(r);
+    }
+    {
+      // release 2M @(capacity / 2)
+      interval_vector_t r;
+      r.emplace_back(capacity / 2, 2 * _1m);
+      al2.free_l2(r);
+    }
+    {
+      // allocate 3x512K - should go to the second halves of
+      // the first and second 1M chunks and @(capacity / 2)
+      uint64_t allocated4 = 0;
+      interval_vector_t a4;
+      al2.allocate_l2(2 * _1m, _1m / 2, &allocated4, &a4);
+      ASSERT_EQ(a4.size(), 3);
+      ASSERT_EQ(allocated4, 2 * _1m);
+      ASSERT_EQ(a4[0].offset, _1m / 2);
+      ASSERT_EQ(a4[0].length, _1m / 2);
+      ASSERT_EQ(a4[1].offset, _1m + _1m / 2);
+      ASSERT_EQ(a4[1].length, _1m / 2);
+      ASSERT_EQ(a4[2].offset, capacity / 2);
+      ASSERT_EQ(a4[2].length, _1m);
+    }
+    {
+      // cleanup first 2M except the last 4K chunk
+      interval_vector_t r;
+      r.emplace_back(0, 2 * _1m - 0x1000);
+      al2.free_l2(r);
+    }
+    {
+      // release 2M @(capacity / 2)
+      interval_vector_t r;
+      r.emplace_back(capacity / 2, 2 * _1m);
+      al2.free_l2(r);
+    }
+    {
+      // allocate 132M using 4M granularity - should go to (capacity / 2)
+      uint64_t allocated4 = 0;
+      interval_vector_t a4;
+      al2.allocate_l2(132 * _1m, 4 * _1m, &allocated4, &a4);
+      ASSERT_EQ(a4.size(), 1);
+      ASSERT_EQ(a4[0].offset, capacity / 2);
+      ASSERT_EQ(a4[0].length, 132 * _1m);
+    }
+    {
+      // cleanup the remaining 4K chunk in the first 2M
+      interval_vector_t r;
+      r.emplace_back(2 * _1m - 0x1000, 0x1000);
+      al2.free_l2(r);
+    }
+    {
+      // release 132M @(capacity / 2)
+      interval_vector_t r;
+      r.emplace_back(capacity / 2, 132 * _1m);
+      al2.free_l2(r);
+    }
+    {
+      // allocate 132M using 2M granularity - should go to the first chunk and to
+      // (capacity / 2)
+      uint64_t allocated4 = 0;
+      interval_vector_t a4;
+      al2.allocate_l2(132 * _1m, 2 * _1m, &allocated4, &a4);
+      ASSERT_EQ(a4.size(), 2);
+      ASSERT_EQ(a4[0].offset, 0);
+      ASSERT_EQ(a4[0].length, 2 * _1m);
+      ASSERT_EQ(a4[1].offset, capacity / 2);
+      ASSERT_EQ(a4[1].length, 130 * _1m);
+    }
+    {
+      // release 130M @(capacity / 2)
+      interval_vector_t r;
+      r.emplace_back(capacity / 2, 130 * _1m);
+      al2.free_l2(r);
+    }
+    {
+      // release 4K~16K
+      // release 28K~32K
+      // release 68K~24K
+      interval_vector_t r;
+      r.emplace_back(0x1000, 0x4000);
+      r.emplace_back(0x7000, 0x8000);
+      r.emplace_back(0x11000, 0x6000);
+      al2.free_l2(r);
+    }
+    {
+      // allocate 32K using 16K granularity - should bypass the first
+      // unaligned extent, use the second free extent partially given
+      // the 16K alignment and then fall back to capacity / 2
+      uint64_t allocated4 = 0;
+      interval_vector_t a4;
+      al2.allocate_l2(0x8000, 0x4000, &allocated4, &a4);
+      ASSERT_EQ(a4.size(), 2);
+      ASSERT_EQ(a4[0].offset, 0x8000);
+      ASSERT_EQ(a4[0].length, 0x4000);
+      ASSERT_EQ(a4[1].offset, capacity / 2);
+      ASSERT_EQ(a4[1].length, 0x4000);
+    }
+
+  }
+
+  std::cout << "Done L2 cont aligned" << std::endl;
+}
-- 
2.39.5
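
For reviewers, the last test case above is the quickest way to see what the new
_align2units() helper actually does. Below is a minimal standalone sketch, not
part of the patch: interval_t and the power-of-two helpers p2roundup()/p2align()
are re-declared locally with the semantics the patch relies on, and the names
align2units()/main() are illustrative only. It feeds the three extents freed at
the end of test_l2_contiguous_alignment through the trimming rule with a 16K
min_length, showing why only the second extent yields a usable 16K unit and the
32K request must fall back to capacity / 2 for the rest.

// Build: g++ -std=c++11 align2units_demo.cpp
#include <cstdint>
#include <cstdio>

struct interval_t {
  uint64_t offset = 0;
  uint64_t length = 0;
};

// Same semantics as Ceph's p2roundup()/p2align() for power-of-two alignment.
static uint64_t p2roundup(uint64_t x, uint64_t align) {
  return (x + align - 1) & ~(align - 1);
}
static uint64_t p2align(uint64_t x, uint64_t align) {
  return x & ~(align - 1);
}

// Mirrors _align2units() from the patch: trim [offset, offset + len) to the
// largest min_length-aligned sub-interval, or return an empty interval.
static interval_t align2units(uint64_t offset, uint64_t len, uint64_t min_length) {
  interval_t res;
  if (len >= min_length) {
    res.offset = p2roundup(offset, min_length);
    uint64_t delta_off = res.offset - offset;
    if (len > delta_off) {
      res.length = p2align(len - delta_off, min_length);
      if (res.length) {
        return res;
      }
    }
  }
  return interval_t();
}

int main() {
  // The three extents freed at the end of test_l2_contiguous_alignment,
  // trimmed to 16K (0x4000) units:
  //   4K~16K  -> empty:    start rounds up to 16K, only 4K would remain
  //   28K~32K -> 32K~16K:  start rounds up to 32K, 28K left, trimmed to 16K
  //   68K~24K -> empty:    start rounds up to 80K, only 12K would remain
  // Hence the 32K allocation gets one 16K unit at 0x8000 and takes the other
  // 16K at capacity / 2, exactly as the test asserts.
  const uint64_t extents[][2] = {
    {0x1000, 0x4000}, {0x7000, 0x8000}, {0x11000, 0x6000}};
  for (const auto& e : extents) {
    interval_t a = align2units(e[0], e[1], 0x4000);
    std::printf("%#llx~%#llx -> %#llx~%#llx\n",
                (unsigned long long)e[0], (unsigned long long)e[1],
                (unsigned long long)a.offset, (unsigned long long)a.length);
  }
  return 0;
}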