uint64_t AllocatorLevel::alloc_fragments_fast = 0;
uint64_t AllocatorLevel::l2_allocs = 0;
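+// Trims [offset, offset + len) to min_length units: the start is rounded
+// up and the remaining length rounded down to multiples of min_length.
+// Returns an empty interval if nothing aligned is left, e.g.
+// _align2units(0x1000, 0x7000, 0x4000) yields {0x4000, 0x4000}.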
+inline interval_t _align2units(uint64_t offset, uint64_t len, uint64_t min_length)
+{
+ interval_t res;
+ if (len >= min_length) {
+ res.offset = p2roundup(offset, min_length);
+ auto delta_off = res.offset - offset;
+ if (len > delta_off) {
+ res.length = len - delta_off;
+ res.length = p2align(res.length, min_length);
+ if (res.length) {
+ return res;
+ }
+ }
+ }
+ return interval_t();
+}
+
+
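+// Scans L0 positions [pos0, pos1) and returns (in bytes) the longest free
+// extent that survives trimming to min_length units, or an empty interval
+// if none does. *tail is in/out: on entry it seeds the scan with the free
+// run that ended the previously examined range, on exit it holds the free
+// run (if any) reaching the end of this range, so the caller can carry it
+// over to the next call.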
+interval_t AllocatorLevel01Loose::_get_longest_from_l0(uint64_t pos0,
+ uint64_t pos1, uint64_t min_length, interval_t* tail) const
+{
+ interval_t res;
+ if (pos0 >= pos1) {
+ return res;
+ }
+ auto pos = pos0;
+
+ interval_t res_candidate;
+ if (tail->length != 0) {
+ assert((tail->offset % l0_granularity) == 0);
+ assert((tail->length % l0_granularity) == 0);
+ res_candidate.offset = tail->offset / l0_granularity;
+ res_candidate.length = tail->length / l0_granularity;
+ }
+ *tail = interval_t();
+
+ auto d = bits_per_slot;
+ slot_t bits = l0[pos / d];
+ bits >>= pos % d;
+ bool end_loop = false;
+ auto min_granules = min_length / l0_granularity;
+
+ do {
+ if ((pos % d) == 0) {
+ bits = l0[pos / d];
+ if (pos1 - pos >= d) {
+ switch(bits) {
+ case all_slot_set:
+ // slot is totally free
+ if (!res_candidate.length) {
+ res_candidate.offset = pos;
+ }
+ res_candidate.length += d;
+ pos += d;
+ end_loop = pos >= pos1;
+ if (end_loop) {
+ *tail = res_candidate;
+ res_candidate = _align2units(res_candidate.offset,
+ res_candidate.length, min_granules);
+ if(res.length < res_candidate.length) {
+ res = res_candidate;
+ }
+ }
+ continue;
+ case all_slot_clear:
+ // slot is totally allocated
+ res_candidate = _align2units(res_candidate.offset,
+ res_candidate.length, min_granules);
+ if (res.length < res_candidate.length) {
+ res = res_candidate;
+ }
+ res_candidate = interval_t();
+ pos += d;
+ end_loop = pos >= pos1;
+ continue;
+ }
+ }
+ } //if ((pos % d) == 0)
+
+ end_loop = ++pos >= pos1;
+ if (bits & 1) {
+ // item is free
+ if (!res_candidate.length) {
+ res_candidate.offset = pos - 1;
+ }
+ ++res_candidate.length;
+ if (end_loop) {
+ *tail = res_candidate;
+ res_candidate = _align2units(res_candidate.offset,
+ res_candidate.length, min_granules);
+ if (res.length < res_candidate.length) {
+ res = res_candidate;
+ }
+ }
+ } else {
+ res_candidate = _align2units(res_candidate.offset,
+ res_candidate.length, min_granules);
+ if (res.length < res_candidate.length) {
+ res = res_candidate;
+ }
+ res_candidate = interval_t();
+ }
+ bits >>= 1;
+ } while (!end_loop);
+ res.offset *= l0_granularity;
+ res.length *= l0_granularity;
+ tail->offset *= l0_granularity;
+ tail->length *= l0_granularity;
+ return res;
+}
+
void AllocatorLevel01Loose::_analyze_partials(uint64_t pos_start,
uint64_t pos_end, uint64_t length, uint64_t min_length, int mode,
search_ctx_t* ctx)
uint64_t l0_w = slotset_width * CHILD_PER_SLOT_L0;
uint64_t l1_pos = pos_start;
- bool prev_pos_partial = false;
+ const interval_t empty_tail;
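+ // trailing free run of the preceding partial L1 slot, fed back into
+ // _get_longest_from_l0() so that free space spanning slot boundaries
+ // is accounted as a single extent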
+ interval_t prev_tail;
+
uint64_t next_free_l1_pos = 0;
for (auto pos = pos_start / d; pos < pos_end / d; ++pos) {
slot_t slot_val = l1[pos];
for (auto c = 0; c < d; c++) {
switch (slot_val & L1_ENTRY_MASK) {
case L1_ENTRY_FREE:
- prev_pos_partial = false;
+ prev_tail = empty_tail;
if (!ctx->free_count) {
ctx->free_l1_pos = l1_pos;
} else if (l1_pos != next_free_l1_pos){
- // check if already found extent fits min_length
- if (ctx->free_count * l1_granularity >= min_length) {
+ auto o = ctx->free_l1_pos * l1_granularity;
+ auto l = ctx->free_count * l1_granularity;
+ // check if already found extent fits min_length after alignment
+ if (_align2units(o, l, min_length).length >= min_length) {
break;
}
// if not - proceed with the next one
}
break;
case L1_ENTRY_FULL:
- prev_pos_partial = false;
+ prev_tail = empty_tail;
break;
case L1_ENTRY_PARTIAL:
- uint64_t l;
- uint64_t p0 = 0;
+ interval_t longest;
++ctx->partial_count;
- if (!prev_pos_partial) {
- l = _get_longest_from_l0(l1_pos * l0_w, (l1_pos + 1) * l0_w, &p0);
- prev_pos_partial = true;
- } else {
- l = _get_longest_from_l0((l1_pos - 1) * l0_w, (l1_pos + 1) * l0_w, &p0);
- }
- if (l >= length) {
+ longest = _get_longest_from_l0(l1_pos * l0_w, (l1_pos + 1) * l0_w, min_length, &prev_tail);
+
+ if (longest.length >= length) {
if ((ctx->affordable_len == 0) ||
((ctx->affordable_len != 0) &&
- (l < ctx->affordable_len))) {
- ctx->affordable_len = l;
- ctx->affordable_l0_pos_start = p0;
+ (longest.length < ctx->affordable_len))) {
+ ctx->affordable_len = longest.length;
+ ctx->affordable_offs = longest.offset;
}
}
- if (l >= min_length &&
+ if (longest.length >= min_length &&
(ctx->min_affordable_len == 0 ||
- (l < ctx->min_affordable_len))) {
+ (longest.length < ctx->min_affordable_len))) {
- ctx->min_affordable_len = p2align(l, min_length);
- ctx->min_affordable_l0_pos_start = p0;
+ ctx->min_affordable_len = p2align(longest.length, min_length);
+ ctx->min_affordable_offs = longest.offset;
}
if (mode == STOP_ON_PARTIAL) {
return;
if (ctx.affordable_len) {
// allocate as specified
assert(ctx.affordable_len >= length);
- auto pos_end = ctx.affordable_l0_pos_start + 1;
- _mark_alloc_l1_l0(ctx.affordable_l0_pos_start, pos_end);
- res = interval_t(ctx.affordable_l0_pos_start * l0_granularity, length);
+ auto pos = ctx.affordable_offs / l0_granularity;
+ _mark_alloc_l1_l0(pos, pos + 1);
+ res = interval_t(ctx.affordable_offs, length);
return res;
}
if (ctx.affordable_len) {
assert(ctx.affordable_len >= length);
assert((length % l0_granularity) == 0);
- auto pos_end = ctx.affordable_l0_pos_start + length / l0_granularity;
- _mark_alloc_l1_l0(ctx.affordable_l0_pos_start, pos_end);
- res = interval_t(ctx.affordable_l0_pos_start * l0_granularity, length);
+ auto pos_start = ctx.affordable_offs / l0_granularity;
+ auto pos_end = (ctx.affordable_offs + length) / l0_granularity;
+ _mark_alloc_l1_l0(pos_start, pos_end);
+ res = interval_t(ctx.affordable_offs, length);
return res;
}
if (ctx.min_affordable_len) {
- auto pos0 = ctx.min_affordable_l0_pos_start;
- _mark_alloc_l1_l0(pos0, pos0 + ctx.min_affordable_len / l0_granularity);
- return interval_t(pos0 * l0_granularity, ctx.min_affordable_len);
+ auto pos_start = ctx.min_affordable_offs / l0_granularity;
+ auto pos_end = (ctx.min_affordable_offs + ctx.min_affordable_len) / l0_granularity;
+ _mark_alloc_l1_l0(pos_start, pos_end);
+ return interval_t(ctx.min_affordable_offs, ctx.min_affordable_len);
}
} else {
search_ctx_t ctx;
if (ctx.affordable_len) {
assert(ctx.affordable_len >= length);
assert((length % l0_granularity) == 0);
- auto pos_end = ctx.affordable_l0_pos_start + length / l0_granularity;
- _mark_alloc_l1_l0(ctx.affordable_l0_pos_start, pos_end);
- res = interval_t(ctx.affordable_l0_pos_start * l0_granularity, length);
+ auto pos_start = ctx.affordable_offs / l0_granularity;
+ auto pos_end = (ctx.affordable_offs + length) / l0_granularity;
+ _mark_alloc_l1_l0(pos_start, pos_end);
+ res = interval_t(ctx.affordable_offs, length);
return res;
}
// allocate using contiguous extent found at l1 if affordable
- if (ctx.free_count && ctx.free_count * l1_granularity >= min_length) {
-
- auto l = p2align(std::min(length, ctx.free_count * l1_granularity),
- min_length);
- assert((l % l0_granularity) == 0);
- auto pos_end = ctx.free_l1_pos * l0_w + l / l0_granularity;
-
- _mark_alloc_l1_l0(ctx.free_l1_pos * l0_w, pos_end);
- res = interval_t(ctx.free_l1_pos * l1_granularity, l);
- return res;
+ // align allocated extent with min_length
+ if (ctx.free_count) {
+ auto o = ctx.free_l1_pos * l1_granularity;
+ auto l = ctx.free_count * l1_granularity;
+ interval_t aligned_extent = _align2units(o, l, min_length);
+ if (aligned_extent.length > 0) {
+ aligned_extent.length = std::min(length,
+ uint64_t(aligned_extent.length));
+ assert((aligned_extent.offset % l0_granularity) == 0);
+ assert((aligned_extent.length % l0_granularity) == 0);
+
+ auto pos_start = aligned_extent.offset / l0_granularity;
+ auto pos_end = (aligned_extent.offset + aligned_extent.length) / l0_granularity;
+
+ _mark_alloc_l1_l0(pos_start, pos_end);
+ return aligned_extent;
+ }
}
if (ctx.min_affordable_len) {
- auto pos0 = ctx.min_affordable_l0_pos_start;
- _mark_alloc_l1_l0(pos0, pos0 + ctx.min_affordable_len / l0_granularity);
- return interval_t(pos0 * l0_granularity, ctx.min_affordable_len);
+ auto pos_start = ctx.min_affordable_offs / l0_granularity;
+ auto pos_end = (ctx.min_affordable_offs + ctx.min_affordable_len) / l0_granularity;
+ _mark_alloc_l1_l0(pos_start, pos_end);
+ return interval_t(ctx.min_affordable_offs, ctx.min_affordable_len);
}
}
return res;
return CHILD_PER_SLOT;
}
- uint64_t _get_longest_from_l0(uint64_t pos0, uint64_t pos1,
- uint64_t* pos0_res) const
- {
- uint64_t res = 0;
- if (pos0 >= pos1) {
- return res;
- }
-
- uint64_t res_candidate = 0;
-
- auto pos = pos0;
- uint64_t pos_free_start = 0;
- bool was_free = false;
- auto d = slotset_width * 8;
- slot_t bits = l0[pos / d];
- bits >>= pos % d;
- bool end_loop = false;
- do {
- if ((pos % d) == 0) {
- bits = l0[pos / d];
- if (pos1 - pos >= d) {
- if (bits == all_slot_set) {
- // slot is totally free
- if (was_free) {
- res_candidate += d;
- } else {
- was_free = true;
- res_candidate = d;
- pos_free_start = pos;
- }
- pos += d;
- end_loop = pos >= pos1;
- if (end_loop && res < res_candidate) {
- *pos0_res = pos_free_start;
- res = res_candidate;
- res_candidate = 0;
- }
- continue;
- }
- if (bits == all_slot_clear) {
- // slot is totally allocated
- if (was_free) {
- if (res < res_candidate) {
- *pos0_res = pos_free_start;
- res = res_candidate;
- res_candidate = 0;
- }
- was_free = false;
- }
- pos += d;
- end_loop = pos >= pos1;
- continue;
- }
- }
- } //if ((pos % d) == 0)
-
- if ((bits & 1) == 1) {
- // item is free
- ++res_candidate;
- if (!was_free) {
- pos_free_start = pos;
- was_free = true;
- }
- }
- end_loop = ++pos >= pos1;
- if (was_free && (end_loop || !(bits & 1))) {
- if (res < res_candidate) {
- *pos0_res = pos_free_start;
- res = res_candidate;
- }
- res_candidate = 0;
- was_free = false;
- }
- bits >>= 1;
- } while (!end_loop);
- res *= l0_granularity;
- return res;
- }
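+ // longest free extent within L0 range [pos0, pos1) that survives
+ // trimming to min_length units; *tail carries the trailing free run
+ // between consecutive calls (in/out)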
+ interval_t _get_longest_from_l0(uint64_t pos0, uint64_t pos1,
+ uint64_t min_length, interval_t* tail) const;
inline void _fragment_and_emplace(uint64_t max_length, uint64_t offset,
uint64_t len,
uint64_t free_l1_pos = 0;
uint64_t min_affordable_len = 0;
- uint64_t min_affordable_l0_pos_start = 0;
+ uint64_t min_affordable_offs = 0;
uint64_t affordable_len = 0;
- uint64_t affordable_l0_pos_start = 0;
+ uint64_t affordable_offs = 0;
bool fully_processed = false;
uint64_t prev_allocated = *allocated;
uint64_t d = CHILD_PER_SLOT;
assert(isp2(min_length));
- assert(min_length <= l1._level_granularity());
+ assert(min_length <= l2_granularity);
assert(max_length == 0 || max_length >= min_length);
assert(max_length == 0 || (max_length % min_length) == 0);
assert(length >= min_length);
std::cout << "Done L2 Unaligned" << std::endl;
}
+
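+// Verifies that allocations honor the min_length alignment across
+// partially free L0/L1 regions (see _align2units/_get_longest_from_l0).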
+TEST(TestAllocatorLevel01, test_l2_contiguous_alignment)
+{
+ {
+ TestAllocatorLevel02 al2;
+ uint64_t num_l2_entries = 3;
+ uint64_t capacity = num_l2_entries * 256 * 512 * 4096; // 3x512 MB
+ al2.init(capacity, 0x1000);
+ std::cout << "Init L2 cont aligned" << std::endl;
+ for (uint64_t i = 0; i < capacity / 2; i += _1m) {
+ uint64_t allocated4 = 0;
+ interval_vector_t a4;
+ al2.allocate_l2(_1m, _1m, &allocated4, &a4);
+ ASSERT_EQ(a4.size(), 1);
+ ASSERT_EQ(allocated4, _1m);
+ ASSERT_EQ(a4[0].offset, i);
+ ASSERT_EQ(a4[0].length, _1m);
+ }
+ ASSERT_EQ(capacity / 2, al2.debug_get_free());
+
+ {
+ // release 2M + 4K at the beginning
+ interval_vector_t r;
+ r.emplace_back(0, 2 * _1m + 0x1000);
+ al2.free_l2(r);
+ }
+ {
+ // allocate 4K within the deallocated range
+ uint64_t allocated4 = 0;
+ interval_vector_t a4;
+ al2.allocate_l2(0x1000, 0x1000, &allocated4, &a4);
+ ASSERT_EQ(a4.size(), 1);
+ ASSERT_EQ(allocated4, 0x1000);
+ ASSERT_EQ(a4[0].offset, 0);
+ ASSERT_EQ(a4[0].length, 0x1000);
+ }
+ {
+ // allocate 1M - should go to the second 1M chunk
+ uint64_t allocated4 = 0;
+ interval_vector_t a4;
+ al2.allocate_l2(_1m, _1m, &allocated4, &a4);
+ ASSERT_EQ(a4.size(), 1);
+ ASSERT_EQ(allocated4, _1m);
+ ASSERT_EQ(a4[0].offset, _1m);
+ ASSERT_EQ(a4[0].length, _1m);
+ }
+ {
+ // and allocate yet another 8K within the deallocated range
+ uint64_t allocated4 = 0;
+ interval_vector_t a4;
+ al2.allocate_l2(0x2000, 0x1000, &allocated4, &a4);
+ ASSERT_EQ(a4.size(), 1);
+ ASSERT_EQ(allocated4, 0x2000);
+ ASSERT_EQ(a4[0].offset, 0x1000);
+ ASSERT_EQ(a4[0].length, 0x2000);
+ }
+ {
+ // release just allocated 1M
+ interval_vector_t r;
+ r.emplace_back(_1m, _1m);
+ al2.free_l2(r);
+ }
+ {
+ // allocate 3M - should go to the second 1M chunk and @capacity/2
+ uint64_t allocated4 = 0;
+ interval_vector_t a4;
+ al2.allocate_l2(3 * _1m, _1m, &allocated4, &a4);
+ ASSERT_EQ(a4.size(), 2);
+ ASSERT_EQ(allocated4, 3 * _1m);
+ ASSERT_EQ(a4[0].offset, _1m);
+ ASSERT_EQ(a4[0].length, _1m);
+ ASSERT_EQ(a4[1].offset, capacity / 2);
+ ASSERT_EQ(a4[1].length, 2 * _1m);
+ }
+ {
+ // release allocated 1M in the second meg chunk except
+ // the first 4K chunk
+ interval_vector_t r;
+ r.emplace_back(_1m + 0x1000, _1m);
+ al2.free_l2(r);
+ }
+ {
+ // release 2M @(capacity / 2)
+ interval_vector_t r;
+ r.emplace_back(capacity / 2, 2 * _1m);
+ al2.free_l2(r);
+ }
+ {
+ // allocate 2M at 512K granularity - should take the second halves of
+ // the first and second 1M chunks (512K each) plus 1M @(capacity / 2)
+ uint64_t allocated4 = 0;
+ interval_vector_t a4;
+ al2.allocate_l2(2 * _1m, _1m / 2, &allocated4, &a4);
+ ASSERT_EQ(a4.size(), 3);
+ ASSERT_EQ(allocated4, 2 * _1m);
+ ASSERT_EQ(a4[0].offset, _1m / 2);
+ ASSERT_EQ(a4[0].length, _1m / 2);
+ ASSERT_EQ(a4[1].offset, _1m + _1m / 2);
+ ASSERT_EQ(a4[1].length, _1m / 2);
+ ASSERT_EQ(a4[2].offset, capacity / 2);
+ ASSERT_EQ(a4[2].length, _1m);
+ }
+ {
+ // cleanup the first 2M except the last 4K chunk
+ interval_vector_t r;
+ r.emplace_back(0, 2 * _1m - 0x1000);
+ al2.free_l2(r);
+ }
+ {
+ // release 2M @(capacity / 2)
+ interval_vector_t r;
+ r.emplace_back(capacity / 2, 2 * _1m);
+ al2.free_l2(r);
+ }
+ {
+ // allocate 132M using 4M granularity - should go to (capacity / 2)
+ uint64_t allocated4 = 0;
+ interval_vector_t a4;
+ al2.allocate_l2(132 * _1m, 4 * _1m, &allocated4, &a4);
+ ASSERT_EQ(a4.size(), 1);
+ ASSERT_EQ(a4[0].offset, capacity / 2);
+ ASSERT_EQ(a4[0].length, 132 * _1m);
+ }
+ {
+ // cleanup the 4K chunk left over in the first 2M
+ interval_vector_t r;
+ r.emplace_back(2 * _1m - 0x1000, 0x1000);
+ al2.free_l2(r);
+ }
+ {
+ // release 132M @(capacity / 2)
+ interval_vector_t r;
+ r.emplace_back(capacity / 2, 132 * _1m);
+ al2.free_l2(r);
+ }
+ {
+ // allocate 132M using 2M granularity - should go to the first 2M chunk
+ // and to (capacity / 2)
+ uint64_t allocated4 = 0;
+ interval_vector_t a4;
+ al2.allocate_l2(132 * _1m, 2 * _1m, &allocated4, &a4);
+ ASSERT_EQ(a4.size(), 2);
+ ASSERT_EQ(a4[0].offset, 0);
+ ASSERT_EQ(a4[0].length, 2 * _1m);
+ ASSERT_EQ(a4[1].offset, capacity / 2);
+ ASSERT_EQ(a4[1].length, 130 * _1m);
+ }
+ {
+ // release 130M @(capacity / 2)
+ interval_vector_t r;
+ r.emplace_back(capacity / 2, 130 * _1m);
+ al2.free_l2(r);
+ }
+ {
+ // release 4K~16K
+ // release 28K~32K
+ // release 68K~24K
+ interval_vector_t r;
+ r.emplace_back(0x1000, 0x4000);
+ r.emplace_back(0x7000, 0x8000);
+ r.emplace_back(0x11000, 0x6000);
+ al2.free_l2(r);
+ }
+ {
+ // allocate 32K using 16K granularity - should bypass the first
+ // unaligned extent, take 16K from the second free extent (the part
+ // that satisfies the 16K alignment) and then fall back to capacity / 2
+ uint64_t allocated4 = 0;
+ interval_vector_t a4;
+ al2.allocate_l2(0x8000, 0x4000, &allocated4, &a4);
+ ASSERT_EQ(a4.size(), 2);
+ ASSERT_EQ(a4[0].offset, 0x8000);
+ ASSERT_EQ(a4[0].length, 0x4000);
+ ASSERT_EQ(a4[1].offset, capacity / 2);
+ ASSERT_EQ(a4[1].length, 0x4000);
+ }
+
+ }
+
+ std::cout << "Done L2 cont aligned" << std::endl;
+}