From: Adam Kupczyk Date: Thu, 21 Jun 2018 11:58:45 +0000 (+0200) Subject: BlueStore/allocator: Improved (but slower) method of calculating fragmentation. X-Git-Tag: v12.2.13~157^2~10 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=832cf6039a70ee34c80c95c3e8fe4ee6c1e7b95f;p=ceph.git BlueStore/allocator: Improved (but slower) method of calculating fragmentation. Signed-off-by: Adam Kupczyk (cherry picked from commit c1ac6a1207cdb71a471321e42ceca517383b36c8) --- diff --git a/src/os/bluestore/Allocator.cc b/src/os/bluestore/Allocator.cc index b6026353165..4c140cefd48 100644 --- a/src/os/bluestore/Allocator.cc +++ b/src/os/bluestore/Allocator.cc @@ -29,3 +29,55 @@ void Allocator::release(const PExtentVector& release_vec) } release(release_set); } + +/** + * Gives fragmentation a numeric value. + * + * The following algorithm assigns a value to each existing free (unallocated) block. + * The value of a single block is the product of its size and a per-byte value. + * The per-byte value is greater for larger blocks. + * Assume block size X has value per-byte p; then block size 2*X will have per-byte value 1.1*p. + * + * This could be expressed in logarithms, but for speed this is interpolated inside ranges. + * [1] [2..3] [4..7] [8..15] ... + * ^ ^ ^ ^ + * 1.1 1.1^2 1.1^3 1.1^4 ... + * + * The final score is computed by comparing the actual score against the scores that + * would be obtained under absolute fragmentation and under no fragmentation at all. 
+ */ +double Allocator::get_fragmentation_score() +{ + // how much more 2X bytes in a single chunk are worth than X + X bytes in two chunks + static const double double_size_worth = 1.1 ; + std::vector scales{1}; + double score_sum = 0; + size_t sum = 0; + + auto get_score = [&](size_t v) -> double { + size_t sc = sizeof(v) * 8 - clz(v) - 1; //assign to grade depending on log2(len) + while (scales.size() <= sc + 1) { + //rarely taken: expand scales vector on demand + scales.push_back(scales[scales.size() - 1] * double_size_worth); + } + + size_t sc_shifted = size_t(1) << sc; + double x = double(v - sc_shifted) / sc_shifted; //x is in [0,1) within its scale grade + // linear interpolation within its scale grade + double score = (sc_shifted ) * scales[sc] * (1-x) + + (sc_shifted * 2) * scales[sc+1] * x; + return score; + }; + + auto iterated_allocation = [&](size_t off, size_t len) { + ceph_assert(len > 0); + score_sum += get_score(len); + sum += len; + }; + dump(iterated_allocation); + + + double ideal = get_score(sum); + double terrible = sum * get_score(1); + return (ideal - score_sum) / (ideal - terrible); +} diff --git a/src/os/bluestore/Allocator.h b/src/os/bluestore/Allocator.h index ddf194f9148..7425e33629d 100644 --- a/src/os/bluestore/Allocator.h +++ b/src/os/bluestore/Allocator.h @@ -15,6 +15,7 @@ #include #include "include/assert.h" #include "os/bluestore/bluestore_types.h" +#include class FreelistManager; @@ -46,6 +47,7 @@ public: void release(const PExtentVector& release_set); virtual void dump() = 0; + virtual void dump(std::function notify) = 0; virtual void init_add_free(uint64_t offset, uint64_t length) = 0; virtual void init_rm_free(uint64_t offset, uint64_t length) = 0; @@ -55,7 +57,7 @@ public: { return 0.0; } - + virtual double get_fragmentation_score(); virtual void shutdown() = 0; static Allocator *create(CephContext* cct, string type, int64_t size, int64_t block_size); diff --git a/src/os/bluestore/BitmapAllocator.cc b/src/os/bluestore/BitmapAllocator.cc index 
d632d5fab87..3e80d025843 100755 --- a/src/os/bluestore/BitmapAllocator.cc +++ b/src/os/bluestore/BitmapAllocator.cc @@ -100,3 +100,13 @@ void BitmapAllocator::dump() ++it; } } + +void BitmapAllocator::dump(std::function notify) +{ + size_t alloc_size = get_min_alloc_size(); + auto multiply_by_alloc_size = [alloc_size, notify](size_t off, size_t len) { + notify(off * alloc_size, len * alloc_size); + }; + std::lock_guard lck(lock); + l1.dump(multiply_by_alloc_size); +} diff --git a/src/os/bluestore/BitmapAllocator.h b/src/os/bluestore/BitmapAllocator.h index 223c21dfbc5..df41c1a958c 100755 --- a/src/os/bluestore/BitmapAllocator.h +++ b/src/os/bluestore/BitmapAllocator.h @@ -36,6 +36,7 @@ public: } void dump() override; + void dump(std::function notify) override; double get_fragmentation(uint64_t) override { return _get_fragmentation(); diff --git a/src/os/bluestore/StupidAllocator.cc b/src/os/bluestore/StupidAllocator.cc index c7224ea8c4e..0f957e4c6d1 100644 --- a/src/os/bluestore/StupidAllocator.cc +++ b/src/os/bluestore/StupidAllocator.cc @@ -287,6 +287,16 @@ void StupidAllocator::dump() } } +void StupidAllocator::dump(std::function notify) +{ + std::lock_guard l(lock); + for (unsigned bin = 0; bin < free.size(); ++bin) { + for (auto p = free[bin].begin(); p != free[bin].end(); ++p) { + notify(p.get_start(), p.get_len()); + } + } +} + void StupidAllocator::init_add_free(uint64_t offset, uint64_t length) { std::lock_guard l(lock); diff --git a/src/os/bluestore/StupidAllocator.h b/src/os/bluestore/StupidAllocator.h index c994cc718e4..fb0e2761a0a 100644 --- a/src/os/bluestore/StupidAllocator.h +++ b/src/os/bluestore/StupidAllocator.h @@ -52,6 +52,7 @@ public: double get_fragmentation(uint64_t alloc_unit) override; void dump() override; + void dump(std::function notify) override; void init_add_free(uint64_t offset, uint64_t length) override; void init_rm_free(uint64_t offset, uint64_t length) override; diff --git a/src/os/bluestore/fastbmap_allocator_impl.cc 
b/src/os/bluestore/fastbmap_allocator_impl.cc index 876e987dcc9..73ac5d0097a 100755 --- a/src/os/bluestore/fastbmap_allocator_impl.cc +++ b/src/os/bluestore/fastbmap_allocator_impl.cc @@ -544,3 +544,88 @@ void AllocatorLevel01Loose::collect_stats( bins_overall[cbits(free_seq_cnt) - 1]++; } } + +inline ssize_t AllocatorLevel01Loose::count_0s(slot_t slot_val, size_t start_pos) + { + #ifdef __GNUC__ + size_t pos = __builtin_ffsll(slot_val >> start_pos); + if (pos == 0) + return sizeof(slot_t)*8 - start_pos; + return pos - 1; + #else + size_t pos = start_pos; + slot_t mask = slot_t(1) << pos; + while (pos < bits_per_slot && (slot_val & mask) == 0) { + mask <<= 1; + pos++; + } + return pos - start_pos; + #endif + } + + inline ssize_t AllocatorLevel01Loose::count_1s(slot_t slot_val, size_t start_pos) + { + return count_0s(~slot_val, start_pos); + } +void AllocatorLevel01Loose::dump( + std::function notify) +{ + size_t len = 0; + size_t off = 0; + for (size_t i = 0; i < l1.size(); i++) + { + for (size_t j = 0; j < L1_ENTRIES_PER_SLOT * L1_ENTRY_WIDTH; j += L1_ENTRY_WIDTH) + { + size_t w = (l1[i] >> j) & L1_ENTRY_MASK; + switch (w) { + case L1_ENTRY_FULL: + if (len > 0) { + notify(off, len); + len = 0; + } + break; + case L1_ENTRY_FREE: + if (len == 0) + off = ( ( bits_per_slot * i + j ) / L1_ENTRY_WIDTH ) * slots_per_slotset * bits_per_slot; + len += bits_per_slotset; + break; + case L1_ENTRY_PARTIAL: + size_t pos = ( ( bits_per_slot * i + j ) / L1_ENTRY_WIDTH ) * slots_per_slotset; + for (size_t t = 0; t < slots_per_slotset; t++) { + size_t p = 0; + slot_t allocation_pattern = l0[pos + t]; + while (p < bits_per_slot) { + if (len == 0) { + //continue to skip allocated space, meaning bits set to 0 + ssize_t alloc_count = count_0s(allocation_pattern, p); + p += alloc_count; + //now we are switched to expecting free space + if (p < bits_per_slot) { + //now @p are 1s + ssize_t free_count = count_1s(allocation_pattern, p); + assert(free_count > 0); + len = free_count; + off = 
(pos + t) * bits_per_slot + p; + p += free_count; + } + } else { + //continue free region + ssize_t free_count = count_1s(allocation_pattern, p); + if (free_count == 0) { + notify(off, len); + len = 0; + } else { + p += free_count; + len += free_count; + } + } + } + } + break; + } + } + } + if (len > 0) + notify(off, len); +} + diff --git a/src/os/bluestore/fastbmap_allocator_impl.h b/src/os/bluestore/fastbmap_allocator_impl.h index 52764ae42b1..2c7112c123a 100755 --- a/src/os/bluestore/fastbmap_allocator_impl.h +++ b/src/os/bluestore/fastbmap_allocator_impl.h @@ -46,6 +46,7 @@ typedef mempool::bluestore_alloc::vector slot_vector_t; // fitting into cache line on x86_64 static const size_t slotset_width = 8; // 8 slots per set +static const size_t slots_per_slotset = 8; static const size_t slotset_bytes = sizeof(slot_t) * slotset_width; static const size_t bits_per_slot = sizeof(slot_t) * 8; static const size_t bits_per_slotset = slotset_bytes * 8; @@ -141,6 +142,7 @@ class AllocatorLevel01Loose : public AllocatorLevel01 L1_ENTRY_NOT_USED = 0x02, L1_ENTRY_FREE = 0x03, CHILD_PER_SLOT = bits_per_slot / L1_ENTRY_WIDTH, // 32 + L1_ENTRIES_PER_SLOT = bits_per_slot / L1_ENTRY_WIDTH, //32 CHILD_PER_SLOT_L0 = bits_per_slot, // 64 }; uint64_t _children_per_slot() const override @@ -469,8 +471,13 @@ public: } void collect_stats( std::map& bins_overall) override; + + static inline ssize_t count_0s(slot_t slot_val, size_t start_pos); + static inline ssize_t count_1s(slot_t slot_val, size_t start_pos); + void dump(std::function notify); }; + class AllocatorLevel01Compact : public AllocatorLevel01 { uint64_t _children_per_slot() const override