From c1ac6a1207cdb71a471321e42ceca517383b36c8 Mon Sep 17 00:00:00 2001
From: Adam Kupczyk <akupczyk@redhat.com>
Date: Thu, 21 Jun 2018 13:58:45 +0200
Subject: [PATCH] BlueStore/allocator: Improved (but slower) method of
 calculating fragmentation.

Signed-off-by: Adam Kupczyk <akupczyk@redhat.com>
---
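Notes:

The score is normalized between two extremes computed from the same free
list: "ideal" (all free space merged into one chunk) and "terrible" (free
space shredded into single allocation units), so 0.0 means no fragmentation
and 1.0 means absolute fragmentation. The standalone sketch below mirrors
that math so it can be checked by hand; it is an illustration only --
score_of and fragmentation_score are hypothetical names, and the
closed-form pow/log2 scoring stands in for the patch's linear
interpolation (the two agree exactly at power-of-two sizes):

  #include <cmath>
  #include <cstddef>
  #include <cstdio>
  #include <vector>

  // The per-byte value of a block grows by 1.1x each time its size
  // doubles, so a block of v bytes is worth v * 1.1^log2(v).
  static double score_of(size_t v) {
    return double(v) * std::pow(1.1, std::log2(double(v)));
  }

  static double fragmentation_score(const std::vector<size_t>& free_extents) {
    double score_sum = 0;
    size_t sum = 0;
    for (size_t len : free_extents) {
      score_sum += score_of(len);
      sum += len;
    }
    double ideal = score_of(sum);        // everything in one chunk
    double terrible = sum * score_of(1); // every byte in its own chunk
    return (ideal - score_sum) / (ideal - terrible);
  }

  int main() {
    // one contiguous 1 MiB extent -> 0.0 (no fragmentation)
    printf("%f\n", fragmentation_score({size_t(1) << 20}));
    // the same 1 MiB as 256 extents of 4 KiB -> roughly 0.63
    printf("%f\n", fragmentation_score(std::vector<size_t>(256, 4096)));
    return 0;
  }

get_fragmentation_score() enumerates every free extent through the new
dump(notify) callback added below, which is why it is more accurate but
slower than the existing get_fragmentation().
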
 src/os/bluestore/Allocator.cc               | 52 +++++++++++++
 src/os/bluestore/Allocator.h                |  4 +-
 src/os/bluestore/BitmapAllocator.cc         | 10 +++
 src/os/bluestore/BitmapAllocator.h          |  1 +
 src/os/bluestore/StupidAllocator.cc         | 10 +++
 src/os/bluestore/StupidAllocator.h          |  1 +
 src/os/bluestore/fastbmap_allocator_impl.cc | 85 +++++++++++++++++++++
 src/os/bluestore/fastbmap_allocator_impl.h  |  7 ++
 8 files changed, 169 insertions(+), 1 deletion(-)

diff --git a/src/os/bluestore/Allocator.cc b/src/os/bluestore/Allocator.cc
index b6026353165..4c140cefd48 100644
--- a/src/os/bluestore/Allocator.cc
+++ b/src/os/bluestore/Allocator.cc
@@ -29,3 +29,55 @@ void Allocator::release(const PExtentVector& release_vec)
   }
   release(release_set);
 }
+
+/**
+ * Gives fragmentation a numeric value.
+ *
+ * The following algorithm assigns a value to each existing free (unallocated)
+ * block. The value of a single block is the product of its size and its
+ * per-byte value, and the per-byte value grows with block size:
+ * if a block of size X has per-byte value p, a block of size 2*X has per-byte value 1.1*p.
+ *
+ * This could be computed with logarithms, but for speed it is linearly interpolated within ranges:
+ * [1] [2..3] [4..7] [8..15] ...
+ *  ^     ^      ^      ^
+ * 1.1  1.1^2  1.1^3  1.1^4 ...
+ *
+ * The final score places the summed value proportionally between the score under
+ * no fragmentation at all (0) and the score under absolute fragmentation (1).
+ */
+double Allocator::get_fragmentation_score()
+{
+  // how much more 2X bytes in a single chunk are worth than in two X-byte chunks
+  static const double double_size_worth = 1.1;
+  std::vector<double> scales{1};
+  double score_sum = 0;
+  size_t sum = 0;
+
+  auto get_score = [&](size_t v) -> double {
+    size_t sc = sizeof(v) * 8 - clz(v) - 1; // scale grade = floor(log2(v))
+    while (scales.size() <= sc + 1) {
+      // unlikely: expand the scales vector
+      scales.push_back(scales[scales.size() - 1] * double_size_worth);
+    }
+
+    size_t sc_shifted = size_t(1) << sc;
+    double x = double(v - sc_shifted) / sc_shifted; // x is in [0,1) within its scale grade
+    // linear interpolation between grades sc and sc+1
+    double score = (sc_shifted    ) * scales[sc]     * (1 - x) +
+                   (sc_shifted * 2) * scales[sc + 1] * x;
+    return score;
+  };
+
+  auto iterated_allocation = [&](size_t off, size_t len) {
+    ceph_assert(len > 0);
+    score_sum += get_score(len);
+    sum += len;
+  };
+  dump(iterated_allocation);
+
+
+  double ideal = get_score(sum);
+  double terrible = sum * get_score(1);
+  return (ideal - score_sum) / (ideal - terrible);
+}
diff --git a/src/os/bluestore/Allocator.h b/src/os/bluestore/Allocator.h
index 763e6f25d6e..00fdf0394b9 100644
--- a/src/os/bluestore/Allocator.h
+++ b/src/os/bluestore/Allocator.h
@@ -15,6 +15,7 @@
 #include <ostream>
 #include "include/ceph_assert.h"
 #include "os/bluestore/bluestore_types.h"
+#include <functional>
 
 class Allocator {
 public:
@@ -44,6 +45,7 @@ public:
   void release(const PExtentVector& release_set);
 
   virtual void dump() = 0;
+  virtual void dump(std::function<void(size_t off, size_t len)> notify) = 0;
 
   virtual void init_add_free(uint64_t offset, uint64_t length) = 0;
   virtual void init_rm_free(uint64_t offset, uint64_t length) = 0;
@@ -53,7 +55,7 @@ public:
   {
     return 0.0;
   }
-
+  virtual double get_fragmentation_score();
   virtual void shutdown() = 0;
 
   static Allocator *create(CephContext* cct, string type, int64_t size,
			    int64_t block_size);
diff --git a/src/os/bluestore/BitmapAllocator.cc b/src/os/bluestore/BitmapAllocator.cc
index 7c0d86a86be..367108c5880 100644
--- a/src/os/bluestore/BitmapAllocator.cc
+++ b/src/os/bluestore/BitmapAllocator.cc
@@ -100,3 +100,13 @@ void BitmapAllocator::dump()
     ++it;
   }
 }
+
+void BitmapAllocator::dump(std::function<void(size_t off, size_t len)> notify)
+{
+  size_t alloc_size = get_min_alloc_size();
+  auto multiply_by_alloc_size = [alloc_size, notify](size_t off, size_t len) {
+    notify(off * alloc_size, len * alloc_size);
+  };
+  std::lock_guard lck(lock);
+  l1.dump(multiply_by_alloc_size);
+}
diff --git a/src/os/bluestore/BitmapAllocator.h b/src/os/bluestore/BitmapAllocator.h
index 223c21dfbc5..df41c1a958c 100644
--- a/src/os/bluestore/BitmapAllocator.h
+++ b/src/os/bluestore/BitmapAllocator.h
@@ -36,6 +36,7 @@ public:
   }
 
   void dump() override;
+  void dump(std::function<void(size_t off, size_t len)> notify) override;
   double get_fragmentation(uint64_t) override
   {
     return _get_fragmentation();
diff --git a/src/os/bluestore/StupidAllocator.cc b/src/os/bluestore/StupidAllocator.cc
index bfc46f459f9..289e1736280 100644
--- a/src/os/bluestore/StupidAllocator.cc
+++ b/src/os/bluestore/StupidAllocator.cc
@@ -293,6 +293,16 @@ void StupidAllocator::dump()
   }
 }
 
+void StupidAllocator::dump(std::function<void(size_t off, size_t len)> notify)
+{
+  std::lock_guard l(lock);
+  for (unsigned bin = 0; bin < free.size(); ++bin) {
+    for (auto p = free[bin].begin(); p != free[bin].end(); ++p) {
+      notify(p.get_start(), p.get_len());
+    }
+  }
+}
+
 void StupidAllocator::init_add_free(uint64_t offset, uint64_t length)
 {
   std::lock_guard l(lock);
diff --git a/src/os/bluestore/StupidAllocator.h b/src/os/bluestore/StupidAllocator.h
index 6c6e7832f7e..d603b47173e 100644
--- a/src/os/bluestore/StupidAllocator.h
+++ b/src/os/bluestore/StupidAllocator.h
@@ -53,6 +53,7 @@ public:
   double get_fragmentation(uint64_t alloc_unit) override;
 
   void dump() override;
+  void dump(std::function<void(size_t off, size_t len)> notify) override;
 
   void init_add_free(uint64_t offset, uint64_t length) override;
   void init_rm_free(uint64_t offset, uint64_t length) override;
diff --git a/src/os/bluestore/fastbmap_allocator_impl.cc b/src/os/bluestore/fastbmap_allocator_impl.cc
index 089ff396ad7..0654bb8542a 100644
--- a/src/os/bluestore/fastbmap_allocator_impl.cc
+++ b/src/os/bluestore/fastbmap_allocator_impl.cc
@@ -544,3 +544,88 @@ void AllocatorLevel01Loose::collect_stats(
     bins_overall[cbits(free_seq_cnt) - 1]++;
   }
 }
+
+inline ssize_t AllocatorLevel01Loose::count_0s(slot_t slot_val, size_t start_pos)
+{
+#ifdef __GNUC__
+  size_t pos = __builtin_ffsll(slot_val >> start_pos);
+  if (pos == 0)
+    return sizeof(slot_t) * 8 - start_pos;
+  return pos - 1;
+#else
+  size_t pos = start_pos;
+  slot_t mask = slot_t(1) << pos;
+  while (pos < bits_per_slot && (slot_val & mask) == 0) {
+    mask <<= 1;
+    pos++;
+  }
+  return pos - start_pos;
+#endif
+}
+
+inline ssize_t AllocatorLevel01Loose::count_1s(slot_t slot_val, size_t start_pos)
+{
+  return count_0s(~slot_val, start_pos);
+}
+
+void AllocatorLevel01Loose::dump(
+  std::function<void(size_t offset, size_t length)> notify)
+{
+  size_t len = 0;
+  size_t off = 0;
+  for (size_t i = 0; i < l1.size(); i++)
+  {
+    for (size_t j = 0; j < L1_ENTRIES_PER_SLOT * L1_ENTRY_WIDTH; j += L1_ENTRY_WIDTH)
+    {
+      size_t w = (l1[i] >> j) & L1_ENTRY_MASK;
+      switch (w) {
+        case L1_ENTRY_FULL:
+          if (len > 0) {
+            notify(off, len);
+            len = 0;
+          }
+          break;
+        case L1_ENTRY_FREE:
+          if (len == 0)
+            off = ((bits_per_slot * i + j) / L1_ENTRY_WIDTH) * slots_per_slotset * bits_per_slot;
+          len += bits_per_slotset;
+          break;
+        case L1_ENTRY_PARTIAL:
+          size_t pos = ((bits_per_slot * i + j) / L1_ENTRY_WIDTH) * slots_per_slotset;
+          for (size_t t = 0; t < slots_per_slotset; t++) {
+            size_t p = 0;
+            slot_t allocation_pattern = l0[pos + t];
+            while (p < bits_per_slot) {
+              if (len == 0) {
+                // skip allocated space (bits set to 0)
+                ssize_t alloc_count = count_0s(allocation_pattern, p);
+                p += alloc_count;
+                // switched to expecting free space now
+                if (p < bits_per_slot) {
+                  // bits at @p are 1s (free)
+                  ssize_t free_count = count_1s(allocation_pattern, p);
+                  assert(free_count > 0);
+                  len = free_count;
+                  off = (pos + t) * bits_per_slot + p;
+                  p += free_count;
+                }
+              } else {
+                // continue the current free region
+                ssize_t free_count = count_1s(allocation_pattern, p);
+                if (free_count == 0) {
+                  notify(off, len);
+                  len = 0;
+                } else {
+                  p += free_count;
+                  len += free_count;
+                }
+              }
+            }
+          }
+          break;
+      }
+    }
+  }
+  if (len > 0)
+    notify(off, len);
+}
diff --git a/src/os/bluestore/fastbmap_allocator_impl.h b/src/os/bluestore/fastbmap_allocator_impl.h
index 4143f3d5d53..cfc44c550de 100644
--- a/src/os/bluestore/fastbmap_allocator_impl.h
+++ b/src/os/bluestore/fastbmap_allocator_impl.h
@@ -46,6 +46,7 @@ typedef mempool::bluestore_alloc::vector<slot_t> slot_vector_t;
 
 // fitting into cache line on x86_64
 static const size_t slotset_width = 8; // 8 slots per set
+static const size_t slots_per_slotset = 8;
 static const size_t slotset_bytes = sizeof(slot_t) * slotset_width;
 static const size_t bits_per_slot = sizeof(slot_t) * 8;
 static const size_t bits_per_slotset = slotset_bytes * 8;
@@ -141,6 +142,7 @@ class AllocatorLevel01Loose : public AllocatorLevel01
     L1_ENTRY_NOT_USED = 0x02,
     L1_ENTRY_FREE = 0x03,
     CHILD_PER_SLOT = bits_per_slot / L1_ENTRY_WIDTH, // 32
+    L1_ENTRIES_PER_SLOT = bits_per_slot / L1_ENTRY_WIDTH, // 32
    CHILD_PER_SLOT_L0 = bits_per_slot, // 64
   };
   uint64_t _children_per_slot() const override
@@ -469,8 +471,13 @@ public:
   }
   void collect_stats(
     std::map<size_t, size_t>& bins_overall) override;
+
+  static inline ssize_t count_0s(slot_t slot_val, size_t start_pos);
+  static inline ssize_t count_1s(slot_t slot_val, size_t start_pos);
+  void dump(std::function<void(size_t offset, size_t length)> notify);
 };
 
+
 class AllocatorLevel01Compact : public AllocatorLevel01
 {
   uint64_t _children_per_slot() const override
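
Reviewer note: the run-length helpers can be exercised in isolation. A
minimal sketch follows, assuming slot_t is a 64-bit word as in the
allocator; these are standalone copies of the patched helpers' GNUC
branch (so GCC/Clang only), not the class methods themselves:

  #include <cassert>
  #include <cstdint>
  #include <cstdio>
  #include <sys/types.h>

  typedef uint64_t slot_t;
  static const size_t bits_per_slot = sizeof(slot_t) * 8;

  // length of the run of 0s (allocated units) starting at start_pos
  static ssize_t count_0s(slot_t slot_val, size_t start_pos) {
    size_t pos = __builtin_ffsll(slot_val >> start_pos);
    if (pos == 0)
      return bits_per_slot - start_pos; // no set bit left: zeros to the end
    return pos - 1;
  }

  // length of the run of 1s (free units) starting at start_pos
  static ssize_t count_1s(slot_t slot_val, size_t start_pos) {
    return count_0s(~slot_val, start_pos);
  }

  int main() {
    // L0 pattern: bits 0..3 free (1), bits 4..7 allocated (0), bit 8 free
    slot_t pattern = 0x10f;
    assert(count_1s(pattern, 0) == 4);  // free run of 4 at offset 0
    assert(count_0s(pattern, 4) == 4);  // allocated run of 4 at offset 4
    assert(count_1s(pattern, 8) == 1);  // free run of 1 at offset 8
    assert(count_0s(pattern, 9) == ssize_t(bits_per_slot - 9));
    printf("run-length helpers behave as expected\n");
    return 0;
  }

AllocatorLevel01Loose::dump() walks the L1 map two bits at a time:
L1_ENTRY_FULL closes any open free region, L1_ENTRY_FREE extends it by a
whole slotset, and only L1_ENTRY_PARTIAL descends into the L0 bitmap,
where the helpers above extract the exact free runs.
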
-- 
2.39.5