From 625e3461fbed904e85dfeea7ab3e8f7b74c4f384 Mon Sep 17 00:00:00 2001 From: Igor Fedotov Date: Thu, 22 Jun 2023 16:16:32 +0300 Subject: [PATCH] os/bluestore: implement allocator fragmentation histogram Signed-off-by: Igor Fedotov --- src/os/bluestore/Allocator.cc | 89 +++++++++++++++++++++++++++++++++++ src/os/bluestore/Allocator.h | 31 ++++++++++++ 2 files changed, 120 insertions(+) diff --git a/src/os/bluestore/Allocator.cc b/src/os/bluestore/Allocator.cc index 5725e720d6f56..5c5b8db70eccd 100644 --- a/src/os/bluestore/Allocator.cc +++ b/src/os/bluestore/Allocator.cc @@ -14,6 +14,7 @@ #include "common/debug.h" #include "common/admin_socket.h" #define dout_subsys ceph_subsys_bluestore +using TOPNSPC::common::cmd_getval; using std::string; using std::to_string; @@ -52,6 +53,13 @@ public: this, "give allocator fragmentation (0-no fragmentation, 1-absolute fragmentation)"); ceph_assert(r == 0); + r = admin_socket->register_command( + ("bluestore allocator fragmentation histogram " + name + + " name=alloc_unit,type=CephInt,req=false" + + " name=num_buckets,type=CephInt,req=false").c_str(), + this, + "build allocator free regions state histogram"); + ceph_assert(r == 0); } } } @@ -100,6 +108,39 @@ public: f->open_object_section("fragmentation"); f->dump_float("fragmentation_rating", alloc->get_fragmentation()); f->close_section(); + } else if (command == "bluestore allocator fragmentation histogram " + name) { + int64_t alloc_unit = 4096; + cmd_getval(cmdmap, "alloc_unit", alloc_unit); + if (alloc_unit == 0 || + p2align(alloc_unit, alloc->get_block_size()) != alloc_unit) { + ss << "Invalid allocation unit: '" << alloc_unit + << ", to be aligned with: '" << alloc->get_block_size() + << std::endl; + return -EINVAL; + } + int64_t num_buckets = 8; + cmd_getval(cmdmap, "num_buckets", num_buckets); + if (num_buckets < 2) { + ss << "Invalid amount of buckets (min=2): '" << num_buckets + << std::endl; + return -EINVAL; + } + + Allocator::FreeStateHistogram hist; + hist.resize(num_buckets); + alloc->build_free_state_histogram(alloc_unit, hist); + f->open_array_section("extent_counts"); + for(int i = 0; i < num_buckets; i++) { + f->open_object_section("c"); + f->dump_unsigned("max_len", + hist[i].get_max(i, num_buckets) + ); + f->dump_unsigned("total", hist[i].total); + f->dump_unsigned("aligned", hist[i].aligned); + f->dump_unsigned("units", hist[i].alloc_units); + f->close_section(); + } + f->close_section(); } else { ss << "Invalid command" << std::endl; r = -ENOSYS; @@ -234,3 +275,51 @@ double Allocator::get_fragmentation_score() double terrible = (sum / block_size) * get_score(block_size); return (ideal - score_sum) / (ideal - terrible); } + +void Allocator::build_free_state_histogram( + size_t alloc_unit, Allocator::FreeStateHistogram& hist) +{ + auto num_buckets = hist.size(); + ceph_assert(num_buckets); + + auto base = free_state_hist_bucket::base; + auto base_bits = free_state_hist_bucket::base_bits; + auto mux = free_state_hist_bucket::mux; + // maximum chunk size we track, + // provided by the bucket before the last one + size_t max = + free_state_hist_bucket::get_max(num_buckets - 2, num_buckets); + + auto iterated_allocation = [&](size_t off, size_t len) { + size_t idx; + if (len <= base) { + idx = 0; + } else if (len > max) { + idx = num_buckets - 1; + } else { + size_t most_bit = cbits(uint64_t(len-1)) - 1; + idx = 1 + ((most_bit - base_bits) / mux); + } + ceph_assert(idx < num_buckets); + ++hist[idx].total; + + // now calculate the bucket for the chunk after alignment, + // resulting chunks shorter than alloc_unit are discarded + auto delta = p2roundup(off, alloc_unit) - off; + if (len >= delta + alloc_unit) { + len -= delta; + if (len <= base) { + idx = 0; + } else if (len > max) { + idx = num_buckets - 1; + } else { + size_t most_bit = cbits(uint64_t(len-1)) - 1; + idx = 1 + ((most_bit - base_bits) / mux); + } + ++hist[idx].aligned; + hist[idx].alloc_units += len / alloc_unit; + } + }; + + foreach(iterated_allocation); +} diff --git a/src/os/bluestore/Allocator.h b/src/os/bluestore/Allocator.h index e378007c3c003..f136c98b2926a 100644 --- a/src/os/bluestore/Allocator.h +++ b/src/os/bluestore/Allocator.h @@ -88,6 +88,37 @@ public: return block_size; } + // The following code build Allocator's free extents histogram. + // Which is a set of N buckets to track extents layout. + // Extent matches a bucket depending on its length using the following + // length spans: + // [0..4K] (4K..16K] (16K..64K] .. (4M..16M] (16M..] + // Each bucket tracks: + // - total amount of extents of specific lengths + // - amount of extents aligned with allocation boundary + // - amount of allocation units in aligned extents + // + struct free_state_hist_bucket { + static const size_t base_bits = 12; + static const size_t base = 1ull << base_bits; + static const size_t mux = 2; + + size_t total = 0; + size_t aligned = 0; + size_t alloc_units = 0; + + // returns upper bound of the bucket + static size_t get_max(size_t bucket, size_t num_buckets) { + return + bucket < num_buckets - 1 ? + base << (mux * bucket) : + std::numeric_limits::max(); + }; + }; + + typedef std::vector FreeStateHistogram; + void build_free_state_histogram(size_t alloc_unit, FreeStateHistogram& hist); + private: class SocketHook; SocketHook* asok_hook = nullptr; -- 2.39.5