From: Adam Kupczyk
Date: Thu, 26 Jan 2023 12:21:28 +0000 (+0000)
Subject: os/bluestore: Improve fragmentation calculation
X-Git-Tag: v19.0.0~1295^2~1
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=1ca6e1f0389f1894bdc91ca3c872b17fe2b3fae1;p=ceph.git

os/bluestore: Improve fragmentation calculation

The fragmentation score is calculated by giving a value to each free
chunk. Simplified, a byte in a chunk of size X*2 is worth 1.2 times as
much as a byte in a chunk of size X. This logic is not very useful when
chunks are really large, so for chunks from 1MB to 128MB the
per-doubling coefficient decreases linearly from 1.2 to 1, and chunks
larger than 128MB are counted without penalty. Overall, the change is
intended so that a split into large chunks only slightly increases
fragmentation, while preserving the sharp increase in fragmentation
when small chunks start to dominate.

Partially fixes: https://tracker.ceph.com/issues/58022

Signed-off-by: Adam Kupczyk
---

diff --git a/src/os/bluestore/Allocator.cc b/src/os/bluestore/Allocator.cc
index 3008f9ce8fd..5725e720d6f 100644
--- a/src/os/bluestore/Allocator.cc
+++ b/src/os/bluestore/Allocator.cc
@@ -190,8 +190,13 @@ void Allocator::release(const PExtentVector& release_vec)
 double Allocator::get_fragmentation_score()
 {
   // this value represents how much more 2X bytes in one chunk are worth than X + X bytes
-  static const double double_size_worth = 1.1 ;
-  std::vector<double> scales{1};
+  static const double double_size_worth_small = 1.2;
+  // chunks larger than 128MB are large enough that they should be counted without penalty
+  static const double double_size_worth_huge = 1;
+  static const size_t small_chunk_p2 = 20; // 1MB
+  static const size_t huge_chunk_p2 = 27;  // 128MB
+  // for chunks 1MB - 128MB the penalty coeffs are linearly weighted 1.2 (at small) ... 1 (at huge)
+  static std::vector<double> scales{1};
   double score_sum = 0;
   size_t sum = 0;
 
@@ -199,9 +204,17 @@ double Allocator::get_fragmentation_score()
     size_t sc = sizeof(v) * 8 - std::countl_zero(v) - 1; //assign to grade depending on log2(len)
     while (scales.size() <= sc + 1) {
       //unlikely expand scales vector
-      scales.push_back(scales[scales.size() - 1] * double_size_worth);
+      auto ss = scales.size();
+      double scale = double_size_worth_small;
+      if (ss >= huge_chunk_p2) {
+        scale = double_size_worth_huge;
+      } else if (ss > small_chunk_p2) {
+        // linear decrease 1.2 ... 1
+        scale = (double_size_worth_huge * (ss - small_chunk_p2) + double_size_worth_small * (huge_chunk_p2 - ss)) /
+                (huge_chunk_p2 - small_chunk_p2);
+      }
+      scales.push_back(scales[scales.size() - 1] * scale);
     }
-
     size_t sc_shifted = size_t(1) << sc;
     double x = double(v - sc_shifted) / sc_shifted; //x is <0,1) in its scale grade
     // linear extrapolation in its scale grade
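
For reference, here is a standalone sketch (not part of the patch) that
reproduces the new scales table and the get_score() valuation, so the
effect of the capped coefficient can be checked in isolation. The
constants and the loop body are copied from the patch; the closing
extrapolation statement is filled in from the comments above, since the
diff is truncated at that point, so treat it as an approximation of the
real code.

#include <bit>      // std::countl_zero (C++20)
#include <cstdio>
#include <vector>

// Constants as in the patch: per-doubling worth is 1.2 for small chunks
// and decays linearly to 1 between 2^20 (1MB) and 2^27 (128MB).
static const double double_size_worth_small = 1.2;
static const double double_size_worth_huge = 1;
static const size_t small_chunk_p2 = 20; // 1MB
static const size_t huge_chunk_p2 = 27;  // 128MB
static std::vector<double> scales{1};

// Value of one free chunk of v bytes (v > 0); mirrors the patched
// get_score() lambda. scales[sc] is the per-byte worth of a chunk of
// size 2^sc relative to a single free byte.
double get_score(size_t v) {
  size_t sc = sizeof(v) * 8 - std::countl_zero(v) - 1; // grade = log2(len)
  while (scales.size() <= sc + 1) { // expand scales table on demand
    auto ss = scales.size();
    double scale = double_size_worth_small;
    if (ss >= huge_chunk_p2) {
      scale = double_size_worth_huge;
    } else if (ss > small_chunk_p2) {
      // linear decrease 1.2 ... 1
      scale = (double_size_worth_huge * (ss - small_chunk_p2) +
               double_size_worth_small * (huge_chunk_p2 - ss)) /
              (huge_chunk_p2 - small_chunk_p2);
    }
    scales.push_back(scales.back() * scale);
  }
  size_t sc_shifted = size_t(1) << sc;
  double x = double(v - sc_shifted) / sc_shifted; // x is <0,1) in its grade
  // linear extrapolation between grade sc and grade sc+1 (reconstructed;
  // the diff above is cut off before this statement)
  return sc_shifted * scales[sc] * (1 - x) +
         (sc_shifted * 2) * scales[sc + 1] * x;
}

int main() {
  // With the capped coefficient, splitting 128MB into two 64MB chunks
  // loses no value: the per-doubling worth has decayed to 1 by 2^27.
  size_t huge = size_t(128) << 20;
  printf("one 128MB chunk : %.0f\n", get_score(huge));
  printf("two 64MB chunks : %.0f\n", 2 * get_score(huge / 2));
  // Splitting 1MB into 4KB pieces still loses most of its value,
  // preserving the sharp penalty for small-chunk fragmentation.
  size_t small = size_t(1) << 20;
  printf("one 1MB chunk   : %.0f\n", get_score(small));
  printf("256 x 4KB chunks: %.0f\n", 256 * get_score(size_t(4) << 10));
}

Compiled with g++ -std=c++20, the first two lines print the same value,
while the 1MB chunk is worth roughly four times as much as the same
space split into 4KB pieces, which is exactly the behaviour the commit
message describes.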