git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
os/bluestore : Refactoring Allocator Class 61502/head
author: Jaya Prakash <jayaprakash@ibm.com>
Wed, 5 Feb 2025 14:01:47 +0000 (19:31 +0530)
committer: Jaya Prakash <jayaprakash@ibm.com>
Tue, 25 Feb 2025 13:03:35 +0000 (18:33 +0530)
Fixes : https://tracker.ceph.com/issues/69314

Signed-off-by: Jaya Prakash <jayaprakash@ibm.com>
src/crimson/os/alienstore/CMakeLists.txt
src/os/CMakeLists.txt
src/os/bluestore/Allocator.cc
src/os/bluestore/Allocator.h
src/os/bluestore/AllocatorBase.cc
src/test/objectstore/Allocator_bench.cc
src/test/objectstore/allocator_replay_test.cc

index 389e2ec0f22901d6405f6421a0b1bb456825cf26..efd70b1c76ecda6a871baeb62d794aee85cc2725 100644 (file)
@@ -48,6 +48,7 @@ set(alien_store_srcs
   alien_log.cc
   ${PROJECT_SOURCE_DIR}/src/os/ObjectStore.cc
   ${PROJECT_SOURCE_DIR}/src/os/bluestore/Allocator.cc
+  ${PROJECT_SOURCE_DIR}/src/os/bluestore/AllocatorBase.cc
   ${PROJECT_SOURCE_DIR}/src/os/bluestore/AvlAllocator.cc
   ${PROJECT_SOURCE_DIR}/src/os/bluestore/BtreeAllocator.cc
   ${PROJECT_SOURCE_DIR}/src/os/bluestore/Btree2Allocator.cc
index a27656697df1a4844cb492ce1cf46362714b4918..71452e542bbcdd52aa6a54a531ad95b308e3d62c 100644 (file)
@@ -10,6 +10,7 @@ set(libos_srcs
 if(WITH_BLUESTORE)
   list(APPEND libos_srcs
     bluestore/Allocator.cc
+    bluestore/AllocatorBase.cc
     bluestore/BitmapFreelistManager.cc
     bluestore/BlueFS.cc
     bluestore/bluefs_types.cc
index 603f698e382aac1b32e7a7f144f51e5c19fd8742..67cd72e8369cbd170110a0208eff203b274f55a3 100644 (file)
@@ -2,6 +2,7 @@
 // vim: ts=8 sw=2 smarttab
 
 #include "Allocator.h"
+#include "AllocatorBase.h"
 #include <bit>
 #include "StupidAllocator.h"
 #include "BitmapAllocator.h"
@@ -21,153 +22,18 @@ using std::to_string;
 using ceph::bufferlist;
 using ceph::Formatter;
 
-class Allocator::SocketHook : public AdminSocketHook {
-  Allocator *alloc;
 
-  friend class Allocator;
-  std::string name;
-public:
-  SocketHook(Allocator *alloc, std::string_view _name) :
-    alloc(alloc), name(_name)
-  {
-    AdminSocket *admin_socket = g_ceph_context->get_admin_socket();
-    if (name.empty()) {
-      name = to_string((uintptr_t)this);
-    }
-    if (admin_socket) {
-      int r = admin_socket->register_command(
-       ("bluestore allocator dump " + name).c_str(),
-       this,
-       "dump allocator free regions");
-      if (r != 0)
-        alloc = nullptr; //some collision, disable
-      if (alloc) {
-        r = admin_socket->register_command(
-         ("bluestore allocator score " + name).c_str(),
-         this,
-         "give score on allocator fragmentation (0-no fragmentation, 1-absolute fragmentation)");
-        ceph_assert(r == 0);
-        r = admin_socket->register_command(
-          ("bluestore allocator fragmentation " + name).c_str(),
-          this,
-          "give allocator fragmentation (0-no fragmentation, 1-absolute fragmentation)");
-        ceph_assert(r == 0);
-        r = admin_socket->register_command(
-         ("bluestore allocator fragmentation histogram " + name +
-           " name=alloc_unit,type=CephInt,req=false" +
-           " name=num_buckets,type=CephInt,req=false").c_str(),
-         this,
-         "build allocator free regions state histogram");
-        ceph_assert(r == 0);
-      }
-    }
-  }
-  ~SocketHook()
-  {
-    AdminSocket *admin_socket = g_ceph_context->get_admin_socket();
-    if (admin_socket && alloc) {
-      admin_socket->unregister_commands(this);
-    }
-  }
-
-  int call(std::string_view command,
-          const cmdmap_t& cmdmap,
-          const bufferlist&,
-          Formatter *f,
-          std::ostream& ss,
-          bufferlist& out) override {
-    int r = 0;
-    if (command == "bluestore allocator dump " + name) {
-      f->open_object_section("allocator_dump");
-      f->dump_unsigned("capacity", alloc->get_capacity());
-      f->dump_unsigned("alloc_unit", alloc->get_block_size());
-      f->dump_string("alloc_type", alloc->get_type());
-      f->dump_string("alloc_name", name);
-
-      f->open_array_section("extents");
-      auto iterated_allocation = [&](size_t off, size_t len) {
-        ceph_assert(len > 0);
-        f->open_object_section("free");
-        char off_hex[30];
-        char len_hex[30];
-        snprintf(off_hex, sizeof(off_hex) - 1, "0x%zx", off);
-        snprintf(len_hex, sizeof(len_hex) - 1, "0x%zx", len);
-        f->dump_string("offset", off_hex);
-        f->dump_string("length", len_hex);
-        f->close_section();
-      };
-      alloc->foreach(iterated_allocation);
-      f->close_section();
-      f->close_section();
-    } else if (command == "bluestore allocator score " + name) {
-      f->open_object_section("fragmentation_score");
-      f->dump_float("fragmentation_rating", alloc->get_fragmentation_score());
-      f->close_section();
-    } else if (command == "bluestore allocator fragmentation " + name) {
-      f->open_object_section("fragmentation");
-      f->dump_float("fragmentation_rating", alloc->get_fragmentation());
-      f->close_section();
-    } else if (command == "bluestore allocator fragmentation histogram " + name) {
-      int64_t alloc_unit = alloc->get_block_size();
-      cmd_getval(cmdmap, "alloc_unit", alloc_unit);
-      if (alloc_unit <= 0  ||
-          p2align(alloc_unit, alloc->get_block_size()) != alloc_unit) {
-        ss << "Invalid allocation unit: '" << alloc_unit
-           << "', to be aligned with: '" << alloc->get_block_size()
-           << "'" << std::endl;
-        return -EINVAL;
-      }
-      int64_t num_buckets = 8;
-      cmd_getval(cmdmap, "num_buckets", num_buckets);
-      if (num_buckets < 2) {
-        ss << "Invalid amount of buckets (min=2): '" << num_buckets
-           << "'" << std::endl;
-        return -EINVAL;
-      }
-
-      Allocator::FreeStateHistogram hist(num_buckets);
-      alloc->foreach(
-        [&](size_t off, size_t len) {
-          hist.record_extent(uint64_t(alloc_unit), off, len);
-        });
-      f->open_array_section("extent_counts");
-      hist.foreach(
-        [&](uint64_t max_len, uint64_t total, uint64_t aligned, uint64_t units) {
-          f->open_object_section("c");
-          f->dump_unsigned("max_len", max_len);
-          f->dump_unsigned("total", total);
-          f->dump_unsigned("aligned", aligned);
-          f->dump_unsigned("units", units);
-          f->close_section();
-        }
-      );
-      f->close_section();
-    } else {
-      ss << "Invalid command" << std::endl;
-      r = -ENOSYS;
-    }
-    return r;
-  }
-
-};
 Allocator::Allocator(std::string_view name,
                      int64_t _capacity,
                      int64_t _block_size)
  : device_size(_capacity),
    block_size(_block_size)
-{
-  asok_hook = new SocketHook(this, name);
-}
+{}
 
 
 Allocator::~Allocator()
-{
-  delete asok_hook;
-}
+{}
 
-const string& Allocator::get_name() const {
-  return asok_hook->name;
-}
 
 Allocator *Allocator::create(
   CephContext* cct,
@@ -275,40 +141,3 @@ double Allocator::get_fragmentation_score()
   return (ideal - score_sum) / (ideal - terrible);
 }
 
-/*************
-* Allocator::FreeStateHistogram
-*************/
-using std::function;
-
-void Allocator::FreeStateHistogram::record_extent(uint64_t alloc_unit,
-                                                  uint64_t off,
-                                                  uint64_t len)
-{
-  size_t idx = myTraits._get_bucket(len);
-  ceph_assert(idx < buckets.size());
-  ++buckets[idx].total;
-
-  // now calculate the bucket for the chunk after alignment,
-  // resulting chunks shorter than alloc_unit are discarded
-  auto delta = p2roundup(off, alloc_unit) - off;
-  if (len >= delta + alloc_unit) {
-    len -= delta;
-    idx = myTraits._get_bucket(len);
-    ceph_assert(idx < buckets.size());
-    ++buckets[idx].aligned;
-    buckets[idx].alloc_units += len / alloc_unit;
-  }
-}
-void Allocator::FreeStateHistogram::foreach(
-  function<void(uint64_t max_len,
-                uint64_t total,
-                uint64_t aligned,
-                uint64_t unit)> cb)
-{
-  size_t i = 0;
-  for (const auto& b : buckets) {
-    cb(myTraits._get_bucket_max(i),
-      b.total, b.aligned, b.alloc_units);
-    ++i;
-  }
-}
index e27630776506043a19c2e947f2484fce798b439b..9d8c9feef1cda32bd1f10b560d9996e1eee803c1 100644 (file)
@@ -22,233 +22,6 @@ typedef interval_set<uint64_t> release_set_t;
 typedef release_set_t::value_type release_set_entry_t;
 
 class Allocator {
-protected:
-
-  /**
-   * This is a base set of traits for logical placing entries
-   * into limited collection of buckets depending on their sizes.
-   * Descandants should implement get_bucket(len) method to obtain
-   * bucket index using entry length.
-   */
-  struct LenPartitionedSetTraits {
-    size_t num_buckets;
-    size_t base_bits; // bits in min entry size
-    size_t base;      // min entry size
-    size_t factor;    // additional factor to be applied
-                      // to entry size when calculating
-                      // target bucket
-
-
-    LenPartitionedSetTraits(size_t _num_buckets,
-                            size_t _base_bits = 12,  //= 4096 bytes
-                            size_t _factor = 1) :
-      num_buckets(_num_buckets),
-      base_bits(_base_bits),
-      base(1ull << base_bits),
-      factor(_factor)
-    {
-      ceph_assert(factor);
-    }
-  };
-
-  /**
-   * This extends LenPartitionedSetTraits to implement linear bucket indexing:
-   * bucket index to be determined as entry's size divided by (base * factor),
-   * i.e. buckets are:
-   * [0..base)
-   * [base, base+base*factor)
-   * [base+base*factor, base+base*factor*2)
-   * [base+base*factor*2, base+base*factor*3)
-   * ...
-   */
-  struct LenPartitionedSetTraitsLinear : public LenPartitionedSetTraits {
-    using LenPartitionedSetTraits::LenPartitionedSetTraits;
-    /*
-     * Determines bucket index for a given extent's length in a bucket set
-     * with linear (len / base / factor) indexing.
-     * The first bucket is targeted for lengths < base,
-     * the last bucket is used for lengths above the maximum
-     * detemined by bucket count.
-     */
-    inline size_t _get_bucket(uint64_t len) const {
-      size_t idx = (len / factor) >> base_bits;
-      idx = idx < num_buckets ? idx : num_buckets - 1;
-      return idx;
-    }
-    /*
-     * returns upper bound of a specific bucket
-     */
-    inline size_t _get_bucket_max(size_t bucket) const {
-      return
-        bucket < num_buckets - 1 ?
-        base * factor * (1 + bucket) :
-        std::numeric_limits<uint64_t>::max();
-    }
-  };
-
-  /**
-   * This extends LenPartitionedSetTraits to implement exponential bucket indexing:
-   * target bucket bounds are determined as
-   * [0, base]
-   * (base, base*2^factor]
-   * (base*2^factor, base*2^(factor*2)]
-   * (base*2^(factor*2), base*2^(factor*3)]
-   * ...
-   *
-   */
-  struct LenPartitionedSetTraitsPow2 : public LenPartitionedSetTraits {
-    /*
-     * Determines bucket index for a given extent's length in a bucket collection
-     * with log2(len) indexing.
-     * The first bucket is targeted for lengths < base,
-     * The last bucket index is used for lengths above the maximum
-     * detemined by bucket count.
-     */
-    using LenPartitionedSetTraits::LenPartitionedSetTraits;
-    inline size_t _get_bucket(uint64_t len) const {
-      size_t idx;
-      const size_t len_p2_max =
-        base << ((factor * (num_buckets - 2)));
-      if (len <= base) {
-        idx = 0;
-      } else if (len > len_p2_max) {
-        idx = num_buckets - 1;
-      } else {
-        size_t most_bit = cbits(uint64_t(len - 1)) - 1;
-        idx = 1 + ((most_bit - base_bits) / factor);
-      }
-      ceph_assert(idx < num_buckets);
-      return idx;
-    }
-    /*
-     * returns upper bound of the bucket with log2(len) indexing.
-     */
-    inline size_t _get_bucket_max(size_t bucket) const {
-      return
-        bucket < num_buckets - 1 ?
-        base << (factor * bucket) :
-        std::numeric_limits<uint64_t>::max();
-    }
-  };
-
-  /*
-   * Lockless stack implementation
-   * that permits put/get operation exclusively
-   * if no waiting is needed.
-   * Conflicting operations are omitted.
-   */
-  class LocklessOpportunisticStack {
-    std::atomic<size_t> ref = 0;
-    std::atomic<size_t> count = 0;
-    std::vector<uint64_t> data;
-  public:
-    void init(size_t size) {
-      data.resize(size);
-    }
-    bool try_put(uint64_t& v) {
-      bool done = ++ref == 1 && count < data.size();
-      if (done) {
-        data[count++] = v;
-      }
-      --ref;
-      return done;
-    }
-    bool try_get(uint64_t& v) {
-      bool done = ++ref == 1 && count > 0;
-      if (done) {
-        v = data[--count];
-      }
-      --ref;
-      return done;
-    }
-    void foreach(std::function<void(uint64_t)> notify) {
-      for (size_t i = 0; i < count; i++) {
-        notify(data[i]);
-      }
-    }
-  };
-  /*
-   * Concurrently accessed extent (offset,length) cache
-   * which permits put/get operation exclusively if no waiting is needed.
-   * Implemented via a set of independent buckets (aka LocklessOpportunisticStack).
-   * Each bucket keeps extents of specific size only: 4K, 8K, 12K...64K
-   * which allows to avoid individual extent size tracking.
-   * Each bucket permits a single operation at a given time only,
-   * additional operations against the bucket are rejected meaning relevant
-   * extents aren't not cached.
-   */
-  class OpportunisticExtentCache {
-    const LenPartitionedSetTraitsLinear myTraits;
-    enum {
-      BUCKET_COUNT = 16,
-      EXTENTS_PER_BUCKET = 16, // amount of entries per single bucket,
-                               // total amount of entries will be
-                               // BUCKET_COUNT * EXTENTS_PER_BUCKET.
-    };
-
-    std::vector<LocklessOpportunisticStack> buckets;
-    std::atomic<size_t> hits = 0;
-    ceph::shared_mutex lock{
-      ceph::make_shared_mutex(std::string(), false, false, false)
-    };
-  public:
-    OpportunisticExtentCache() :
-      myTraits(BUCKET_COUNT + 1), // 16 regular buckets + 1 "catch-all" pseudo
-                                  // one to be used for out-of-bound checking
-                                  // since _get_*_size_bucket() methods imply
-                                  // the last bucket usage for the entries
-                                  // exceeding the max length.
-      buckets(BUCKET_COUNT)
-    {
-      //buckets.resize(BUCKET_COUNT);
-      for(auto& b : buckets) {
-        b.init(EXTENTS_PER_BUCKET);
-      }
-    }
-    bool try_put(uint64_t offset, uint64_t len) {
-      if (!lock.try_lock_shared()) {
-        return false;
-      }
-      bool ret = false;
-      ceph_assert(p2aligned(offset, myTraits.base));
-      ceph_assert(p2aligned(len, myTraits.base));
-      auto idx = myTraits._get_bucket(len);
-      if (idx < buckets.size())
-        ret = buckets[idx].try_put(offset);
-      lock.unlock_shared();
-      return ret;
-    }
-    bool try_get(uint64_t* offset, uint64_t len) {
-      if (!lock.try_lock_shared()) {
-        return false;
-      }
-      bool ret = false;
-      ceph_assert(offset);
-      ceph_assert(p2aligned(len, myTraits.base));
-      size_t idx = len >> myTraits.base_bits;
-      if (idx < buckets.size()) {
-        ret = buckets[idx].try_get(*offset);
-        if (ret) {
-          ++hits;
-        }
-      }
-      lock.unlock_shared();
-      return ret;
-    }
-    size_t get_hit_count() const {
-      return hits.load();
-    }
-    void foreach(std::function<void(uint64_t offset, uint64_t length)> notify) {
-      std::unique_lock _lock(lock);
-      for (uint64_t i = 0; i < buckets.size(); i++) {
-        auto cb = [&](uint64_t o) {
-          notify(o, i << myTraits.base_bits);
-        };
-        buckets[i].foreach(cb);
-      }
-    }
-  };
-
 public:
   Allocator(std::string_view name,
            int64_t _capacity,
@@ -307,7 +80,7 @@ public:
     );
 
 
-  const std::string& get_name() const;
+  virtual const std::string& get_name() const = 0;
   int64_t get_capacity() const
   {
     return device_size;
@@ -317,46 +90,9 @@ public:
     return block_size;
   }
 
-  // The following class implements Allocator's free extents histogram.
-  // Which is a set of N buckets to track extents layout.
-  // Extent matches a bucket depending on its length using the following
-  // length spans:
-  // [0..4K] (4K..16K] (16K..64K] .. (4M..16M] (16M..]
-  // Each bucket tracks:
-  // - total amount of extents of specific lengths
-  // - amount of extents aligned with allocation boundary
-  // - amount of allocation units in aligned extents
-  //
-  class FreeStateHistogram {
-    const LenPartitionedSetTraitsPow2 myTraits;
-    enum {
-      BASE_BITS = 12, // 4096 bytes
-      FACTOR = 2,
-    };
-    struct free_state_hist_bucket {
-      size_t total = 0;
-      size_t aligned = 0;
-      size_t alloc_units = 0;
-    };
-    std::vector<free_state_hist_bucket> buckets;
-  public:
-
-    FreeStateHistogram(size_t num_buckets)
-      : myTraits(num_buckets, BASE_BITS, FACTOR) {
-      buckets.resize(num_buckets);
-    }
-
-    void record_extent(uint64_t alloc_unit, uint64_t off, uint64_t len);
-    void foreach(
-      std::function<void(uint64_t, uint64_t, uint64_t, uint64_t)> cb);
-  };
-
-private:
-  class SocketHook;
-  SocketHook* asok_hook = nullptr;
 protected:
   const int64_t device_size = 0;
   const int64_t block_size = 0;
 };
 
-#endif
\ No newline at end of file
+#endif
index 99777cdf7ffbc0dde56b8cd0a5a01bf4d0d68c13..9e146f32257552fa401185585bc4ffeebf28c776 100644 (file)
@@ -109,20 +109,20 @@ public:
       f->dump_float("fragmentation_rating", alloc->get_fragmentation());
       f->close_section();
     } else if (command == "bluestore allocator fragmentation histogram " + name) {
-      int64_t alloc_unit = 4096;
+      int64_t alloc_unit = alloc->get_block_size();
       cmd_getval(cmdmap, "alloc_unit", alloc_unit);
       if (alloc_unit <= 0  ||
           p2align(alloc_unit, alloc->get_block_size()) != alloc_unit) {
         ss << "Invalid allocation unit: '" << alloc_unit
-           << ", to be aligned with: '" << alloc->get_block_size()
-           << std::endl;
+           << "', to be aligned with: '" << alloc->get_block_size()
+           << "'" << std::endl;
         return -EINVAL;
       }
       int64_t num_buckets = 8;
       cmd_getval(cmdmap, "num_buckets", num_buckets);
       if (num_buckets < 2) {
         ss << "Invalid amount of buckets (min=2): '" << num_buckets
-           << std::endl;
+           << "'" << std::endl;
         return -EINVAL;
       }
 
index 0c577f4fe1b3fa7fa531fe83e114cde4f41bfb6f..d557f6168ccefbe2e74e9ae8326873147c784ac7 100644 (file)
@@ -13,6 +13,7 @@
 #include "include/stringify.h"
 #include "include/Context.h"
 #include "os/bluestore/Allocator.h"
+#include "os/bluestore/AllocatorBase.h"
 
 #include <boost/random/mersenne_twister.hpp>
 #include <boost/random/uniform_int.hpp>
@@ -294,7 +295,7 @@ struct OverwriteTextContext : public Thread {
 
   void build_histogram() {
     const size_t num_buckets = 8;
-    Allocator::FreeStateHistogram hist(num_buckets);
+    AllocatorBase::FreeStateHistogram hist(num_buckets);
     alloc->foreach(
       [&](size_t off, size_t len) {
        hist.record_extent(uint64_t(alloc_unit), off, len);
index 874a172b50cd1dae13f2622634fa0976d05b6ffc..b76fd7f7514dfd4d671f1ea1875adfc8a497a2af 100644 (file)
@@ -16,6 +16,7 @@
 #include "include/denc.h"
 #include "global/global_init.h"
 #include "os/bluestore/Allocator.h"
+#include "os/bluestore/AllocatorBase.h"
 
 using namespace std;
 
@@ -773,7 +774,7 @@ int main(int argc, char **argv)
         std::cout << "Allocation unit:" << alloc_unit
                   << std::endl;
 
-        Allocator::FreeStateHistogram hist(num_buckets);
+        AllocatorBase::FreeStateHistogram hist(num_buckets);
         a->foreach(
           [&](size_t off, size_t len) {
             hist.record_extent(uint64_t(alloc_unit), off, len);