From: Radosław Zarzyński Date: Tue, 29 Aug 2023 09:53:46 +0000 (+0200) Subject: mempool: implement sharding for type_t::items X-Git-Tag: v19.3.0~407^2~3 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=fd05b5a1bcc9fda4b135e43be472a0ab29413f06;p=ceph.git mempool: implement sharding for type_t::items It looks like this counter was initially thought to be a debug one but -- because of the `force_register` flag -- it is widely used now. Found by Yingxin Cheng Signed-off-by: Radosław Zarzyński --- diff --git a/src/common/mempool.cc b/src/common/mempool.cc index a1b83c2e9f86..5aa1bda4e54a 100644 --- a/src/common/mempool.cc +++ b/src/common/mempool.cc @@ -127,8 +127,12 @@ void mempool::pool_t::get_stats( for (auto &p : type_map) { std::string n = ceph_demangle(p.second.type_name); stats_t &s = (*by_type)[n]; - s.bytes = p.second.items * p.second.item_size; - s.items = p.second.items; + s.bytes = 0; + s.items = 0; + for (size_t i = 0 ; i < num_shards; ++i) { + s.bytes += p.second.shards[i].items * p.second.item_size; + s.items += p.second.shards[i].items; + } } } } diff --git a/src/include/mempool.h b/src/include/mempool.h index 23d7e80dc9eb..6b633c64d6fc 100644 --- a/src/include/mempool.h +++ b/src/include/mempool.h @@ -205,6 +205,24 @@ enum { num_shards = 1 << num_shard_bits }; +static size_t pick_a_shard_int() { +#ifndef _GNU_SOURCE + // Dirt cheap, see: + // https://fossies.org/dox/glibc-2.32/pthread__self_8c_source.html + size_t me = (size_t)pthread_self(); + size_t i = (me >> CEPH_PAGE_SHIFT) & ((1 << num_shard_bits) - 1); + return i; +#else + // a thread local storage is actually just an approximation; + // what we truly want is a _cpu local storage_. + // + // on the architectures we care about sched_getcpu() is + // a syscall-handled-in-userspace (vdso!). it grabs the cpu + // id kernel exposes to a task on context switch. + return sched_getcpu() & ((1 << num_shard_bits) - 1); +#endif +} + // // Align shard to a cacheline. 
// @@ -244,9 +262,16 @@ const char *get_pool_name(pool_index_t ix); struct type_t { const char *type_name; size_t item_size; - ceph::atomic items = {0}; // signed + struct type_shard_t { + ceph::atomic items = {0}; // signed + char __padding[128 - sizeof(ceph::atomic)]; + } __attribute__ ((aligned (128))); + type_shard_t shards[num_shards]; }; +static_assert(sizeof(type_t::type_shard_t) == 128, + "type_shard_t should be cacheline-sized"); + struct type_info_hash { std::size_t operator()(const std::type_info& k) const { return k.hash_code(); @@ -259,6 +284,8 @@ class pool_t { mutable std::mutex lock; // only used for types list std::unordered_map type_map; + template + friend class pool_allocator; public: // // How much this pool consumes. O() @@ -268,29 +295,6 @@ public: void adjust_count(ssize_t items, ssize_t bytes); - static size_t pick_a_shard_int() { -#ifndef _GNU_SOURCE - // Dirt cheap, see: - // https://fossies.org/dox/glibc-2.32/pthread__self_8c_source.html - size_t me = (size_t)pthread_self(); - size_t i = (me >> CEPH_PAGE_SHIFT) & ((1 << num_shard_bits) - 1); - return i; -#else - // a thread local storage is actually just an approximation; - // what we truly want is a _cpu local storage_. - // - // on the architectures we care about sched_getcpu() is - // a syscall-handled-in-userspace (vdso!). it grabs the cpu - // id kernel exposes to a task on context switch. 
- return sched_getcpu() & ((1 << num_shard_bits) - 1); -#endif - } - - shard_t* pick_a_shard() { - size_t i = pick_a_shard_int(); - return &shard[i]; - } - type_t *get_type(const std::type_info& ti, size_t size) { std::lock_guard l(lock); auto p = type_map.find(ti.name()); @@ -353,11 +357,12 @@ public: T* allocate(size_t n, void *p = nullptr) { size_t total = sizeof(T) * n; - shard_t *shard = pool->pick_a_shard(); - shard->bytes += total; - shard->items += n; + const auto shid = pick_a_shard_int(); + auto& shard = pool->shard[shid]; + shard.bytes += total; + shard.items += n; if (type) { - type->items += n; + type->shards[shid].items += n; } T* r = reinterpret_cast(new char[total]); return r; @@ -365,22 +370,24 @@ public: void deallocate(T* p, size_t n) { size_t total = sizeof(T) * n; - shard_t *shard = pool->pick_a_shard(); - shard->bytes -= total; - shard->items -= n; + const auto shid = pick_a_shard_int(); + auto& shard = pool->shard[shid]; + shard.bytes -= total; + shard.items -= n; if (type) { - type->items -= n; + type->shards[shid].items -= n; } delete[] reinterpret_cast(p); } T* allocate_aligned(size_t n, size_t align, void *p = nullptr) { size_t total = sizeof(T) * n; - shard_t *shard = pool->pick_a_shard(); - shard->bytes += total; - shard->items += n; + const auto shid = pick_a_shard_int(); + auto& shard = pool->shard[shid]; + shard.bytes += total; + shard.items += n; if (type) { - type->items += n; + type->shards[shid].items += n; } char *ptr; int rc = ::posix_memalign((void**)(void*)&ptr, align, total); @@ -392,11 +399,12 @@ public: void deallocate_aligned(T* p, size_t n) { size_t total = sizeof(T) * n; - shard_t *shard = pool->pick_a_shard(); - shard->bytes -= total; - shard->items -= n; + const auto shid = pick_a_shard_int(); + auto& shard = pool->shard[shid]; + shard.bytes -= total; + shard.items -= n; if (type) { - type->items -= n; + type->shards[shid].items -= n; } aligned_free(p); } diff --git a/src/test/test_c2c.cc b/src/test/test_c2c.cc index 
5fe1ac2f1fcd..1569be305e58 100644 --- a/src/test/test_c2c.cc +++ b/src/test/test_c2c.cc @@ -70,7 +70,7 @@ int main(int argc, const char **argv) while(1) { size_t i; if (sharding) { - i = mempool::pool_t::pick_a_shard_int(); + i = mempool::pick_a_shard_int(); } else { i = 0; }