]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
os/bluestore: use values instead of Buffer pointers in buffer_map
authorPere Diaz Bou <pere-altea@hotmail.com>
Thu, 23 Nov 2023 15:47:57 +0000 (16:47 +0100)
committerPere Diaz Bou <pere-altea@hotmail.com>
Tue, 30 Jan 2024 15:05:48 +0000 (16:05 +0100)
buffer_map is a map<int, unique_ptr<Buffer>>; therefore, when reading a
Buffer we incur 2 cache misses (one for the map node, one for the
heap-allocated Buffer). This commit refactors buffer_map to be a
map<int, Buffer> so that Buffers are stored contiguously in memory,
improving data locality and reducing cache misses.

Signed-off-by: Pere Diaz Bou <pere-altea@hotmail.com>
src/os/bluestore/BlueStore.cc
src/os/bluestore/BlueStore.h

index 3f33ff4eab01dbf2d2926d3ad14487fdc404349a..32de7981f8b013172ba552b69162fd5e1f77184e 100644 (file)
@@ -627,8 +627,8 @@ void _dump_extent_map(CephContext *cct, const BlueStore::ExtentMap &em)
     std::lock_guard l(e.blob->shared_blob->get_cache()->lock);
     for (auto& i : e.blob->get_bc().buffer_map) {
       dout(LogLevelV) << __func__ << "       0x" << std::hex << i.first
-                     << "~" << i.second->length << std::dec
-                     << " " << *i.second << dendl;
+                     << "~" << i.second.length << std::dec
+                     << " " << i.second << dendl;
     }
   }
 }
@@ -1714,7 +1714,7 @@ int BlueStore::BufferSpace::_discard(BufferCacheShard* cache, uint32_t offset, u
   auto i = _data_lower_bound(offset);
   uint32_t end = offset + length;
   while (i != buffer_map.end()) {
-    Buffer *b = i->second.get();
+    Buffer *b = &i->second;
     if (b->offset >= end) {
       break;
     }
@@ -1729,13 +1729,9 @@ int BlueStore::BufferSpace::_discard(BufferCacheShard* cache, uint32_t offset, u
        if (b->data.length()) {
          bufferlist bl;
          bl.substr_of(b->data, b->length - tail, tail);
-         Buffer *nb = new Buffer(this, b->state, b->seq, end, bl, b->flags);
-         nb->maybe_rebuild();
-         _add_buffer(cache, nb, 0, b);
+         _add_buffer(cache, this, Buffer(this, b->state, b->seq, end, bl, b->flags), 0, 0, b);
        } else {
-         _add_buffer(cache, new Buffer(this, b->state, b->seq, end, tail,
-                                        b->flags),
-                     0, b);
+         _add_buffer(cache, this, Buffer(this, b->state, b->seq, end, tail, b->flags), 0, 0, b);
        }
        if (!b->is_writing()) {
          cache->_adjust_size(b, front - (int64_t)b->length);
@@ -1765,13 +1761,11 @@ int BlueStore::BufferSpace::_discard(BufferCacheShard* cache, uint32_t offset, u
     if (b->data.length()) {
       bufferlist bl;
       bl.substr_of(b->data, b->length - keep, keep);
-      Buffer *nb = new Buffer(this, b->state, b->seq, end, bl, b->flags);
-      nb->maybe_rebuild();
-      _add_buffer(cache, nb, 0, b);
+      _add_buffer(cache, this,
+                  Buffer(this, b->state, b->seq, end, bl, b->flags), 0, 0, b);
     } else {
-      _add_buffer(cache, new Buffer(this, b->state, b->seq, end, keep,
-                                    b->flags),
-                  0, b);
+      _add_buffer(cache, this,
+                  Buffer(this, b->state, b->seq, end, keep, b->flags), 0, 0, b);
     }
     _rm_buffer(cache, i);
     cache->_audit("discard end 2");
@@ -1796,9 +1790,8 @@ void BlueStore::BufferSpace::read(
   {
     std::lock_guard l(cache->lock);
     for (auto i = _data_lower_bound(offset);
-         i != buffer_map.end() && offset < end && i->first < end;
-         ++i) {
-      Buffer *b = i->second.get();
+         i != buffer_map.end() && offset < end && i->first < end; ++i) {
+      Buffer *b = &i->second;
       ceph_assert(b->end() > offset);
 
       bool val = false;
@@ -1898,9 +1891,10 @@ bool BlueStore::BufferSpace::_dup_writing(BufferCacheShard* cache, BufferSpace*
     copied = true;
     for (auto it = writing.begin(); it != writing.end(); ++it) {
       Buffer& b = *it;
-      Buffer* to_b = new Buffer(to, b.state, b.seq, b.offset, b.data, b.flags);
-      ceph_assert(to_b->is_writing());
-      to->_add_buffer(cache, to_b, 0, nullptr);
+      ceph_assert(b.is_writing());
+      to->_add_buffer(cache, to,
+                      Buffer(to, b.state, b.seq, b.offset, b.data, b.flags), 0,
+                      0, nullptr);
     }
   }
   return copied;
@@ -1914,39 +1908,42 @@ void BlueStore::BufferSpace::split(BufferCacheShard* cache, size_t pos, BlueStor
 
   auto p = --buffer_map.end();
   while (true) {
-    if (p->second->end() <= pos)
-      break;
+    if (p->second.end() <= pos) break;
 
-    if (p->second->offset < pos) {
-      ldout(cache->cct, 30) << __func__ << " cut " << *p->second << dendl;
-      size_t left = pos - p->second->offset;
-      size_t right = p->second->length - left;
-      if (p->second->data.length()) {
-       bufferlist bl;
-       bl.substr_of(p->second->data, left, right);
-       r._add_buffer(cache, new Buffer(&r, p->second->state, p->second->seq,
-                                        0, bl, p->second->flags),
-                     0, p->second.get());
+    if (p->second.offset < pos) {
+      ldout(cache->cct, 30) << __func__ << " cut " << p->second << dendl;
+      size_t left = pos - p->second.offset;
+      size_t right = p->second.length - left;
+      if (p->second.data.length()) {
+        bufferlist bl;
+        bl.substr_of(p->second.data, left, right);
+        r._add_buffer(
+            cache, &r,
+            Buffer(&r, p->second.state, p->second.seq, 0, bl, p->second.flags),
+            0, 0, &p->second);
       } else {
-       r._add_buffer(cache, new Buffer(&r, p->second->state, p->second->seq,
-                                        0, right, p->second->flags),
-                     0, p->second.get());
+        r._add_buffer(cache, &r, Buffer(&r, p->second.state, p->second.seq, 0, right,
+                      p->second.flags), 0, 0, &p->second);
       }
-      cache->_adjust_size(p->second.get(), -right);
-      p->second->truncate(left);
+      cache->_adjust_size(&p->second, -right);
+      p->second.truncate(left);
       break;
     }
 
-    ceph_assert(p->second->end() > pos);
-    ldout(cache->cct, 30) << __func__ << " move " << *p->second << dendl;
-    if (p->second->data.length()) {
-      r._add_buffer(cache, new Buffer(&r, p->second->state, p->second->seq,
-                               p->second->offset - pos, p->second->data, p->second->flags),
-                    0, p->second.get());
+    ceph_assert(p->second.end() > pos);
+    ldout(cache->cct, 30) << __func__ << " move " << p->second << dendl;
+    if (p->second.data.length()) {
+      r._add_buffer(cache, &r,
+                    Buffer(&r, p->second.state, p->second.seq,
+                                     p->second.offset - pos, p->second.data,
+                                     p->second.flags),
+                    0, 0, &p->second);
     } else {
-      r._add_buffer(cache, new Buffer(&r, p->second->state, p->second->seq,
-                               p->second->offset - pos, p->second->length, p->second->flags),
-                    0, p->second.get());
+      r._add_buffer(cache, &r,
+                    Buffer(&r, p->second.state, p->second.seq,
+                                     p->second.offset - pos, p->second.length,
+                                     p->second.flags),
+                    0, 0, &p->second);
     }
     if (p == buffer_map.begin()) {
       _rm_buffer(cache, p);
@@ -1964,7 +1961,7 @@ void BlueStore::BufferSpace::split(BufferCacheShard* cache, size_t pos, BlueStor
 std::ostream& operator<<(std::ostream& out, const BlueStore::BufferSpace& bc)
 {
   for (auto& [i, j] : bc.buffer_map) {
-    out << " [0x" << std::hex << i << "]=" << *j << std::dec;
+    out << " [0x" << std::hex << i << "]=" << j << std::dec;
   }
   if (!bc.writing.empty()) {
     out << " writing:";
@@ -2908,9 +2905,9 @@ uint32_t BlueStore::Blob::merge_blob(CephContext* cct, Blob* blob_to_dissolve)
   // move BufferSpace buffers
   while(!src->bc.buffer_map.empty()) {
     auto buf = src->bc.buffer_map.extract(src->bc.buffer_map.cbegin());
-    buf.mapped()->space = &dst->bc;
+    buf.mapped().space = &dst->bc;
     if (dst->bc.buffer_map.count(buf.key()) == 0) {
-      dst->bc.buffer_map[buf.key()] = std::move(buf.mapped());
+      dst->bc.buffer_map.insert({buf.key(), std::move(buf.mapped())});
     }
   }
   // move BufferSpace writing
@@ -5202,12 +5199,12 @@ void BlueStore::Collection::split_cache(
 
       auto rehome_blob = [&](Blob* b) {
        for (auto& i : b->bc.buffer_map) {
-         if (!i.second->is_writing()) {
-           ldout(store->cct, 1) << __func__ << "   moving " << *i.second
+         if (!i.second.is_writing()) {
+           ldout(store->cct, 1) << __func__ << "   moving " << i.second
                                 << dendl;
-           dest->cache->_move(cache, i.second.get());
+           dest->cache->_move(cache, &i.second);
          } else {
-           ldout(store->cct, 1) << __func__ << "   not moving " << *i.second
+           ldout(store->cct, 1) << __func__ << "   not moving " << i.second
                                 << dendl;
          }
        }
@@ -5236,13 +5233,13 @@ void BlueStore::Collection::split_cache(
        b.second->last_encoded_id = -1;
       }
       for (auto& e : o->extent_map.extent_map) {
-       cache->rm_extent();
-       dest->cache->add_extent();
+        cache->rm_extent();
+        dest->cache->add_extent();
        Blob* tb = e.blob.get();
-       if (tb->last_encoded_id == -1) {
-         rehome_blob(tb);
-         tb->last_encoded_id = 0;
-       }
+        if (tb->last_encoded_id == -1) {
+          rehome_blob(tb);
+          tb->last_encoded_id = 0;
+        }
       }
       for (auto& b : o->extent_map.spanning_blob_map) {
        Blob* tb = b.second.get();
index 44fd8b2e809bae13ed35d8504cbe3ff65bf05229..4b157f86502c4da28732f2e4c6133ab9b4f88819 100644 (file)
@@ -17,6 +17,7 @@
 
 #include "acconfig.h"
 
+#include <tuple>
 #include <unistd.h>
 
 #include <atomic>
@@ -32,6 +33,7 @@
 #include <boost/functional/hash.hpp>
 #include <boost/dynamic_bitset.hpp>
 #include <boost/circular_buffer.hpp>
+#include <utility>
 
 #include "include/cpp-btree/btree_set.h"
 
@@ -366,7 +368,7 @@ public:
        boost::intrusive::list_member_hook<>,
        &Buffer::state_item> > state_list_t;
 
-    mempool::bluestore_cache_meta::map<uint32_t, std::unique_ptr<Buffer>>
+    mempool::bluestore_cache_meta::map<uint32_t, Buffer>
       buffer_map;
 
     // we use a bare intrusive list here instead of std::map because
@@ -379,56 +381,63 @@ public:
       ceph_assert(writing.empty());
     }
 
-    void _add_buffer(BufferCacheShard* cache, Buffer* b, int level, Buffer* near) {
+    void _add_buffer(BufferCacheShard *cache, BufferSpace *space, Buffer&& buffer,
+                     uint16_t cache_private, int level, Buffer *near) {
+      auto it = buffer_map.emplace(buffer.offset, std::move(buffer));
+      Buffer *cached_buffer = &it.first->second;
+      cached_buffer->cache_private = cache_private;
+      _add_buffer(cache, space, cached_buffer, level, near);
+    }
+
+    void _add_buffer(BufferCacheShard *cache, BufferSpace *space,
+                     Buffer *buffer, int level, Buffer *near) {
       cache->_audit("_add_buffer start");
-      buffer_map[b->offset].reset(b);
-      if (b->is_writing()) {
+      if (buffer->is_writing()) {
         // we might get already cached data for which resetting mempool is inppropriate
         // hence calling try_assign_to_mempool
-        b->data.try_assign_to_mempool(mempool::mempool_bluestore_writing);
-        if (writing.empty() || writing.rbegin()->seq <= b->seq) {
-          writing.push_back(*b);
+        buffer->data.try_assign_to_mempool(mempool::mempool_bluestore_writing);
+        if (writing.empty() || writing.rbegin()->seq <= buffer->seq) {
+          writing.push_back(*buffer);
         } else {
           auto it = writing.begin();
-          while (it->seq < b->seq) {
+          while (it->seq < buffer->seq) {
             ++it;
           }
 
-          ceph_assert(it->seq >= b->seq);
+          ceph_assert(it->seq >= buffer->seq);
           // note that this will insert b before it
           // hence the order is maintained
-          writing.insert(it, *b);
+          writing.insert(it, *buffer);
         }
       } else {
-        b->data.reassign_to_mempool(mempool::mempool_bluestore_cache_data);
-        cache->_add(b, level, near);
+        buffer->data.reassign_to_mempool(mempool::mempool_bluestore_cache_data);
+        cache->_add(buffer, level, near);
       }
       cache->_audit("_add_buffer end");
     }
     void _rm_buffer(BufferCacheShard* cache, Buffer *b) {
       _rm_buffer(cache, buffer_map.find(b->offset));
     }
-    std::map<uint32_t, std::unique_ptr<Buffer>>::iterator
+    std::map<uint32_t, Buffer>::iterator
     _rm_buffer(BufferCacheShard* cache,
-                   std::map<uint32_t, std::unique_ptr<Buffer>>::iterator p) {
+                   std::map<uint32_t, Buffer>::iterator p) {
       ceph_assert(p != buffer_map.end());
       cache->_audit("_rm_buffer start");
-      if (p->second->is_writing()) {
-        writing.erase(writing.iterator_to(*p->second));
+      if (p->second.is_writing()) {
+        writing.erase(writing.iterator_to(p->second));
       } else {
-       cache->_rm(p->second.get());
+       cache->_rm(&p->second);
       }
       p = buffer_map.erase(p);
       cache->_audit("_rm_buffer end");
       return p;
     }
 
-    std::map<uint32_t,std::unique_ptr<Buffer>>::iterator _data_lower_bound(
-      uint32_t offset) {
+    std::map<uint32_t, Buffer>::iterator _data_lower_bound(uint32_t offset) {
       auto i = buffer_map.lower_bound(offset);
       if (i != buffer_map.begin()) {
        --i;
-       if (i->first + i->second->length <= offset)
+       if (i->first + i->second.length <= offset)
          ++i;
       }
       return i;
@@ -449,18 +458,25 @@ public:
     void write(BufferCacheShard* cache, uint64_t seq, uint32_t offset, ceph::buffer::list& bl,
               unsigned flags) {
       std::lock_guard l(cache->lock);
-      Buffer *b = new Buffer(this, Buffer::STATE_WRITING, seq, offset, bl,
+      Buffer b(this, Buffer::STATE_WRITING, seq, offset, bl,
                             flags);
-      b->cache_private = _discard(cache, offset, bl.length());
-      _add_buffer(cache, b, (flags & Buffer::FLAG_NOCACHE) ? 0 : 1, nullptr);
+      uint16_t cache_private = _discard(cache, offset, bl.length());
+      _add_buffer(cache, this,
+                  Buffer(this, Buffer::STATE_WRITING, seq, offset, bl,
+                                   flags),
+                  cache_private, (flags & Buffer::FLAG_NOCACHE) ? 0 : 1,
+                  nullptr);
       cache->_trim();
     }
     void _finish_write(BufferCacheShard* cache, uint64_t seq);
     void did_read(BufferCacheShard* cache, uint32_t offset, ceph::buffer::list& bl) {
       std::lock_guard l(cache->lock);
-      Buffer *b = new Buffer(this, Buffer::STATE_CLEAN, 0, offset, bl);
-      b->cache_private = _discard(cache, offset, bl.length());
-      _add_buffer(cache, b, 1, nullptr);
+      Buffer b(this, Buffer::STATE_CLEAN, 0, offset, bl);
+      uint16_t cache_private = _discard(cache, offset, bl.length());
+      _add_buffer(
+          cache, this,
+          Buffer(this, Buffer::STATE_CLEAN, 0, offset, bl, 0),
+          cache_private, 1, nullptr);
       cache->_trim();
     }
 
@@ -481,8 +497,8 @@ public:
       f->open_array_section("buffers");
       for (auto& i : buffer_map) {
        f->open_object_section("buffer");
-       ceph_assert(i.first == i.second->offset);
-       i.second->dump(f);
+       ceph_assert(i.first == i.second.offset);
+       i.second.dump(f);
        f->close_section();
       }
       f->close_section();