From: Pere Diaz Bou Date: Thu, 23 Nov 2023 15:47:57 +0000 (+0100) Subject: os/bluestore: use values instead of Buffer pointers in buffer_map X-Git-Tag: v19.3.0~51^2~2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=4da590009dbb1c153a47e7032f14bd95e53aec7d;p=ceph.git os/bluestore: use values instead of Buffer pointers in buffer_map buffer_map is a map>, therefore, when reading a Buffer we will have 2 cache misses. This commit refactors buffer_map to be a map so that Buffers are contigous in memory to improve data locality and reduce cache misses Signed-off-by: Pere Diaz Bou --- diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index 3f33ff4eab01..32de7981f8b0 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -627,8 +627,8 @@ void _dump_extent_map(CephContext *cct, const BlueStore::ExtentMap &em) std::lock_guard l(e.blob->shared_blob->get_cache()->lock); for (auto& i : e.blob->get_bc().buffer_map) { dout(LogLevelV) << __func__ << " 0x" << std::hex << i.first - << "~" << i.second->length << std::dec - << " " << *i.second << dendl; + << "~" << i.second.length << std::dec + << " " << i.second << dendl; } } } @@ -1714,7 +1714,7 @@ int BlueStore::BufferSpace::_discard(BufferCacheShard* cache, uint32_t offset, u auto i = _data_lower_bound(offset); uint32_t end = offset + length; while (i != buffer_map.end()) { - Buffer *b = i->second.get(); + Buffer *b = &i->second; if (b->offset >= end) { break; } @@ -1729,13 +1729,9 @@ int BlueStore::BufferSpace::_discard(BufferCacheShard* cache, uint32_t offset, u if (b->data.length()) { bufferlist bl; bl.substr_of(b->data, b->length - tail, tail); - Buffer *nb = new Buffer(this, b->state, b->seq, end, bl, b->flags); - nb->maybe_rebuild(); - _add_buffer(cache, nb, 0, b); + _add_buffer(cache, this, Buffer(this, b->state, b->seq, end, bl, b->flags), 0, 0, b); } else { - _add_buffer(cache, new Buffer(this, b->state, b->seq, end, tail, - b->flags), - 0, b); + _add_buffer(cache, this, Buffer(this, b->state, b->seq, end, tail, b->flags), 0, 0, b); } if (!b->is_writing()) { cache->_adjust_size(b, front - (int64_t)b->length); @@ -1765,13 +1761,11 @@ int BlueStore::BufferSpace::_discard(BufferCacheShard* cache, uint32_t offset, u if (b->data.length()) { bufferlist bl; bl.substr_of(b->data, b->length - keep, keep); - Buffer *nb = new Buffer(this, b->state, b->seq, end, bl, b->flags); - nb->maybe_rebuild(); - _add_buffer(cache, nb, 0, b); + _add_buffer(cache, this, + Buffer(this, b->state, b->seq, end, bl, b->flags), 0, 0, b); } else { - _add_buffer(cache, new Buffer(this, b->state, b->seq, end, keep, - b->flags), - 0, b); + _add_buffer(cache, this, + Buffer(this, b->state, b->seq, end, keep, b->flags), 0, 0, b); } _rm_buffer(cache, i); cache->_audit("discard end 2"); @@ -1796,9 +1790,8 @@ void BlueStore::BufferSpace::read( { std::lock_guard l(cache->lock); for (auto i = _data_lower_bound(offset); - i != buffer_map.end() && offset < end && i->first < end; - ++i) { - Buffer *b = i->second.get(); + i != buffer_map.end() && offset < end && i->first < end; ++i) { + Buffer *b = &i->second; ceph_assert(b->end() > offset); bool val = false; @@ -1898,9 +1891,10 @@ bool BlueStore::BufferSpace::_dup_writing(BufferCacheShard* cache, BufferSpace* copied = true; for (auto it = writing.begin(); it != writing.end(); ++it) { Buffer& b = *it; - Buffer* to_b = new Buffer(to, b.state, b.seq, b.offset, b.data, b.flags); - ceph_assert(to_b->is_writing()); - to->_add_buffer(cache, to_b, 0, nullptr); + ceph_assert(b.is_writing()); + to->_add_buffer(cache, to, + Buffer(to, b.state, b.seq, b.offset, b.data, b.flags), 0, + 0, nullptr); } } return copied; @@ -1914,39 +1908,42 @@ void BlueStore::BufferSpace::split(BufferCacheShard* cache, size_t pos, BlueStor auto p = --buffer_map.end(); while (true) { - if (p->second->end() <= pos) - break; + if (p->second.end() <= pos) break; - if (p->second->offset < pos) { - ldout(cache->cct, 30) << __func__ << " cut " << *p->second << dendl; - size_t left = pos - p->second->offset; - size_t right = p->second->length - left; - if (p->second->data.length()) { - bufferlist bl; - bl.substr_of(p->second->data, left, right); - r._add_buffer(cache, new Buffer(&r, p->second->state, p->second->seq, - 0, bl, p->second->flags), - 0, p->second.get()); + if (p->second.offset < pos) { + ldout(cache->cct, 30) << __func__ << " cut " << p->second << dendl; + size_t left = pos - p->second.offset; + size_t right = p->second.length - left; + if (p->second.data.length()) { + bufferlist bl; + bl.substr_of(p->second.data, left, right); + r._add_buffer( + cache, &r, + Buffer(&r, p->second.state, p->second.seq, 0, bl, p->second.flags), + 0, 0, &p->second); } else { - r._add_buffer(cache, new Buffer(&r, p->second->state, p->second->seq, - 0, right, p->second->flags), - 0, p->second.get()); + r._add_buffer(cache, &r, Buffer(&r, p->second.state, p->second.seq, 0, right, + p->second.flags), 0, 0, &p->second); } - cache->_adjust_size(p->second.get(), -right); - p->second->truncate(left); + cache->_adjust_size(&p->second, -right); + p->second.truncate(left); break; } - ceph_assert(p->second->end() > pos); - ldout(cache->cct, 30) << __func__ << " move " << *p->second << dendl; - if (p->second->data.length()) { - r._add_buffer(cache, new Buffer(&r, p->second->state, p->second->seq, - p->second->offset - pos, p->second->data, p->second->flags), - 0, p->second.get()); + ceph_assert(p->second.end() > pos); + ldout(cache->cct, 30) << __func__ << " move " << p->second << dendl; + if (p->second.data.length()) { + r._add_buffer(cache, &r, + Buffer(&r, p->second.state, p->second.seq, + p->second.offset - pos, p->second.data, + p->second.flags), + 0, 0, &p->second); } else { - r._add_buffer(cache, new Buffer(&r, p->second->state, p->second->seq, - p->second->offset - pos, p->second->length, p->second->flags), - 0, p->second.get()); + r._add_buffer(cache, &r, + Buffer(&r, p->second.state, p->second.seq, + p->second.offset - pos, p->second.length, + p->second.flags), + 0, 0, &p->second); } if (p == buffer_map.begin()) { _rm_buffer(cache, p); @@ -1964,7 +1961,7 @@ void BlueStore::BufferSpace::split(BufferCacheShard* cache, size_t pos, BlueStor std::ostream& operator<<(std::ostream& out, const BlueStore::BufferSpace& bc) { for (auto& [i, j] : bc.buffer_map) { - out << " [0x" << std::hex << i << "]=" << *j << std::dec; + out << " [0x" << std::hex << i << "]=" << j << std::dec; } if (!bc.writing.empty()) { out << " writing:"; @@ -2908,9 +2905,9 @@ uint32_t BlueStore::Blob::merge_blob(CephContext* cct, Blob* blob_to_dissolve) // move BufferSpace buffers while(!src->bc.buffer_map.empty()) { auto buf = src->bc.buffer_map.extract(src->bc.buffer_map.cbegin()); - buf.mapped()->space = &dst->bc; + buf.mapped().space = &dst->bc; if (dst->bc.buffer_map.count(buf.key()) == 0) { - dst->bc.buffer_map[buf.key()] = std::move(buf.mapped()); + dst->bc.buffer_map.insert({buf.key(), std::move(buf.mapped())}); } } // move BufferSpace writing @@ -5202,12 +5199,12 @@ void BlueStore::Collection::split_cache( auto rehome_blob = [&](Blob* b) { for (auto& i : b->bc.buffer_map) { - if (!i.second->is_writing()) { - ldout(store->cct, 1) << __func__ << " moving " << *i.second + if (!i.second.is_writing()) { + ldout(store->cct, 1) << __func__ << " moving " << i.second << dendl; - dest->cache->_move(cache, i.second.get()); + dest->cache->_move(cache, &i.second); } else { - ldout(store->cct, 1) << __func__ << " not moving " << *i.second + ldout(store->cct, 1) << __func__ << " not moving " << i.second << dendl; } } @@ -5236,13 +5233,13 @@ void BlueStore::Collection::split_cache( b.second->last_encoded_id = -1; } for (auto& e : o->extent_map.extent_map) { - cache->rm_extent(); - dest->cache->add_extent(); + cache->rm_extent(); + dest->cache->add_extent(); Blob* tb = e.blob.get(); - if (tb->last_encoded_id == -1) { - rehome_blob(tb); - tb->last_encoded_id = 0; - } + if (tb->last_encoded_id == -1) { + rehome_blob(tb); + tb->last_encoded_id = 0; + } } for (auto& b : o->extent_map.spanning_blob_map) { Blob* tb = b.second.get(); diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h index 44fd8b2e809b..4b157f86502c 100644 --- a/src/os/bluestore/BlueStore.h +++ b/src/os/bluestore/BlueStore.h @@ -17,6 +17,7 @@ #include "acconfig.h" +#include #include #include @@ -32,6 +33,7 @@ #include #include #include +#include #include "include/cpp-btree/btree_set.h" @@ -366,7 +368,7 @@ public: boost::intrusive::list_member_hook<>, &Buffer::state_item> > state_list_t; - mempool::bluestore_cache_meta::map> + mempool::bluestore_cache_meta::map buffer_map; // we use a bare intrusive list here instead of std::map because @@ -379,56 +381,63 @@ public: ceph_assert(writing.empty()); } - void _add_buffer(BufferCacheShard* cache, Buffer* b, int level, Buffer* near) { + void _add_buffer(BufferCacheShard *cache, BufferSpace *space, Buffer&& buffer, + uint16_t cache_private, int level, Buffer *near) { + auto it = buffer_map.emplace(buffer.offset, std::move(buffer)); + Buffer *cached_buffer = &it.first->second; + cached_buffer->cache_private = cache_private; + _add_buffer(cache, space, cached_buffer, level, near); + } + + void _add_buffer(BufferCacheShard *cache, BufferSpace *space, + Buffer *buffer, int level, Buffer *near) { cache->_audit("_add_buffer start"); - buffer_map[b->offset].reset(b); - if (b->is_writing()) { + if (buffer->is_writing()) { // we might get already cached data for which resetting mempool is inppropriate // hence calling try_assign_to_mempool - b->data.try_assign_to_mempool(mempool::mempool_bluestore_writing); - if (writing.empty() || writing.rbegin()->seq <= b->seq) { - writing.push_back(*b); + buffer->data.try_assign_to_mempool(mempool::mempool_bluestore_writing); + if (writing.empty() || writing.rbegin()->seq <= buffer->seq) { + writing.push_back(*buffer); } else { auto it = writing.begin(); - while (it->seq < b->seq) { + while (it->seq < buffer->seq) { ++it; } - ceph_assert(it->seq >= b->seq); + ceph_assert(it->seq >= buffer->seq); // note that this will insert b before it // hence the order is maintained - writing.insert(it, *b); + writing.insert(it, *buffer); } } else { - b->data.reassign_to_mempool(mempool::mempool_bluestore_cache_data); - cache->_add(b, level, near); + buffer->data.reassign_to_mempool(mempool::mempool_bluestore_cache_data); + cache->_add(buffer, level, near); } cache->_audit("_add_buffer end"); } void _rm_buffer(BufferCacheShard* cache, Buffer *b) { _rm_buffer(cache, buffer_map.find(b->offset)); } - std::map>::iterator + std::map::iterator _rm_buffer(BufferCacheShard* cache, - std::map>::iterator p) { + std::map::iterator p) { ceph_assert(p != buffer_map.end()); cache->_audit("_rm_buffer start"); - if (p->second->is_writing()) { - writing.erase(writing.iterator_to(*p->second)); + if (p->second.is_writing()) { + writing.erase(writing.iterator_to(p->second)); } else { - cache->_rm(p->second.get()); + cache->_rm(&p->second); } p = buffer_map.erase(p); cache->_audit("_rm_buffer end"); return p; } - std::map>::iterator _data_lower_bound( - uint32_t offset) { + std::map::iterator _data_lower_bound(uint32_t offset) { auto i = buffer_map.lower_bound(offset); if (i != buffer_map.begin()) { --i; - if (i->first + i->second->length <= offset) + if (i->first + i->second.length <= offset) ++i; } return i; @@ -449,18 +458,25 @@ public: void write(BufferCacheShard* cache, uint64_t seq, uint32_t offset, ceph::buffer::list& bl, unsigned flags) { std::lock_guard l(cache->lock); - Buffer *b = new Buffer(this, Buffer::STATE_WRITING, seq, offset, bl, + Buffer b(this, Buffer::STATE_WRITING, seq, offset, bl, flags); - b->cache_private = _discard(cache, offset, bl.length()); - _add_buffer(cache, b, (flags & Buffer::FLAG_NOCACHE) ? 0 : 1, nullptr); + uint16_t cache_private = _discard(cache, offset, bl.length()); + _add_buffer(cache, this, + Buffer(this, Buffer::STATE_WRITING, seq, offset, bl, + flags), + cache_private, (flags & Buffer::FLAG_NOCACHE) ? 0 : 1, + nullptr); cache->_trim(); } void _finish_write(BufferCacheShard* cache, uint64_t seq); void did_read(BufferCacheShard* cache, uint32_t offset, ceph::buffer::list& bl) { std::lock_guard l(cache->lock); - Buffer *b = new Buffer(this, Buffer::STATE_CLEAN, 0, offset, bl); - b->cache_private = _discard(cache, offset, bl.length()); - _add_buffer(cache, b, 1, nullptr); + Buffer b(this, Buffer::STATE_CLEAN, 0, offset, bl); + uint16_t cache_private = _discard(cache, offset, bl.length()); + _add_buffer( + cache, this, + Buffer(this, Buffer::STATE_CLEAN, 0, offset, bl, 0), + cache_private, 1, nullptr); cache->_trim(); } @@ -481,8 +497,8 @@ public: f->open_array_section("buffers"); for (auto& i : buffer_map) { f->open_object_section("buffer"); - ceph_assert(i.first == i.second->offset); - i.second->dump(f); + ceph_assert(i.first == i.second.offset); + i.second.dump(f); f->close_section(); } f->close_section();