From feb5b094e8ccebbb7a8a7a48f116be836d80db9e Mon Sep 17 00:00:00 2001 From: Mark Nelson Date: Wed, 16 Oct 2019 10:34:59 -0400 Subject: [PATCH] os/bluestore: Keep separate onode cache pinned list. Signed-off-by: Mark Nelson --- src/os/bluestore/BlueStore.cc | 85 +++++++++++++++++++++++------------ src/os/bluestore/BlueStore.h | 48 +++++++++++++++----- 2 files changed, 95 insertions(+), 38 deletions(-) diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index ee9556a4837..ccbfa25d2b3 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -831,58 +831,81 @@ struct LruOnodeCacheShard : public BlueStore::OnodeCacheShard { BlueStore::Onode, boost::intrusive::list_member_hook<>, &BlueStore::Onode::lru_item> > list_t; + typedef boost::intrusive::list< + BlueStore::Onode, + boost::intrusive::member_hook< + BlueStore::Onode, + boost::intrusive::list_member_hook<>, + &BlueStore::Onode::pin_item> > pin_list_t; + list_t lru; + pin_list_t pin_list; explicit LruOnodeCacheShard(CephContext *cct) : BlueStore::OnodeCacheShard(cct) {} void _add(BlueStore::OnodeRef& o, int level) override { (level > 0) ? lru.push_front(*o) : lru.push_back(*o); + o->s = this; num = lru.size(); } void _rm(BlueStore::OnodeRef& o) override { - lru.erase(lru.iterator_to(*o)); + o->s = nullptr; + if (o->pinned) { + o->pinned = false; + pin_list.erase(pin_list.iterator_to(*o)); + } else { + lru.erase(lru.iterator_to(*o)); + } num = lru.size(); + num_pinned = pin_list.size(); } void _touch(BlueStore::OnodeRef& o) override { + if (o->pinned) { + return; + } lru.erase(lru.iterator_to(*o)); lru.push_front(*o); num = lru.size(); } - void _trim_to(uint64_t max) override + void _pin(BlueStore::Onode& o) override + { + if (o.pinned == true) { + return; + } + lru.erase(lru.iterator_to(o)); + pin_list.push_front(o); + o.pinned = true; + num = lru.size(); + num_pinned = pin_list.size(); + dout(30) << __func__ << " " << o.oid << " pinned" << dendl; + + } + void _unpin(BlueStore::Onode& o) override + { + if (o.pinned == false) { + return; + } + pin_list.erase(pin_list.iterator_to(o)); + lru.push_front(o); + o.pinned = false; + num = lru.size(); + num_pinned = pin_list.size(); + dout(30) << __func__ << " " << o.oid << " unpinned" << dendl; + } + void _trim_to(uint64_t new_size) override { - if (max >= lru.size()) { + if (new_size >= lru.size()) { return; // don't even try } - uint64_t n = lru.size() - max; - + uint64_t n = lru.size() - new_size; auto p = lru.end(); ceph_assert(p != lru.begin()); --p; - int skipped = 0; - int max_skipped = g_conf()->bluestore_cache_trim_max_skip_pinned; while (n > 0) { BlueStore::Onode *o = &*p; - int refs = o->nref.load(); - if (refs > 1) { - dout(20) << __func__ << " " << o->oid << " has " << refs - << " refs, skipping" << dendl; - if (++skipped >= max_skipped) { - dout(20) << __func__ << " maximum skip pinned reached; stopping with " - << n << " left to trim" << dendl; - break; - } - - if (p == lru.begin()) { - break; - } else { - p--; - n--; - continue; - } - } dout(30) << __func__ << " rm " << o->oid << dendl; if (p != lru.begin()) { lru.erase(p--); @@ -890,6 +913,7 @@ struct LruOnodeCacheShard : public BlueStore::OnodeCacheShard { lru.erase(p); ceph_assert(n == 1); } + o->s = nullptr; o->get(); // paranoia o->c->onode_map.remove(o->oid); o->put(); @@ -897,9 +921,10 @@ struct LruOnodeCacheShard : public BlueStore::OnodeCacheShard { } num = lru.size(); } - void add_stats(uint64_t *onodes) override + void add_stats(uint64_t *onodes, uint64_t *pinned_onodes) override { - *onodes += num; + *onodes += num + num_pinned; + *pinned_onodes += num_pinned; } }; @@ -4565,6 +4590,8 @@ void BlueStore::_init_logger() b.add_u64(l_bluestore_onodes, "bluestore_onodes", "Number of onodes in cache"); + b.add_u64(l_bluestore_pinned_onodes, "bluestore_pinned_onodes", + "Number of pinned onodes in cache"); b.add_u64_counter(l_bluestore_onode_hits, "bluestore_onode_hits", "Sum for onode-lookups hit in the cache"); b.add_u64_counter(l_bluestore_onode_misses, "bluestore_onode_misses", @@ -9166,18 +9193,20 @@ void BlueStore::_reap_collections() void BlueStore::_update_cache_logger() { uint64_t num_onodes = 0; + uint64_t num_pinned_onodes = 0; uint64_t num_extents = 0; uint64_t num_blobs = 0; uint64_t num_buffers = 0; uint64_t num_buffer_bytes = 0; for (auto c : onode_cache_shards) { - c->add_stats(&num_onodes); + c->add_stats(&num_onodes, &num_pinned_onodes); } for (auto c : buffer_cache_shards) { c->add_stats(&num_extents, &num_blobs, &num_buffers, &num_buffer_bytes); } logger->set(l_bluestore_onodes, num_onodes); + logger->set(l_bluestore_pinned_onodes, num_pinned_onodes); logger->set(l_bluestore_extents, num_extents); logger->set(l_bluestore_blobs, num_blobs); logger->set(l_bluestore_buffers, num_buffers); diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h index 8976c03b027..61c86eb4e7f 100644 --- a/src/os/bluestore/BlueStore.h +++ b/src/os/bluestore/BlueStore.h @@ -99,6 +99,7 @@ enum { l_bluestore_compressed_allocated, l_bluestore_compressed_original, l_bluestore_onodes, + l_bluestore_pinned_onodes, l_bluestore_onode_hits, l_bluestore_onode_misses, l_bluestore_onode_shard_hits, @@ -1045,20 +1046,22 @@ public: }; struct OnodeSpace; - + struct OnodeCacheShard; /// an in-memory object struct Onode { MEMPOOL_CLASS_HELPERS(); + // Not persisted and updated on cache insertion/removal + OnodeCacheShard *s; + bool pinned = false; // Only to be used by the onode cache shard std::atomic_int nref; ///< reference count Collection *c; - ghobject_t oid; /// key under PREFIX_OBJ where we are stored mempool::bluestore_cache_other::string key; - boost::intrusive::list_member_hook<> lru_item; + boost::intrusive::list_member_hook<> lru_item, pin_item; bluestore_onode_t onode; ///< metadata stored as value in kv store bool exists; ///< true if object logically exists @@ -1075,7 +1078,8 @@ public: Onode(Collection *c, const ghobject_t& o, const mempool::bluestore_cache_other::string& k) - : nref(0), + : s(nullptr), + nref(0), c(c), oid(o), key(k), @@ -1084,7 +1088,8 @@ public: } Onode(Collection* c, const ghobject_t& o, const string& k) - : nref(0), + : s(nullptr), + nref(0), c(c), oid(o), key(k), @@ -1093,7 +1098,8 @@ public: } Onode(Collection* c, const ghobject_t& o, const char* k) - : nref(0), + : s(nullptr), + nref(0), c(c), oid(o), key(k), @@ -1111,11 +1117,18 @@ public: void flush(); void get() { - ++nref; + if (++nref == 2 && s != nullptr) { + s->pin(*this); + } } void put() { - if (--nref == 0) + int n = --nref; + if (n == 1 && s != nullptr) { + s->unpin(*this); + } + if (n == 0) { delete this; + } } const string& get_omap_prefix(); @@ -1150,7 +1163,7 @@ public: return num; } - virtual void _trim_to(uint64_t max) = 0; + virtual void _trim_to(uint64_t new_size) = 0; void _trim() { if (cct->_conf->objectstore_blackhole) { // do not trim if we are throwing away IOs a layer down @@ -1158,6 +1171,7 @@ public: } _trim_to(max); } + void trim() { std::lock_guard l(lock); _trim(); @@ -1178,6 +1192,8 @@ public: /// A Generic onode Cache Shard struct OnodeCacheShard : public CacheShard { + std::atomic num_pinned = {0}; + std::array, 64> dumped_onodes; public: OnodeCacheShard(CephContext* cct) : CacheShard(cct) {} @@ -1186,8 +1202,20 @@ public: virtual void _add(OnodeRef& o, int level) = 0; virtual void _rm(OnodeRef& o) = 0; virtual void _touch(OnodeRef& o) = 0; - virtual void add_stats(uint64_t *onodes) = 0; + virtual void _pin(Onode& o) = 0; + virtual void _unpin(Onode& o) = 0; + + void pin(Onode& o) { + std::lock_guard l(lock); + _pin(o); + } + + void unpin(Onode& o) { + std::lock_guard l(lock); + _unpin(o); + } + virtual void add_stats(uint64_t *onodes, uint64_t *pinned_onodes) = 0; bool empty() { return _get_num() == 0; } -- 2.39.5