From: Mark Nelson Date: Tue, 15 Jan 2019 21:50:36 +0000 (-0600) Subject: os/bluestore: Trim cache on add rather than in loop. X-Git-Tag: v15.1.0~2073^2~1 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=200bc7a37306d80d42f9c7decff6beb7dc7f5f34;p=ceph.git os/bluestore: Trim cache on add rather than in loop. Signed-off-by: Mark Nelson --- diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index 5db054135e16..651390b158f9 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -898,16 +898,23 @@ BlueStore::Cache *BlueStore::Cache::create(CephContext* cct, string type, return c; } -void BlueStore::Cache::trim(uint64_t onode_max, uint64_t buffer_max) +void BlueStore::Cache::trim_onodes() { std::lock_guard l(lock); - _trim(onode_max, buffer_max); + _trim_onodes(); } -void BlueStore::Cache::trim_all() +void BlueStore::Cache::trim_buffers() { std::lock_guard l(lock); - _trim(0, 0); + _trim_buffers(); +} + +void BlueStore::Cache::flush() +{ + std::lock_guard l(lock); + _trim_buffers_to(0); + _trim_onodes_to(0); } // LRUCache @@ -921,33 +928,11 @@ void BlueStore::LRUCache::_touch_onode(OnodeRef& o) onode_lru.push_front(*o); } -void BlueStore::LRUCache::_trim(uint64_t onode_max, uint64_t buffer_max) -{ - dout(20) << __func__ << " onodes " << onode_lru.size() << " / " << onode_max - << " buffers " << buffer_size << " / " << buffer_max - << dendl; - - _audit("trim start"); - - // buffers - while (buffer_size > buffer_max) { - auto i = buffer_lru.rbegin(); - if (i == buffer_lru.rend()) { - // stop if buffer_lru is now empty - break; - } - - Buffer *b = &*i; - ceph_assert(b->is_clean()); - dout(20) << __func__ << " rm " << *b << dendl; - b->space->_rm_buffer(this, b); - } - - // onodes - if (onode_max >= onode_lru.size()) { +void BlueStore::LRUCache::_trim_onodes_to(uint64_t max) { + if (max >= onode_lru.size()) { return; // don't even try } - uint64_t num = onode_lru.size() - onode_max; + uint64_t 
num = onode_lru.size() - max; auto p = onode_lru.end(); ceph_assert(p != onode_lru.begin()); @@ -959,7 +944,7 @@ void BlueStore::LRUCache::_trim(uint64_t onode_max, uint64_t buffer_max) int refs = o->nref.load(); if (refs > 1) { dout(20) << __func__ << " " << o->oid << " has " << refs - << " refs, skipping" << dendl; + << " refs, skipping" << dendl; if (++skipped >= max_skipped) { dout(20) << __func__ << " maximum skip pinned reached; stopping with " << num << " left to trim" << dendl; @@ -988,6 +973,21 @@ void BlueStore::LRUCache::_trim(uint64_t onode_max, uint64_t buffer_max) } } +void BlueStore::LRUCache::_trim_buffers_to(uint64_t max) { + while (buffer_size > max) { + auto i = buffer_lru.rbegin(); + if (i == buffer_lru.rend()) { + // stop if buffer_lru is now empty + break; + } + + Buffer *b = &*i; + ceph_assert(b->is_clean()); + dout(20) << __func__ << " rm " << *b << dendl; + b->space->_rm_buffer(this, b); + } +} + #ifdef DEBUG_CACHE void BlueStore::LRUCache::_audit(const char *when) { @@ -1139,18 +1139,56 @@ void BlueStore::TwoQCache::_adjust_buffer_size(Buffer *b, int64_t delta) } } -void BlueStore::TwoQCache::_trim(uint64_t onode_max, uint64_t buffer_max) -{ - dout(20) << __func__ << " onodes " << onode_lru.size() << " / " << onode_max - << " buffers " << buffer_bytes << " / " << buffer_max - << dendl; +void BlueStore::TwoQCache::_trim_onodes_to(uint64_t max) { + if (max >= onode_lru.size()) { + return; // don't even try + } + uint64_t num = onode_lru.size() - max; - _audit("trim start"); + auto p = onode_lru.end(); + ceph_assert(p != onode_lru.begin()); + --p; + int skipped = 0; + int max_skipped = g_conf()->bluestore_cache_trim_max_skip_pinned; + while (num > 0) { + Onode *o = &*p; + dout(20) << __func__ << " considering " << o << dendl; + int refs = o->nref.load(); + if (refs > 1) { + dout(20) << __func__ << " " << o->oid << " has " << refs + << " refs; skipping" << dendl; + if (++skipped >= max_skipped) { + dout(20) << __func__ << " maximum skip pinned 
reached; stopping with " + << num << " left to trim" << dendl; + break; + } + + if (p == onode_lru.begin()) { + break; + } else { + p--; + num--; + continue; + } + } + dout(30) << __func__ << " " << o->oid << " num=" << num <<" lru size="<<onode_lru.size()<< dendl; + if (p != onode_lru.begin()) { + onode_lru.erase(p--); + } else { + onode_lru.erase(p); + ceph_assert(num == 1); + } + o->get(); // paranoia + o->c->onode_map.remove(o->oid); + o->put(); + --num; + } +} - // buffers - if (buffer_bytes > buffer_max) { - uint64_t kin = buffer_max * cct->_conf->bluestore_2q_cache_kin_ratio; - uint64_t khot = buffer_max - kin; +void BlueStore::TwoQCache::_trim_buffers_to(uint64_t max) { + if (buffer_bytes > max) { + uint64_t kin = max * cct->_conf->bluestore_2q_cache_kin_ratio; + uint64_t khot = max - kin; // pre-calculate kout based on average buffer size too, // which is typical(the warm_in and hot lists may change later) @@ -1159,7 +1197,7 @@ void BlueStore::TwoQCache::_trim(uint64_t onode_max, uint64_t buffer_max) if (buffer_num) { uint64_t buffer_avg_size = buffer_bytes / buffer_num; ceph_assert(buffer_avg_size); - uint64_t calculated_buffer_num = buffer_max / buffer_avg_size; + uint64_t calculated_buffer_num = max / buffer_avg_size; kout = calculated_buffer_num * cct->_conf->bluestore_2q_cache_kout_ratio; } @@ -1239,51 +1277,6 @@ void BlueStore::TwoQCache::_trim(uint64_t onode_max, uint64_t buffer_max) b->space->_rm_buffer(this, b); } } - - // onodes - if (onode_max >= onode_lru.size()) { - return; // don't even try - } - uint64_t num = onode_lru.size() - onode_max; - - auto p = onode_lru.end(); - ceph_assert(p != onode_lru.begin()); - --p; - int skipped = 0; - int max_skipped = g_conf()->bluestore_cache_trim_max_skip_pinned; - while (num > 0) { - Onode *o = &*p; - dout(20) << __func__ << " considering " << o << dendl; - int refs = o->nref.load(); - if (refs > 1) { - dout(20) << __func__ << " " << o->oid << " has " << refs - << " refs; skipping" << dendl; - if (++skipped >= max_skipped) { - dout(20) << __func__ << " maximum skip pinned reached; stopping with " - << num << " left to trim" << dendl; - break; - } - - 
if (p == onode_lru.begin()) { - break; - } else { - p--; - num--; - continue; - } - } - dout(30) << __func__ << " " << o->oid << " num=" << num <<" lru size="<<onode_lru.size()<< dendl; - if (p != onode_lru.begin()) { - onode_lru.erase(p--); - } else { - onode_lru.erase(p); - ceph_assert(num == 1); - } - o->get(); // paranoia - o->c->onode_map.remove(o->oid); - o->put(); - --num; - } } #ifdef DEBUG_CACHE @@ -1413,6 +1406,7 @@ int BlueStore::BufferSpace::_discard(Cache* cache, uint32_t offset, uint32_t len cache->_audit("discard end 2"); break; } + cache->_trim_buffers(); return cache_private; } @@ -1517,7 +1511,7 @@ void BlueStore::BufferSpace::_finish_write(Cache* cache, uint64_t seq) ldout(cache->cct, 20) << __func__ << " added " << *b << dendl; } } - + cache->_trim_buffers(); cache->_audit("finish_write end"); } @@ -1569,6 +1563,7 @@ void BlueStore::BufferSpace::split(Cache* cache, size_t pos, BlueStore::BufferSp } } ceph_assert(writing.empty()); + cache->_trim_buffers(); } // OnodeSpace @@ -1589,6 +1584,7 @@ BlueStore::OnodeRef BlueStore::OnodeSpace::add(const ghobject_t& oid, OnodeRef o ldout(cache->cct, 30) << __func__ << " " << oid << " " << o << dendl; onode_map[oid] = o; cache->_add_onode(o, 1); + cache->_trim_onodes(); return o; } @@ -1663,7 +1659,7 @@ void BlueStore::OnodeSpace::rename( oldo.reset(new Onode(o->c, old_oid, o->key)); po->second = oldo; cache->_add_onode(po->second, 1); - + cache->_trim_onodes(); // add at new position and fix oid, key onode_map.insert(make_pair(new_oid, o)); cache->_touch_onode(o); @@ -3636,6 +3632,7 @@ void BlueStore::Collection::split_cache( } } } + dest->cache->_trim_onodes(); } // ======================================================= @@ -3702,8 +3699,8 @@ void *BlueStore::MempoolThread::entry() next_resize += resize_interval; } - // Now Trim - _trim_shards(interval_stats_trim); + // Now Resize the shards + _resize_shards(interval_stats_trim); interval_stats_trim = false; store->_update_cache_logger(); @@ -3724,7 +3721,7 @@ void BlueStore::MempoolThread::_adjust_cache_settings() data_cache->set_cache_ratio(store->cache_data_ratio); } -void 
BlueStore::MempoolThread::_trim_shards(bool interval_stats) +void BlueStore::MempoolThread::_resize_shards(bool interval_stats) { auto cct = store->cct; size_t num_shards = store->cache_shards.size(); @@ -3774,7 +3771,8 @@ void BlueStore::MempoolThread::_trim_shards(bool interval_stats) << " max_shard_buffer: " << max_shard_buffer << dendl; for (auto i : store->cache_shards) { - i->trim(max_shard_onodes, max_shard_buffer); + i->set_onode_max(max_shard_onodes); + i->set_buffer_max(max_shard_buffer); } } @@ -13708,7 +13706,7 @@ void BlueStore::_flush_cache() { dout(10) << __func__ << dendl; for (auto i : cache_shards) { - i->trim_all(); + i->flush(); ceph_assert(i->empty()); } for (auto& p : coll_map) { @@ -13734,7 +13732,7 @@ int BlueStore::flush_cache(ostream *os) { dout(10) << __func__ << dendl; for (auto i : cache_shards) { - i->trim_all(); + i->flush(); } return 0; diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h index 227716dfe3cd..ac788116f553 100644 --- a/src/os/bluestore/BlueStore.h +++ b/src/os/bluestore/BlueStore.h @@ -347,6 +347,7 @@ public: flags); b->cache_private = _discard(cache, offset, bl.length()); _add_buffer(cache, b, (flags & Buffer::FLAG_NOCACHE) ? 
0 : 1, nullptr); + cache->_trim_buffers(); } void _finish_write(Cache* cache, uint64_t seq); void did_read(Cache* cache, uint32_t offset, bufferlist& bl) { @@ -354,6 +355,7 @@ public: Buffer *b = new Buffer(this, Buffer::STATE_CLEAN, 0, offset, bl); b->cache_private = _discard(cache, offset, bl.length()); _add_buffer(cache, b, 1, nullptr); + cache->_trim_buffers(); } void read(Cache* cache, uint32_t offset, uint32_t length, @@ -1096,6 +1098,8 @@ public: std::atomic<uint64_t> num_extents = {0}; std::atomic<uint64_t> num_blobs = {0}; + std::atomic<uint64_t> onode_max = {0}; + std::atomic<uint64_t> buffer_max = {0}; std::array<std::pair<ghobject_t, mono_clock::time_point>, 64> dumped_onodes; @@ -1131,11 +1135,28 @@ public: --num_blobs; } - void trim(uint64_t onode_max, uint64_t buffer_max); + void set_onode_max(uint64_t max) { + onode_max = max; + } + + void set_buffer_max(uint64_t max) { + buffer_max = max; + } + + void flush(); + void trim_onodes(); + void trim_buffers(); + + virtual void _trim_onodes_to(uint64_t max) = 0; + virtual void _trim_buffers_to(uint64_t max) = 0; - void trim_all(); + void _trim_onodes() { + _trim_onodes_to(onode_max); + } - virtual void _trim(uint64_t onode_max, uint64_t buffer_max) = 0; + void _trim_buffers() { + _trim_buffers_to(buffer_max); + } virtual void add_stats(uint64_t *onodes, uint64_t *extents, uint64_t *blobs, @@ -1227,7 +1248,8 @@ public: _audit("_touch_buffer end"); } - void _trim(uint64_t onode_max, uint64_t buffer_max) override; + void _trim_onodes_to(uint64_t max) override; + void _trim_buffers_to(uint64_t max) override; void add_stats(uint64_t *onodes, uint64_t *extents, uint64_t *blobs, @@ -1322,7 +1344,8 @@ public: _audit("_touch_buffer end"); } - void _trim(uint64_t onode_max, uint64_t buffer_max) override; + void _trim_onodes_to(uint64_t max) override; + void _trim_buffers_to(uint64_t max) override; void add_stats(uint64_t *onodes, uint64_t *extents, uint64_t *blobs, @@ -2158,7 +2181,7 @@ private: private: void _adjust_cache_settings(); - void _trim_shards(bool interval_stats); + void 
_resize_shards(bool interval_stats); void _tune_cache_size(bool interval_stats); void _balance_cache( const std::list<std::shared_ptr<PriorityCache::PriCache>>& caches);