os/bluestore: Trim cache on add rather than in loop.

author Mark Nelson <mnelson@redhat.com>

Tue, 15 Jan 2019 21:50:36 +0000 (15:50 -0600)

committer Mark Nelson <mnelson@redhat.com>

Sat, 15 Jun 2019 10:51:48 +0000 (06:51 -0400)
author Mark Nelson <mnelson@redhat.com>
Tue, 15 Jan 2019 21:50:36 +0000 (15:50 -0600)
committer Mark Nelson <mnelson@redhat.com>
Sat, 15 Jun 2019 10:51:48 +0000 (06:51 -0400)
diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc

index 5db054135e167ed80f56ba949d9d184da2dc2ba8..651390b158f936fc60c83f06500f5a3828c6e993 100644 (file)
--- a/src/os/bluestore/BlueStore.cc
+++ b/src/os/bluestore/BlueStore.cc
@@ -898,16 +898,23 @@ BlueStore::Cache *BlueStore::Cache::create(CephContext* cct, string type,
    return c;
  }
  
-void BlueStore::Cache::trim(uint64_t onode_max, uint64_t buffer_max)
+void BlueStore::Cache::trim_onodes()
  {
    std::lock_guard l(lock);
-  _trim(onode_max, buffer_max);
+  _trim_onodes();
  }
  
-void BlueStore::Cache::trim_all()
+void BlueStore::Cache::trim_buffers()
  {
    std::lock_guard l(lock);
-  _trim(0, 0);
+  _trim_buffers();
+}
+
+void BlueStore::Cache::flush()
+{
+  std::lock_guard l(lock);
+  _trim_buffers_to(0);
+  _trim_onodes_to(0);
  }
  
  // LRUCache
@@ -921,33 +928,11 @@ void BlueStore::LRUCache::_touch_onode(OnodeRef& o)
    onode_lru.push_front(*o);
  }
  
-void BlueStore::LRUCache::_trim(uint64_t onode_max, uint64_t buffer_max)
-{
-  dout(20) << __func__ << " onodes " << onode_lru.size() << " / " << onode_max
-          << " buffers " << buffer_size << " / " << buffer_max
-          << dendl;
-
-  _audit("trim start");
-
-  // buffers
-  while (buffer_size > buffer_max) {
-    auto i = buffer_lru.rbegin();
-    if (i == buffer_lru.rend()) {
-      // stop if buffer_lru is now empty
-      break;
-    }
-
-    Buffer *b = &*i;
-    ceph_assert(b->is_clean());
-    dout(20) << __func__ << " rm " << *b << dendl;
-    b->space->_rm_buffer(this, b);
-  }
-
-  // onodes
-  if (onode_max >= onode_lru.size()) {
+void BlueStore::LRUCache::_trim_onodes_to(uint64_t max) {
+  if (max >= onode_lru.size()) {
      return; // don't even try
    }
-  uint64_t num = onode_lru.size() - onode_max;
+  uint64_t num = onode_lru.size() - max;
  
    auto p = onode_lru.end();
    ceph_assert(p != onode_lru.begin());
@@ -959,7 +944,7 @@ void BlueStore::LRUCache::_trim(uint64_t onode_max, uint64_t buffer_max)
      int refs = o->nref.load();
      if (refs > 1) {
        dout(20) << __func__ << "  " << o->oid << " has " << refs
-              << " refs, skipping" << dendl;
+               << " refs, skipping" << dendl;
        if (++skipped >= max_skipped) {
          dout(20) << __func__ << " maximum skip pinned reached; stopping with "
                   << num << " left to trim" << dendl;
@@ -988,6 +973,21 @@ void BlueStore::LRUCache::_trim(uint64_t onode_max, uint64_t buffer_max)
    }
  }
  
+void BlueStore::LRUCache::_trim_buffers_to(uint64_t max) {
+  while (buffer_size > max) {
+    auto i = buffer_lru.rbegin();
+    if (i == buffer_lru.rend()) {
+      // stop if buffer_lru is now empty
+      break;
+    }
+
+    Buffer *b = &*i;
+    ceph_assert(b->is_clean());
+    dout(20) << __func__ << " rm " << *b << dendl;
+    b->space->_rm_buffer(this, b);
+  }
+}
+
  #ifdef DEBUG_CACHE
  void BlueStore::LRUCache::_audit(const char *when)
  {
@@ -1139,18 +1139,56 @@ void BlueStore::TwoQCache::_adjust_buffer_size(Buffer *b, int64_t delta)
    }
  }
  
-void BlueStore::TwoQCache::_trim(uint64_t onode_max, uint64_t buffer_max)
-{
-  dout(20) << __func__ << " onodes " << onode_lru.size() << " / " << onode_max
-          << " buffers " << buffer_bytes << " / " << buffer_max
-          << dendl;
+void BlueStore::TwoQCache::_trim_onodes_to(uint64_t max) {
+  if (max >= onode_lru.size()) {
+    return; // don't even try
+  }
+  uint64_t num = onode_lru.size() - max;
  
-  _audit("trim start");
+  auto p = onode_lru.end();
+  ceph_assert(p != onode_lru.begin());
+  --p;
+  int skipped = 0;
+  int max_skipped = g_conf()->bluestore_cache_trim_max_skip_pinned;
+  while (num > 0) {
+    Onode *o = &*p;
+    dout(20) << __func__ << " considering " << o << dendl;
+    int refs = o->nref.load();
+    if (refs > 1) {
+      dout(20) << __func__ << "  " << o->oid << " has " << refs
+               << " refs; skipping" << dendl;
+      if (++skipped >= max_skipped) {
+        dout(20) << __func__ << " maximum skip pinned reached; stopping with "
+                 << num << " left to trim" << dendl;
+        break;
+      }
+
+      if (p == onode_lru.begin()) {
+        break;
+      } else {
+        p--;
+        num--;
+        continue;
+      }
+    }
+    dout(30) << __func__ << " " << o->oid << " num=" << num <<" lru size="<<onode_lru.size()<< dendl;
+    if (p != onode_lru.begin()) {
+      onode_lru.erase(p--);
+    } else {
+      onode_lru.erase(p);
+      ceph_assert(num == 1);
+    }
+    o->get();  // paranoia
+    o->c->onode_map.remove(o->oid);
+    o->put();
+    --num;
+  }
+}
  
-  // buffers
-  if (buffer_bytes > buffer_max) {
-    uint64_t kin = buffer_max * cct->_conf->bluestore_2q_cache_kin_ratio;
-    uint64_t khot = buffer_max - kin;
+void BlueStore::TwoQCache::_trim_buffers_to(uint64_t max) {
+  if (buffer_bytes > max) {
+    uint64_t kin = max * cct->_conf->bluestore_2q_cache_kin_ratio;
+    uint64_t khot = max - kin;
  
      // pre-calculate kout based on average buffer size too,
      // which is typical(the warm_in and hot lists may change later)
@@ -1159,7 +1197,7 @@ void BlueStore::TwoQCache::_trim(uint64_t onode_max, uint64_t buffer_max)
      if (buffer_num) {
        uint64_t buffer_avg_size = buffer_bytes / buffer_num;
        ceph_assert(buffer_avg_size);
-      uint64_t calculated_buffer_num = buffer_max / buffer_avg_size;
+      uint64_t calculated_buffer_num = max / buffer_avg_size;
        kout = calculated_buffer_num * cct->_conf->bluestore_2q_cache_kout_ratio;
      }
  
@@ -1239,51 +1277,6 @@ void BlueStore::TwoQCache::_trim(uint64_t onode_max, uint64_t buffer_max)
        b->space->_rm_buffer(this, b);
      }
    }
-
-  // onodes
-  if (onode_max >= onode_lru.size()) {
-    return; // don't even try
-  }
-  uint64_t num = onode_lru.size() - onode_max;
-
-  auto p = onode_lru.end();
-  ceph_assert(p != onode_lru.begin());
-  --p;
-  int skipped = 0;
-  int max_skipped = g_conf()->bluestore_cache_trim_max_skip_pinned;
-  while (num > 0) {
-    Onode *o = &*p;
-    dout(20) << __func__ << " considering " << o << dendl;
-    int refs = o->nref.load();
-    if (refs > 1) {
-      dout(20) << __func__ << "  " << o->oid << " has " << refs
-              << " refs; skipping" << dendl;
-      if (++skipped >= max_skipped) {
-        dout(20) << __func__ << " maximum skip pinned reached; stopping with "
-                 << num << " left to trim" << dendl;
-        break;
-      }
-
-      if (p == onode_lru.begin()) {
-        break;
-      } else {
-        p--;
-        num--;
-        continue;
-      }
-    }
-    dout(30) << __func__ << " " << o->oid << " num=" << num <<" lru size="<<onode_lru.size()<< dendl;
-    if (p != onode_lru.begin()) {
-      onode_lru.erase(p--);
-    } else {
-      onode_lru.erase(p);
-      ceph_assert(num == 1);
-    }
-    o->get();  // paranoia
-    o->c->onode_map.remove(o->oid);
-    o->put();
-    --num;
-  }
  }
  
  #ifdef DEBUG_CACHE
@@ -1413,6 +1406,7 @@ int BlueStore::BufferSpace::_discard(Cache* cache, uint32_t offset, uint32_t len
      cache->_audit("discard end 2");
      break;
    }
+  cache->_trim_buffers();
    return cache_private;
  }
  
@@ -1517,7 +1511,7 @@ void BlueStore::BufferSpace::_finish_write(Cache* cache, uint64_t seq)
        ldout(cache->cct, 20) << __func__ << " added " << *b << dendl;
      }
    }
-
+  cache->_trim_buffers();
    cache->_audit("finish_write end");
  }
  
@@ -1569,6 +1563,7 @@ void BlueStore::BufferSpace::split(Cache* cache, size_t pos, BlueStore::BufferSp
      }
    }
    ceph_assert(writing.empty());
+  cache->_trim_buffers();
  }
  
  // OnodeSpace
@@ -1589,6 +1584,7 @@ BlueStore::OnodeRef BlueStore::OnodeSpace::add(const ghobject_t& oid, OnodeRef o
    ldout(cache->cct, 30) << __func__ << " " << oid << " " << o << dendl;
    onode_map[oid] = o;
    cache->_add_onode(o, 1);
+  cache->_trim_onodes();
    return o;
  }
  
@@ -1663,7 +1659,7 @@ void BlueStore::OnodeSpace::rename(
    oldo.reset(new Onode(o->c, old_oid, o->key));
    po->second = oldo;
    cache->_add_onode(po->second, 1);
-
+  cache->_trim_onodes();
    // add at new position and fix oid, key
    onode_map.insert(make_pair(new_oid, o));
    cache->_touch_onode(o);
@@ -3636,6 +3632,7 @@ void BlueStore::Collection::split_cache(
        }
      }
    }
+  dest->cache->_trim_onodes();
  }
  
  // =======================================================
@@ -3702,8 +3699,8 @@ void *BlueStore::MempoolThread::entry()
        next_resize += resize_interval;
      }
  
-    // Now Trim
-    _trim_shards(interval_stats_trim);
+    // Now Resize the shards 
+    _resize_shards(interval_stats_trim);
      interval_stats_trim = false;
  
      store->_update_cache_logger();
@@ -3724,7 +3721,7 @@ void BlueStore::MempoolThread::_adjust_cache_settings()
    data_cache->set_cache_ratio(store->cache_data_ratio);
  }
  
-void BlueStore::MempoolThread::_trim_shards(bool interval_stats)
+void BlueStore::MempoolThread::_resize_shards(bool interval_stats)
  {
    auto cct = store->cct;
    size_t num_shards = store->cache_shards.size();
@@ -3774,7 +3771,8 @@ void BlueStore::MempoolThread::_trim_shards(bool interval_stats)
                   << " max_shard_buffer: " << max_shard_buffer << dendl;
  
    for (auto i : store->cache_shards) {
-    i->trim(max_shard_onodes, max_shard_buffer);
+    i->set_onode_max(max_shard_onodes);
+    i->set_buffer_max(max_shard_buffer);
    }
  }
  
@@ -13708,7 +13706,7 @@ void BlueStore::_flush_cache()
  {
    dout(10) << __func__ << dendl;
    for (auto i : cache_shards) {
-    i->trim_all();
+    i->flush();
      ceph_assert(i->empty());
    }
    for (auto& p : coll_map) {
@@ -13734,7 +13732,7 @@ int BlueStore::flush_cache(ostream *os)
  {
    dout(10) << __func__ << dendl;
    for (auto i : cache_shards) {
-    i->trim_all();
+    i->flush();
    }
  
    return 0;
diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h

index 227716dfe3cd0a8f890f371dcb29827cf3af7d4e..ac788116f553158e77b55184f0e6bada4160f79d 100644 (file)
--- a/src/os/bluestore/BlueStore.h
+++ b/src/os/bluestore/BlueStore.h
@@ -347,6 +347,7 @@ public:
                              flags);
        b->cache_private = _discard(cache, offset, bl.length());
        _add_buffer(cache, b, (flags & Buffer::FLAG_NOCACHE) ? 0 : 1, nullptr);
+      cache->_trim_buffers();
      }
      void _finish_write(Cache* cache, uint64_t seq);
      void did_read(Cache* cache, uint32_t offset, bufferlist& bl) {
@@ -354,6 +355,7 @@ public:
        Buffer *b = new Buffer(this, Buffer::STATE_CLEAN, 0, offset, bl);
        b->cache_private = _discard(cache, offset, bl.length());
        _add_buffer(cache, b, 1, nullptr);
+      cache->_trim_buffers();
      }
  
      void read(Cache* cache, uint32_t offset, uint32_t length,
@@ -1096,6 +1098,8 @@ public:
  
      std::atomic<uint64_t> num_extents = {0};
      std::atomic<uint64_t> num_blobs = {0};
+    std::atomic<uint64_t> onode_max = {0};
+    std::atomic<uint64_t> buffer_max = {0};
  
      std::array<std::pair<ghobject_t, mono_clock::time_point>, 64> dumped_onodes;
  
@@ -1131,11 +1135,28 @@ public:
        --num_blobs;
      }
  
-    void trim(uint64_t onode_max, uint64_t buffer_max);
+    void set_onode_max(uint64_t max) {
+      onode_max = max;
+    }
+
+    void set_buffer_max(uint64_t max) {
+      buffer_max = max;
+    }
+
+    void flush();
+    void trim_onodes();
+    void trim_buffers();
+
+    virtual void _trim_onodes_to(uint64_t max) = 0;
+    virtual void _trim_buffers_to(uint64_t max) = 0;
  
-    void trim_all();
+    void _trim_onodes() {
+      _trim_onodes_to(onode_max);
+    }
  
-    virtual void _trim(uint64_t onode_max, uint64_t buffer_max) = 0;
+    void _trim_buffers() {
+      _trim_buffers_to(buffer_max);
+    }
  
      virtual void add_stats(uint64_t *onodes, uint64_t *extents,
                            uint64_t *blobs,
@@ -1227,7 +1248,8 @@ public:
        _audit("_touch_buffer end");
      }
  
-    void _trim(uint64_t onode_max, uint64_t buffer_max) override;
+    void _trim_onodes_to(uint64_t max) override;
+    void _trim_buffers_to(uint64_t max) override;
  
      void add_stats(uint64_t *onodes, uint64_t *extents,
                    uint64_t *blobs,
@@ -1322,7 +1344,8 @@ public:
        _audit("_touch_buffer end");
      }
  
-    void _trim(uint64_t onode_max, uint64_t buffer_max) override;
+    void _trim_onodes_to(uint64_t max) override;
+    void _trim_buffers_to(uint64_t max) override;
  
      void add_stats(uint64_t *onodes, uint64_t *extents,
                    uint64_t *blobs,
@@ -2158,7 +2181,7 @@ private:
  
    private:
      void _adjust_cache_settings();
-    void _trim_shards(bool interval_stats);
+    void _resize_shards(bool interval_stats);
      void _tune_cache_size(bool interval_stats);
      void _balance_cache(
          const std::list<std::shared_ptr<PriorityCache::PriCache>>& caches);
author	Mark Nelson <mnelson@redhat.com>
	Tue, 15 Jan 2019 21:50:36 +0000 (15:50 -0600)
committer	Mark Nelson <mnelson@redhat.com>
	Sat, 15 Jun 2019 10:51:48 +0000 (06:51 -0400)
src/os/bluestore/BlueStore.cc		patch | blob | history
src/os/bluestore/BlueStore.h		patch | blob | history