#include "PriorityCache.h"
namespace PriorityCache {
- int64_t get_chunk(uint64_t usage, uint64_t chunk_bytes) {
- // Add a chunk of headroom and round up to the near chunk
- uint64_t val = usage + chunk_bytes;
- uint64_t r = (val) % chunk_bytes;
+ int64_t get_chunk(uint64_t usage, uint64_t total_bytes) {
+ uint64_t chunk = total_bytes;
+
+ // Round total_bytes up to the next power of 2
+ chunk -= 1;
+ chunk |= chunk >> 1;
+ chunk |= chunk >> 2;
+ chunk |= chunk >> 4;
+ chunk |= chunk >> 8;
+ chunk |= chunk >> 16;
+ chunk |= chunk >> 32;
+ chunk += 1;
+ // shrink it to 1/256 of the rounded up cache size
+ chunk /= 256;
+
+ // bound the chunk size to be between 4MB and 16MB
+ chunk = (chunk > 4ul*1024*1024) ? chunk : 4ul*1024*1024;
+ chunk = (chunk < 16ul*1024*1024) ? chunk : 16ul*1024*1024;
+
+ /* Add 16 chunks of headroom and round up to the nearest chunk. Note that
+ * if RocksDB is used, it's a good idea to have N MB of headroom where
+ * N is the target_file_size_base value. RocksDB will read SST files
+ * into the block cache during compaction which potentially can force out
+ * all existing cached data. Once compaction is finished, the SST data is
+ * released leaving an empty cache. Having enough headroom to absorb
+ * compaction reads allows the kv cache grow even during extremely heavy
+ * compaction workloads.
+ */
+ uint64_t val = usage + (16 * chunk);
+ uint64_t r = (val) % chunk;
if (r > 0)
- val = val + chunk_bytes - r;
+ val = val + chunk - r;
return val;
}
LAST = PRI3,
};
- int64_t get_chunk(uint64_t usage, uint64_t chunk_bytes);
+ int64_t get_chunk(uint64_t usage, uint64_t total_bytes);
struct PriCache {
virtual ~PriCache();
- /* Ask the cache to request memory for the given priority rounded up to
- * the nearst chunk_bytes. This for example, may return the size of all
- * items associated with this priority plus some additional space for
- * future growth. Note that the cache may ultimately be allocated less
- * memory than it requests here.
+ /* Ask the cache to request memory for the given priority. Note that the
+ * cache may ultimately be allocated less memory than it requests here.
*/
- virtual int64_t request_cache_bytes(PriorityCache::Priority pri, uint64_t chunk_bytes) const = 0;
+ virtual int64_t request_cache_bytes(PriorityCache::Priority pri, uint64_t total_cache) const = 0;
// Get the number of bytes currently allocated to the given priority.
virtual int64_t get_cache_bytes(PriorityCache::Priority pri) const = 0;
// Allocate additional bytes for a given priority.
virtual void add_cache_bytes(PriorityCache::Priority pri, int64_t bytes) = 0;
- // Commit the current number of bytes allocated to the cache.
- virtual int64_t commit_cache_size() = 0;
+ /* Commit the current number of bytes allocated to the cache. Space is
+ * allocated in chunks based on the allocation size and current total size
+ * of memory available for caches. */
+ virtual int64_t commit_cache_size(uint64_t total_cache) = 0;
+
+ /* Get the current number of bytes allocated to the cache. This may be
+ * larger than the value returned by get_cache_bytes as it includes extra
+ * space for future growth. */
+ virtual int64_t get_committed_size() const = 0;
// Get the ratio of available memory this cache should target.
virtual double get_cache_ratio() const = 0;
.add_see_also("bluestore_cache_meta_ratio")
.set_description("Automatically tune the ratio of caches while respecting min values."),
- Option("bluestore_cache_autotune_chunk_size", Option::TYPE_SIZE, Option::LEVEL_DEV)
- .set_default(33554432)
- .add_see_also("bluestore_cache_autotune")
- .set_description("The chunk size in bytes to allocate to caches when cache autotune is enabled."),
-
Option("bluestore_cache_autotune_interval", Option::TYPE_FLOAT, Option::LEVEL_DEV)
.set_default(5)
.add_see_also("bluestore_cache_autotune")
cache_bytes[pri] += bytes;
}
- virtual int64_t commit_cache_size() {
+ virtual int64_t commit_cache_size(uint64_t total_cache) {
+ return -EOPNOTSUPP;
+ }
+
+ virtual int64_t get_committed_size() const {
return -EOPNOTSUPP;
}
default:
break;
}
- request = PriorityCache::get_chunk(usage, chunk_bytes);
request = (request > assigned) ? request - assigned : 0;
dout(10) << __func__ << " Priority: " << static_cast<uint32_t>(pri)
<< " Usage: " << usage << " Request: " << request << dendl;
return static_cast<int64_t>(bbt_opts.block_cache->GetUsage());
}
-int64_t RocksDBStore::commit_cache_size()
+int64_t RocksDBStore::commit_cache_size(uint64_t total_bytes)
{
size_t old_bytes = bbt_opts.block_cache->GetCapacity();
- int64_t total_bytes = get_cache_bytes();
+ int64_t new_bytes = PriorityCache::get_chunk(
+ get_cache_bytes(), total_bytes);
dout(10) << __func__ << " old: " << old_bytes
- << " new: " << total_bytes << dendl;
- bbt_opts.block_cache->SetCapacity((size_t) total_bytes);
+ << " new: " << new_bytes << dendl;
+ bbt_opts.block_cache->SetCapacity((size_t) new_bytes);
  // Set the high priority pool ratio if this is the binned LRU cache.
if (g_conf()->rocksdb_cache_type == "binned_lru") {
auto binned_cache =
std::static_pointer_cast<rocksdb_cache::BinnedLRUCache>(bbt_opts.block_cache);
- int64_t high_pri_bytes = get_cache_bytes(PriorityCache::Priority::PRI0);
- double ratio = (double) high_pri_bytes / total_bytes;
+ int64_t high_pri_bytes = PriorityCache::get_chunk(
+ binned_cache->GetHighPriPoolUsage()+1, total_bytes);
+ double ratio = (double) high_pri_bytes / new_bytes;
dout(10) << __func__ << " High Pri Pool Ratio set to " << ratio << dendl;
binned_cache->SetHighPriPoolRatio(ratio);
}
- return total_bytes;
-}
-
-int64_t RocksDBStore::get_cache_capacity() {
- return bbt_opts.block_cache->GetCapacity();
+ return new_bytes;
}
RocksDBStore::RocksDBWholeSpaceIteratorImpl::~RocksDBWholeSpaceIteratorImpl()
virtual int64_t request_cache_bytes(
PriorityCache::Priority pri, uint64_t cache_bytes) const override;
- virtual int64_t commit_cache_size() override;
+ virtual int64_t commit_cache_size(uint64_t total_cache) override;
+ virtual int64_t get_committed_size() const override {
+ return bbt_opts.block_cache->GetCapacity();
+ }
virtual std::string get_cache_name() const override {
return "RocksDB Block Cache";
}
if (store->cache_autotune) {
cache_size = autotune_cache_size;
- kv_alloc = store->db->get_cache_bytes();
- meta_alloc = meta_cache.get_cache_bytes();
- data_alloc = data_cache.get_cache_bytes();
+ kv_alloc = store->db->get_committed_size();
+ meta_alloc = meta_cache.get_committed_size();
+ data_alloc = data_cache.get_committed_size();
}
if (interval_stats) {
const std::list<PriorityCache::PriCache *>& caches)
{
int64_t mem_avail = autotune_cache_size;
+ /* Each cache is going to get at least 1 chunk's worth of memory from get_chunk
+ * so shrink the available memory here to compensate. Don't shrink the amount of
+ * memory below 0 however.
+ */
+ mem_avail -= PriorityCache::get_chunk(1, autotune_cache_size) * caches.size();
+ if (mem_avail < 0) {
+ mem_avail = 0;
+ }
// Assign memory for each priority level
for (int i = 0; i < PriorityCache::Priority::LAST + 1; i++) {
// Finally commit the new cache sizes
for (auto it = caches.begin(); it != caches.end(); it++) {
- (*it)->commit_cache_size();
+ (*it)->commit_cache_size(autotune_cache_size);
}
}
uint64_t total_assigned = 0;
for (auto it = tmp_caches.begin(); it != tmp_caches.end(); ) {
- int64_t cache_wants = (*it)->request_cache_bytes(pri, store->cache_autotune_chunk_size);
+ int64_t cache_wants = (*it)->request_cache_bytes(pri, autotune_cache_size);
// Usually the ratio should be set to the fraction of the current caches'
// assigned ratio compared to the total ratio of all caches that still
{
ceph_assert(bdev);
cache_autotune = cct->_conf.get_val<bool>("bluestore_cache_autotune");
- cache_autotune_chunk_size =
- cct->_conf.get_val<Option::size_t>("bluestore_cache_autotune_chunk_size");
cache_autotune_interval =
cct->_conf.get_val<double>("bluestore_cache_autotune_interval");
osd_memory_target = cct->_conf.get_val<uint64_t>("osd_memory_target");
double cache_kv_ratio = 0; ///< cache ratio dedicated to kv (e.g., rocksdb)
double cache_data_ratio = 0; ///< cache ratio dedicated to object data
bool cache_autotune = false; ///< cache autotune setting
- uint64_t cache_autotune_chunk_size = 0; ///< cache autotune chunk size
double cache_autotune_interval = 0; ///< time to wait between cache rebalancing
uint64_t osd_memory_target = 0; ///< OSD memory target when autotuning cache
uint64_t osd_memory_base = 0; ///< OSD base memory when autotuning cache
struct MempoolCache : public PriorityCache::PriCache {
BlueStore *store;
int64_t cache_bytes[PriorityCache::Priority::LAST+1];
+ int64_t committed_bytes = 0;
double cache_ratio = 0;
MempoolCache(BlueStore *s) : store(s) {};
virtual uint64_t _get_used_bytes() const = 0;
virtual int64_t request_cache_bytes(
- PriorityCache::Priority pri, uint64_t chunk_bytes) const {
+ PriorityCache::Priority pri, uint64_t total_cache) const {
int64_t assigned = get_cache_bytes(pri);
switch (pri) {
// All cache items are currently shoved into the LAST priority
case PriorityCache::Priority::LAST:
{
- uint64_t usage = _get_used_bytes();
- int64_t request = PriorityCache::get_chunk(usage, chunk_bytes);
+ int64_t request = _get_used_bytes();
return(request > assigned) ? request - assigned : 0;
}
default:
virtual void add_cache_bytes(PriorityCache::Priority pri, int64_t bytes) {
cache_bytes[pri] += bytes;
}
- virtual int64_t commit_cache_size() {
- return get_cache_bytes();
+ virtual int64_t commit_cache_size(uint64_t total_cache) {
+ committed_bytes = PriorityCache::get_chunk(
+ get_cache_bytes(), total_cache);
+ return committed_bytes;
+ }
+ virtual int64_t get_committed_size() const {
+ return committed_bytes;
}
virtual double get_cache_ratio() const {
return cache_ratio;