]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
os/bluestore: set bluestore_cache_kv_ratio to configure rocksdb cache size 15580/head
author Sage Weil <sage@redhat.com>
Thu, 8 Jun 2017 16:40:11 +0000 (12:40 -0400)
committer Sage Weil <sage@redhat.com>
Fri, 9 Jun 2017 14:50:56 +0000 (10:50 -0400)
Devote 20% to kv (rocksdb), 70% to metadata (onodes etc), 10% to data.

Note that if we don't consume the data portion (e.g., no cache hints), the
onode metadata will "borrow" that space.

Signed-off-by: Sage Weil <sage@redhat.com>
src/common/config_opts.h
src/os/bluestore/BlueStore.cc
src/os/bluestore/BlueStore.h

index ec3fa66ece380822916eece6ea65cdb609d99a33..705e7b437a1938d7b68683e6f21456cef4e07d04 100644 (file)
@@ -1137,7 +1137,8 @@ OPTION(bluestore_cache_type, OPT_STR, "2q")   // lru, 2q
 OPTION(bluestore_2q_cache_kin_ratio, OPT_DOUBLE, .5)    // kin page slot size / max page slot size
 OPTION(bluestore_2q_cache_kout_ratio, OPT_DOUBLE, .5)   // number of kout page slot / total number of page slot
 OPTION(bluestore_cache_size, OPT_U64, 1024*1024*1024)   // total cache size in bytes; divided evenly among the cache shards
-OPTION(bluestore_cache_meta_ratio, OPT_DOUBLE, .9)
+OPTION(bluestore_cache_meta_ratio, OPT_DOUBLE, .7)      // fraction of the cache dedicated to metadata; must be in (0,1.0]
+OPTION(bluestore_cache_kv_ratio, OPT_DOUBLE, .2)        // fraction of the cache dedicated to kv (e.g., rocksdb); must be in (0,1.0]; whatever meta + kv leave over goes to object data
 OPTION(bluestore_kvbackend, OPT_STR, "rocksdb")
 OPTION(bluestore_allocator, OPT_STR, "bitmap")     // stupid | bitmap
 OPTION(bluestore_freelist_blocks_per_key, OPT_INT, 128)
index a974b5ac836fcac30ff713869d8e70cea1c1890f..e89986833920896c961c6d4e1bbdc1c5cbfad872 100644 (file)
@@ -752,6 +752,7 @@ void BlueStore::Cache::trim_all()
 void BlueStore::Cache::trim(
   uint64_t target_bytes,
   float target_meta_ratio,
+  float target_data_ratio,
   float bytes_per_onode)
 {
   std::lock_guard<std::recursive_mutex> l(lock);
@@ -759,23 +760,18 @@ void BlueStore::Cache::trim(
   uint64_t current_buffer = _get_buffer_bytes();
   uint64_t current = current_meta + current_buffer;
 
-  uint64_t target_meta = target_bytes * (double)target_meta_ratio; //need to cast to double
-                                                                   //since float(1) might produce inaccurate value
-                                                                   // for target_meta (a bit greater than target_bytes)
-                                                                   // that causes overflow in target_buffer below.
-                                                                   //Consider the following code:
-                                                                   //uint64_t i =(uint64_t)227*1024*1024*1024 + 1;
-                                                                   //float f = 1;
-                                                                   //uint64_t i2 = i*f;
-                                                                   //assert(i == i2);
+  uint64_t target_meta = target_bytes * target_meta_ratio;
+  uint64_t target_buffer = target_bytes * target_data_ratio;
 
-  target_meta = min(target_bytes, target_meta); //and just in case that ratio is > 1
-  uint64_t target_buffer = target_bytes - target_meta;
+  // correct for overflow or float imprecision
+  target_meta = min(target_bytes, target_meta);
+  target_buffer = min(target_bytes - target_meta, target_buffer);
 
   if (current <= target_bytes) {
     dout(10) << __func__
             << " shard target " << pretty_si_t(target_bytes)
-            << " ratio " << target_meta_ratio << " ("
+            << " meta/data ratios " << target_meta_ratio
+            << " + " << target_data_ratio << " ("
             << pretty_si_t(target_meta) << " + "
             << pretty_si_t(target_buffer) << "), "
             << " current " << pretty_si_t(current) << " ("
@@ -3263,8 +3259,10 @@ void *BlueStore::MempoolThread::entry()
     uint64_t shard_target = store->cct->_conf->bluestore_cache_size / num_shards;
 
     for (auto i : store->cache_shards) {
-      i->trim(shard_target, store->cct->_conf->bluestore_cache_meta_ratio,
-        bytes_per_onode);
+      i->trim(shard_target,
+             store->cache_meta_ratio,
+             store->cache_data_ratio,
+             bytes_per_onode);
     }
 
     store->_update_cache_logger();
@@ -3543,6 +3541,36 @@ void BlueStore::_set_blob_size()
            << std::dec << dendl;
 }
 
+int BlueStore::_set_cache_sizes()
+{
+  cache_meta_ratio = cct->_conf->bluestore_cache_meta_ratio;
+  cache_kv_ratio = cct->_conf->bluestore_cache_kv_ratio;
+  cache_data_ratio = 1.0 - cache_meta_ratio - cache_kv_ratio;
+
+  if (cache_meta_ratio <= 0 || cache_meta_ratio > 1.0) {
+    derr << __func__ << "bluestore_cache_meta_ratio (" << cache_meta_ratio
+        << ") must be in range (0,1.0]" << dendl;
+    return -EINVAL;
+  }
+  if (cache_kv_ratio <= 0 || cache_kv_ratio > 1.0) {
+    derr << __func__ << "bluestore_cache_kv_ratio (" << cache_kv_ratio
+        << ") must be in range (0,1.0]" << dendl;
+    return -EINVAL;
+  }
+  if (cache_meta_ratio + cache_kv_ratio > 1.0) {
+    derr << __func__ << "bluestore_cache_meta_ratio (" << cache_meta_ratio
+        << ") + bluestore_cache_kv_ratio (" << cache_kv_ratio
+        << ") = " << cache_meta_ratio + cache_kv_ratio << "; must be <= 1.0"
+        << dendl;
+    return -EINVAL;
+  }
+  dout(1) << __func__ << " meta " << cache_meta_ratio
+         << " kv " << cache_kv_ratio
+         << " data " << cache_data_ratio
+         << dendl;
+  return 0;
+}
+
 void BlueStore::_init_logger()
 {
   PerfCountersBuilder b(cct, "bluestore",
@@ -3718,6 +3746,12 @@ int BlueStore::get_block_device_fsid(CephContext* cct, const string& path,
 
 int BlueStore::_open_path()
 {
+  // initial sanity check
+  int r = _set_cache_sizes();
+  if (r < 0) {
+    return r;
+  }
+
   assert(path_fd < 0);
   path_fd = ::open(path.c_str(), O_DIRECTORY);
   if (path_fd < 0) {
@@ -4431,6 +4465,8 @@ int BlueStore::_open_db(bool create)
   FreelistManager::setup_merge_operators(db);
   db->set_merge_operator(PREFIX_STAT, merge_op);
 
+  db->set_cache_size(cct->_conf->bluestore_cache_size * cache_kv_ratio);
+
   if (kv_backend == "rocksdb")
     options = cct->_conf->bluestore_rocksdb_options;
   db->init(options);
index 83fe45d275217dbcb55665c8ada253eec06b1263..de347ec9855917cec17f3c371f612bf30e9c44b8 100644 (file)
@@ -132,6 +132,7 @@ public:
   void _set_csum();
   void _set_compression();
   void _set_throttle_params();
+  int _set_cache_sizes();
 
   class TransContext;
 
@@ -1067,7 +1068,9 @@ public:
       --num_blobs;
     }
 
-    void trim(uint64_t target_bytes, float target_meta_ratio,
+    void trim(uint64_t target_bytes,
+             float target_meta_ratio,
+             float target_data_ratio,
              float bytes_per_onode);
 
     void trim_all();
@@ -1900,6 +1903,10 @@ private:
   uint64_t kv_ios = 0;
   uint64_t kv_throttle_costs = 0;
 
+  float cache_meta_ratio = 0;   ///< cache ratio dedicated to metadata
+  float cache_kv_ratio = 0;     ///< cache ratio dedicated to kv (e.g., rocksdb)
+  float cache_data_ratio = 0;   ///< cache ratio dedicated to object data
+
   // cache trim control
 
   std::mutex vstatfs_lock;