git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
rocksdb backend optimization
author xinxin shu <xinxin.shu@intel.com>
Mon, 16 Jun 2014 00:27:22 +0000 (08:27 +0800)
committer Sage Weil <sage@redhat.com>
Wed, 30 Jul 2014 04:45:32 +0000 (21:45 -0700)
Signed-off-by: xinxin shu <xinxin.shu@intel.com>
src/common/config_opts.h
src/os/RocksDBStore.cc
src/os/RocksDBStore.h

index 7aeb0d62583686f44dfe4fdd4c3b3facafa83aa9..c7f39713d2ef03198070c343eaec68eb734f08ce 100644 (file)
@@ -578,13 +578,24 @@ OPTION(kinetic_use_ssl, OPT_BOOL, false) // whether to secure kinetic traffic wi
 
 OPTION(rocksdb_compact_on_mount, OPT_BOOL, false)
 OPTION(rocksdb_write_buffer_size, OPT_U64, 0) // rocksdb write buffer size
+OPTION(rocksdb_target_file_size_base, OPT_U64, 0) // target file size for compaction
 OPTION(rocksdb_cache_size, OPT_U64, 0) // rocksdb cache size
 OPTION(rocksdb_block_size, OPT_U64, 0) // rocksdb block size
 OPTION(rocksdb_bloom_size, OPT_INT, 0) // rocksdb bloom bits per entry
+OPTION(rocksdb_write_buffer_num, OPT_INT, 0) // rocksdb maximum number of write buffers
+OPTION(rocksdb_background_compactions, OPT_INT, 0) // number of background compaction jobs
+OPTION(rocksdb_background_flushes, OPT_INT, 0) // number of background flush jobs
 OPTION(rocksdb_max_open_files, OPT_INT, 0) // rocksdb max open files
 OPTION(rocksdb_compression, OPT_STR, "") // rocksdb uses compression : none, snappy, zlib, bzip2
 OPTION(rocksdb_paranoid, OPT_BOOL, false) // rocksdb paranoid flag
-OPTION(rocksdb_log, OPT_STR, "")  // enable rocksdb log file
+OPTION(rocksdb_log, OPT_STR, "/dev/null")  // enable rocksdb log file
+OPTION(rocksdb_level0_file_num_compaction_trigger, OPT_U64, 0)
+OPTION(rocksdb_level0_slowdown_writes_trigger, OPT_U64, 0)
+OPTION(rocksdb_level0_stop_writes_trigger, OPT_U64, 0)
+OPTION(rocksdb_disableDataSync, OPT_BOOL, true)
+OPTION(rocksdb_disableWAL, OPT_BOOL, false)
+OPTION(rocksdb_num_levels, OPT_INT, 0)
+OPTION(rocksdb_wal_dir, OPT_STR, "")  //  rocksdb write ahead log directory
 
 /**
  * osd_client_op_priority and osd_recovery_op_priority adjust the relative
index 5cb73e4766225f27758f90889978988485e4a22b..916951b066793098975969907049d83242152c6a 100644 (file)
@@ -30,6 +30,17 @@ int RocksDBStore::init()
   options.paranoid_checks = g_conf->rocksdb_paranoid;
   options.max_open_files = g_conf->rocksdb_max_open_files;
   options.log_file = g_conf->rocksdb_log;
+  options.write_buffer_num = g_conf->rocksdb_write_buffer_num;
+  options.max_background_compactions = g_conf->rocksdb_background_compactions;
+  options.max_background_flushes = g_conf->rocksdb_background_flushes;
+  options.target_file_size_base = g_conf->rocksdb_target_file_size_base;
+  options.level0_file_num_compaction_trigger = g_conf->rocksdb_level0_file_num_compaction_trigger;
+  options.level0_slowdown_writes_trigger = g_conf->rocksdb_level0_slowdown_writes_trigger;
+  options.level0_stop_writes_trigger = g_conf->rocksdb_level0_stop_writes_trigger;
+  options.disableDataSync = g_conf->rocksdb_disableDataSync;
+  options.num_levels = g_conf->rocksdb_num_levels;
+  options.disableWAL = g_conf->rocksdb_disableWAL;
+  options.wal_dir = g_conf->rocksdb_wal_dir;
   return 0;
 }
 
@@ -39,6 +50,14 @@ int RocksDBStore::do_open(ostream &out, bool create_if_missing)
 
   if (options.write_buffer_size)
     ldoptions.write_buffer_size = options.write_buffer_size;
+  if (options.write_buffer_num)
+    ldoptions.max_write_buffer_number = options.write_buffer_num;
+  if (options.max_background_compactions)
+    ldoptions.max_background_compactions = options.max_background_compactions;
+  if (options.max_background_flushes)
+    ldoptions.max_background_flushes = options.max_background_flushes;
+  if (options.target_file_size_base)
+    ldoptions.target_file_size_base = options.target_file_size_base;
   if (options.max_open_files)
     ldoptions.max_open_files = options.max_open_files;
   if (options.cache_size) {
@@ -73,6 +92,19 @@ int RocksDBStore::do_open(ostream &out, bool create_if_missing)
     rocksdb::Env *env = rocksdb::Env::Default();
     env->NewLogger(options.log_file, &ldoptions.info_log);
   }
+  if(options.disableDataSync)
+    ldoptions.disableDataSync = options.disableDataSync;
+  if(options.num_levels)
+    ldoptions.num_levels = options.num_levels;
+  if(options.level0_file_num_compaction_trigger)
+    ldoptions.level0_file_num_compaction_trigger = options.level0_file_num_compaction_trigger;
+  if(options.level0_slowdown_writes_trigger)
+    ldoptions.level0_slowdown_writes_trigger = options.level0_slowdown_writes_trigger;
+  if(options.level0_stop_writes_trigger)
+    ldoptions.level0_stop_writes_trigger = options.level0_stop_writes_trigger;
+  if(options.wal_dir.length())
+    ldoptions.wal_dir = options.wal_dir;
+
 
   //rocksdb::DB *_db;
   rocksdb::Status status = rocksdb::DB::Open(ldoptions, path, &db);
@@ -141,7 +173,9 @@ int RocksDBStore::submit_transaction(KeyValueDB::Transaction t)
 {
   RocksDBTransactionImpl * _t =
     static_cast<RocksDBTransactionImpl *>(t.get());
-  rocksdb::Status s = db->Write(rocksdb::WriteOptions(), _t->bat);
+  rocksdb::WriteOptions woptions;
+  woptions.disableWAL = options.disableWAL;
+  rocksdb::Status s = db->Write(woptions, _t->bat);
   logger->inc(l_rocksdb_txns);
   return s.ok() ? 0 : -1;
 }
@@ -150,9 +184,10 @@ int RocksDBStore::submit_transaction_sync(KeyValueDB::Transaction t)
 {
   RocksDBTransactionImpl * _t =
     static_cast<RocksDBTransactionImpl *>(t.get());
-  rocksdb::WriteOptions options;
-  options.sync = true;
-  rocksdb::Status s = db->Write(options, _t->bat);
+  rocksdb::WriteOptions woptions;
+  woptions.sync = true;
+  woptions.disableWAL = options.disableWAL;
+  rocksdb::Status s = db->Write(woptions, _t->bat);
   logger->inc(l_rocksdb_txns);
   return s.ok() ? 0 : -1;
 }
index 103f955863dfb054145ce535cf8c21b873be988c..1122abc38bd7857eec132f9dd341bd14b2ccebb1 100644 (file)
@@ -109,6 +109,10 @@ public:
    */
   struct options_t {
     uint64_t write_buffer_size; /// in-memory write buffer size
+    uint64_t write_buffer_num; /// in-memory write buffer number
+    uint64_t target_file_size_base; /// Target file size for compaction
+    int max_background_compactions; /// Maximum number of concurrent background compaction jobs
+    int max_background_flushes; /// Maximum number of concurrent background memtable flush jobs
     int max_open_files; /// maximum number of files RocksDB can open at once
     uint64_t cache_size; /// size of extra decompressed cache to use
     uint64_t block_size; /// user data per block
@@ -119,8 +123,15 @@ public:
     int block_restart_interval;
     bool error_if_exists;
     bool paranoid_checks;
+    uint64_t level0_file_num_compaction_trigger;
+    uint64_t level0_slowdown_writes_trigger;
+    uint64_t level0_stop_writes_trigger;
+    bool disableDataSync;
+    bool disableWAL;
+    int num_levels;
 
     string log_file;
+    string wal_dir;
 
     options_t() :
       write_buffer_size(0), //< 0 means default
@@ -131,7 +142,13 @@ public:
       compression_type("none"), //< set to false for no compression
       block_restart_interval(0), //< 0 means default
       error_if_exists(false), //< set to true if you want to check nonexistence
-      paranoid_checks(false) //< set to true if you want paranoid checks
+      paranoid_checks(false), //< set to true if you want paranoid checks
+      level0_file_num_compaction_trigger(0),
+      level0_slowdown_writes_trigger(0),
+      level0_stop_writes_trigger(0),
+      disableDataSync(false),
+      disableWAL(false),
+      num_levels(0)
     {}
   } options;