Merge pull request #4717 from athanatos/wip-8635

author Samuel Just <sam.just@inktank.com>

Thu, 4 Jun 2015 18:25:12 +0000 (11:25 -0700)

committer Samuel Just <sam.just@inktank.com>

Thu, 4 Jun 2015 18:25:12 +0000 (11:25 -0700)
author Samuel Just <sam.just@inktank.com>
Thu, 4 Jun 2015 18:25:12 +0000 (11:25 -0700)
committer Samuel Just <sam.just@inktank.com>
Thu, 4 Jun 2015 18:25:12 +0000 (11:25 -0700)
diff --cc src/common/config_opts.h

index 8c5a2bf291d04413079bea7ee9d325d4ffd58280,998e68c61b5cb0bc8612908a5a850ee25a88abe6..d762a2c9de3667b63dd1d0a1cb27718c0eeb53be
--- 1/src/common/config_opts.h
--- 2/src/common/config_opts.h
+++ b/src/common/config_opts.h
@@@ -708,23 -702,47 +703,20 @@@ OPTION(kinetic_hmac_key, OPT_STR, "asdf
   OPTION(kinetic_use_ssl, OPT_BOOL, false) // whether to secure kinetic traffic with TLS
   
   
- -//in memory write buffer configuration
- -OPTION(rocksdb_write_buffer_size, OPT_U64, 8*1024*1024) // rocksdb write buffer size, should be larger than average write size.
- -OPTION(rocksdb_write_buffer_num, OPT_INT, 2) // The maximum number of write buffers that are built up in memory.
- -OPTION(rocksdb_min_write_buffer_number_to_merge, OPT_INT, 1) // The min write buffers that will be merged together before writing to storage.
- -//on disk level0 configuration
- -OPTION(rocksdb_level0_file_num_compaction_trigger, OPT_INT, 4) // Number of files to trigger level-0 compaction
- -OPTION(rocksdb_level0_slowdown_writes_trigger, OPT_INT, -1)  // number of level-0 files at which we start slowing down write. -1 means not set.
- -OPTION(rocksdb_level0_stop_writes_trigger, OPT_INT, -1)  // number of level-0 files at which we stop writes. -1 means not set.
- -//on disk level1+ configuration
- -OPTION(rocksdb_max_bytes_for_level_base, OPT_U64, 10*1024*1024)  // max total bytes for level 1
- -OPTION(rocksdb_max_bytes_for_level_multiplier, OPT_INT, 10)  // max total bytes for level 1
- -OPTION(rocksdb_target_file_size_base, OPT_U64, 2*1024*1024) // target file size for level 1
- -OPTION(rocksdb_target_file_size_multiplier, OPT_INT, 1) // target file size for Level-N = (multiplier)^(N-1) * file_size_base
- -OPTION(rocksdb_num_levels, OPT_INT, 7) // number of levels for this database,chang
- -OPTION(rocksdb_cache_size, OPT_U64, 0) // rocksdb cache size
- -OPTION(rocksdb_block_size, OPT_U64, 4*1024) // rocksdb block size
- -OPTION(rocksdb_bloom_bits_per_key, OPT_INT, 10) // rocksdb bloom bits per entry
- -//concurrency of compaction and flush
- -OPTION(rocksdb_max_background_compactions, OPT_INT, 1) // number for background compaction jobs
- -OPTION(rocksdb_compaction_threads, OPT_INT, 1) // number for background compaction jobs
- -OPTION(rocksdb_max_background_flushes, OPT_INT, 1) // number for background flush jobs
- -OPTION(rocksdb_flusher_threads, OPT_INT, 1) // number for background compaction jobs
- -//Other
- -OPTION(rocksdb_max_open_files, OPT_INT, 5000) // rocksdb max open files
- -OPTION(rocksdb_compression, OPT_STR, "snappy") // rocksdb uses compression : none, snappy, zlib, bzip2
- -OPTION(rocksdb_compact_on_mount, OPT_BOOL, false)
- -OPTION(rocksdb_paranoid, OPT_BOOL, false) // RocksDB will aggressively check consistency of the data.
- -OPTION(rocksdb_log, OPT_STR, "/dev/null")  // enable rocksdb log file
- -OPTION(rocksdb_info_log_level, OPT_STR, "info")  // info log level : debug , info , warn, error, fatal
- -OPTION(rocksdb_wal_dir, OPT_STR, "")  // rocksdb write-ahead log directory; putting it on a fast device will benefit write performance
- -OPTION(rocksdb_disableDataSync, OPT_BOOL, false) // if true, data files are not synced to stable storage
- -OPTION(rocksdb_disableWAL, OPT_BOOL, false)  // if true, writes will not first go to the write ahead log
- -
+ +// rocksdb options that will be used for keyvaluestore(if backend is rocksdb)
+ +OPTION(keyvaluestore_rocksdb_options, OPT_STR, "")
+ +// rocksdb options that will be used for omap(if omap_backend is rocksdb)
+ +OPTION(filestore_rocksdb_options, OPT_STR, "")
+ +// rocksdb options that will be used in monstore
+ +OPTION(mon_rocksdb_options, OPT_STR, "")
   
   /**
-  * osd_client_op_priority and osd_recovery_op_priority adjust the relative
-  * priority of client io vs recovery io.
+  * osd_*_priority adjust the relative priority of client io, recovery io,
+  * snaptrim io, etc
    *
-  * osd_client_op_priority/osd_recovery_op_priority determines the ratio of
-  * available io between client and recovery.  Each option may be set between
+  * osd_*_priority determine the ratio of available io between client and
+  * recovery.  Each option may be set between
    * 1..63.
-  *
-  * osd_recovery_op_warn_multiple scales the normal warning threshold,
-  * osd_op_complaint_time, so that slow recovery ops won't cause noise
    */
   OPTION(osd_client_op_priority, OPT_U32, 63)
   OPTION(osd_recovery_op_priority, OPT_U32, 10)
diff --cc src/osd/OSD.cc
Simple merge
diff --cc src/osd/OSD.h

index 2cf6819f596b836519696e36ef3d919e7ee85739,df968594edf641526bbe29bc9a891661f75d6b06..85452057ca380f6afac510e3e95dc5a85489acc1
--- 1/src/osd/OSD.h
--- 2/src/osd/OSD.h
+++ b/src/osd/OSD.h
@@@ -2092,150 -2175,9 +2189,9 @@@ protected
     // -- scrubbing --
     void sched_scrub();
     bool scrub_random_backoff();
- -  bool scrub_should_schedule();
+ +  bool scrub_load_below_threshold();
     bool scrub_time_permit(utime_t now);
   
-   xlist<PG*> scrub_queue;
- 
-   struct ScrubWQ : public ThreadPool::WorkQueue<PG> {
-     OSD *osd;
-     ScrubWQ(OSD *o, time_t ti, time_t si, ThreadPool *tp)
-       : ThreadPool::WorkQueue<PG>("OSD::ScrubWQ", ti, si, tp), osd(o) {}
- 
-     bool _empty() {
-       return osd->scrub_queue.empty();
-     }
-     bool _enqueue(PG *pg) {
-       if (pg->scrub_item.is_on_list()) {
-       return false;
-       }
-       pg->get("ScrubWQ");
-       osd->scrub_queue.push_back(&pg->scrub_item);
-       return true;
-     }
-     void _dequeue(PG *pg) {
-       if (pg->scrub_item.remove_myself()) {
-       pg->put("ScrubWQ");
-       }
-     }
-     PG *_dequeue() {
-       if (osd->scrub_queue.empty())
-       return NULL;
-       PG *pg = osd->scrub_queue.front();
-       osd->scrub_queue.pop_front();
-       return pg;
-     }
-     void _process(
-       PG *pg,
-       ThreadPool::TPHandle &handle) {
-       pg->scrub(handle);
-       pg->put("ScrubWQ");
-     }
-     void _clear() {
-       while (!osd->scrub_queue.empty()) {
-       PG *pg = osd->scrub_queue.front();
-       osd->scrub_queue.pop_front();
-       pg->put("ScrubWQ");
-       }
-     }
-   } scrub_wq;
- 
-   struct RepScrubWQ : public ThreadPool::WorkQueue<MOSDRepScrub> {
-   private: 
-     OSD *osd;
-     list<MOSDRepScrub*> rep_scrub_queue;
- 
-   public:
-     RepScrubWQ(OSD *o, time_t ti, time_t si, ThreadPool *tp)
-       : ThreadPool::WorkQueue<MOSDRepScrub>("OSD::RepScrubWQ", ti, si, tp), osd(o) {}
- 
-     bool _empty() {
-       return rep_scrub_queue.empty();
-     }
-     bool _enqueue(MOSDRepScrub *msg) {
-       rep_scrub_queue.push_back(msg);
-       return true;
-     }
-     void _dequeue(MOSDRepScrub *msg) {
-       assert(0); // Not applicable for this wq
-       return;
-     }
-     MOSDRepScrub *_dequeue() {
-       if (rep_scrub_queue.empty())
-       return NULL;
-       MOSDRepScrub *msg = rep_scrub_queue.front();
-       rep_scrub_queue.pop_front();
-       return msg;
-     }
-     void _process(
-       MOSDRepScrub *msg,
-       ThreadPool::TPHandle &handle) {
-       PG *pg = NULL;
-       {
-       Mutex::Locker lock(osd->osd_lock);
-       if (osd->is_stopping() ||
-           !osd->_have_pg(msg->pgid)) {
-         msg->put();
-         return;
-       }
-       pg = osd->_lookup_lock_pg(msg->pgid);
-       }
-       assert(pg);
-       pg->replica_scrub(msg, handle);
-       msg->put();
-       pg->unlock();
-     }
-     void _clear() {
-       while (!rep_scrub_queue.empty()) {
-       MOSDRepScrub *msg = rep_scrub_queue.front();
-       rep_scrub_queue.pop_front();
-       msg->put();
-       }
-     }
-   } rep_scrub_wq;
- 
     // -- removing --
     struct RemoveWQ :
       public ThreadPool::WorkQueueVal<pair<PGRef, DeletingStateRef> > {
diff --cc src/osd/PG.cc
Simple merge
diff --cc src/osd/ReplicatedPG.cc
Simple merge
diff --cc src/osd/ReplicatedPG.h
Simple merge
author	Samuel Just <sam.just@inktank.com>
	Thu, 4 Jun 2015 18:25:12 +0000 (11:25 -0700)
committer	Samuel Just <sam.just@inktank.com>
	Thu, 4 Jun 2015 18:25:12 +0000 (11:25 -0700)
		1	2
src/common/config_opts.h	patch \|	diff1 \|	diff2 \|	blob \| history
src/osd/OSD.cc	patch \|	diff1 \|	diff2 \|	blob \| history
src/osd/OSD.h	patch \|	diff1 \|	diff2 \|	blob \| history
src/osd/PG.cc	patch \|	diff1 \|	diff2 \|	blob \| history
src/osd/ReplicatedPG.cc	patch \|	diff1 \|	diff2 \|	blob \| history
src/osd/ReplicatedPG.h	patch \|	diff1 \|	diff2 \|	blob \| history