]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
os/bluestore: Added WithoutSlowDirVolumeSelector as a method to avoid spillover
author Kinga Karczewska <kkarczewska@cloudferro.com>
Tue, 25 Aug 2020 08:38:34 +0000 (10:38 +0200)
committer Kinga Karczewska <kkarczewska@cloudferro.com>
Thu, 24 Sep 2020 12:14:56 +0000 (14:14 +0200)
The commit adds a new volume selector, WithoutSlowDirVolumeSelector, which can be enabled by specifying bluestore_volume_selection_policy='without_slow_dir'. It can be used to avoid spillover from the faster to the slower disk.
It passes information about only one path to RocksDB, which transfers the responsibility for choosing the faster/slower device for 'db' data from the database to the allocator. In addition, it enables the user to turn on level_compaction_dynamic_level_bytes in the RocksDB options.

Signed-off-by: Kinga Karczewska <kkarczewska@cloudferro.com>
Signed-off-by: Kajetan Janiak <kjaniak@cloudferro.com>
src/common/options.cc
src/os/bluestore/BlueFS.cc
src/os/bluestore/BlueFS.h
src/os/bluestore/BlueStore.cc

index 7d0e70b3bdda53995cd974e591c53e16240f0952..f89cac95591c269bcea4d70c6e1e7d48c388d55f 100644 (file)
@@ -4687,9 +4687,12 @@ std::vector<Option> get_global_options() {
 
     Option("bluestore_volume_selection_policy", Option::TYPE_STR, Option::LEVEL_DEV)
     .set_default("use_some_extra")
-    .set_enum_allowed({ "rocksdb_original", "use_some_extra" })
+    .set_enum_allowed({ "rocksdb_original", "use_some_extra", "without_slow_dir" })
     .set_description("Determines bluefs volume selection policy")
-    .set_long_description("Determines bluefs volume selection policy. 'use_some_extra' policy allows to override RocksDB level granularity and put high level's data to faster device even when the level doesn't completely fit there"),
+    .set_long_description("Determines bluefs volume selection policy. 'use_some_extra' policy allows to override RocksDB level "
+                          "granularity and put high level's data to faster device even when the level doesn't completely fit there. "
+                          "'without_slow_dir' policy enables using 100% of faster disk capacity and allows the user to turn on "
+                          "'level_compaction_dynamic_level_bytes' option in RocksDB options."),
 
     Option("bluestore_volume_selection_reserved_factor", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
       .set_flag(Option::FLAG_STARTUP)
index 6c95d845d7d0d4e0c6250fdf5c17afdf30d35ab7..084d70992fa98ab15a0f77ca1cf8cb05df82f5e4 100644 (file)
@@ -3594,3 +3594,10 @@ void OriginalVolumeSelector::dump(ostream& sout) {
     << ", slow_total:" << slow_total
     << std::endl;
 }
+
+// ===============================================
+// WithoutSlowDirVolumeSelector
+
+void WithoutSlowDirVolumeSelector::get_paths(const std::string& base, paths& res) const {
+  res.emplace_back(base, 1);  // size of the last db_path has no effect
+}
index d7ac37488deba7037c35192aeda663c5f7755dcb..0f1816dc8520da91c58ccf9c82bd0be574362014 100644 (file)
@@ -637,4 +637,15 @@ public:
   void dump(std::ostream& sout) override;
 };
 
+class WithoutSlowDirVolumeSelector : public OriginalVolumeSelector {
+public:
+  WithoutSlowDirVolumeSelector(
+    uint64_t _wal_total,
+    uint64_t _db_total,
+    uint64_t _slow_total)
+    : OriginalVolumeSelector(_wal_total, _db_total, _slow_total) {}
+
+  void get_paths(const std::string& base, paths& res) const override;
+};
+
 #endif
index 9a7cc2e4aec3f389da452f3dec4ac429a4bfedcf..f97df6f9f6993e7707aa575fc06e730f708d57de 100644 (file)
@@ -5601,13 +5601,13 @@ int BlueStore::_open_bluefs(bool create, bool read_only)
   if (r < 0) {
     return r;
   }
-  RocksDBBlueFSVolumeSelector* vselector = nullptr;
+  BlueFSVolumeSelector* vselector = nullptr;
   if (bluefs_layout.shared_bdev == BlueFS::BDEV_SLOW) {
 
     string options = cct->_conf->bluestore_rocksdb_options;
 
     rocksdb::Options rocks_opts;
-    int r = RocksDBStore::ParseOptionsFromStringStatic(
+    r = RocksDBStore::ParseOptionsFromStringStatic(
       cct,
       options,
       rocks_opts,
@@ -5615,19 +5615,25 @@ int BlueStore::_open_bluefs(bool create, bool read_only)
     if (r < 0) {
       return r;
     }
-
-    double reserved_factor = cct->_conf->bluestore_volume_selection_reserved_factor;
-    vselector =
-      new RocksDBBlueFSVolumeSelector(
+    if (cct->_conf->bluestore_volume_selection_policy == "without_slow_dir") {
+      vselector = new WithoutSlowDirVolumeSelector(
         bluefs->get_block_device_size(BlueFS::BDEV_WAL) * 95 / 100,
         bluefs->get_block_device_size(BlueFS::BDEV_DB) * 95 / 100,
-        bluefs->get_block_device_size(BlueFS::BDEV_SLOW) * 95 / 100,
-        1024 * 1024 * 1024, //FIXME: set expected l0 size here
-        rocks_opts.max_bytes_for_level_base,
-        rocks_opts.max_bytes_for_level_multiplier,
-        reserved_factor,
-        cct->_conf->bluestore_volume_selection_reserved,
-        cct->_conf->bluestore_volume_selection_policy != "rocksdb_original");
+        bluefs->get_block_device_size(BlueFS::BDEV_SLOW) * 95 / 100);
+    } else {
+      double reserved_factor = cct->_conf->bluestore_volume_selection_reserved_factor;
+      vselector =
+        new RocksDBBlueFSVolumeSelector(
+          bluefs->get_block_device_size(BlueFS::BDEV_WAL) * 95 / 100,
+          bluefs->get_block_device_size(BlueFS::BDEV_DB) * 95 / 100,
+          bluefs->get_block_device_size(BlueFS::BDEV_SLOW) * 95 / 100,
+          1024 * 1024 * 1024, //FIXME: set expected l0 size here
+          rocks_opts.max_bytes_for_level_base,
+          rocks_opts.max_bytes_for_level_multiplier,
+          reserved_factor,
+          cct->_conf->bluestore_volume_selection_reserved,
+          cct->_conf->bluestore_volume_selection_policy == "use_some_extra");
+    }    
   }
   if (create) {
     bluefs->mkfs(fsid, bluefs_layout);