git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
os/bluestore: make vselector reserved* parameters applicable in run-time 63642/head
author: Igor Fedotov <igor.fedotov@croit.io>
Mon, 2 Jun 2025 15:12:28 +0000 (18:12 +0300)
committer: Igor Fedotov <igor.fedotov@croit.io>
Tue, 3 Jun 2025 21:57:48 +0000 (00:57 +0300)
With this change, 'bluestore_volume_selection_reserved' and
'bluestore_volume_selection_reserved_factor' can be changed on the fly.

Signed-off-by: Igor Fedotov <igor.fedotov@croit.io>
src/os/bluestore/BlueFS.cc
src/os/bluestore/BlueFS.h
src/os/bluestore/BlueStore.cc
src/os/bluestore/BlueStore.h
src/test/objectstore/test_bluestore_vselector.cc

index a8f961074a50784f02405cc7a3857d02cc049db6..0a577f24bf6946ed92a3e599303176d7eebe4a68 100644 (file)
@@ -703,6 +703,7 @@ int BlueFS::mkfs(uuid_d osd_uuid, const bluefs_layout_t& layout)
         _get_block_device_size(BlueFS::BDEV_WAL) * 95 / 100,
         _get_block_device_size(BlueFS::BDEV_DB) * 95 / 100,
         _get_block_device_size(BlueFS::BDEV_SLOW) * 95 / 100));
+    vselector->update_from_config(cct);
   }
 
   _init_logger();
@@ -1070,6 +1071,7 @@ int BlueFS::mount()
         _get_block_device_size(BlueFS::BDEV_WAL) * 95 / 100,
         _get_block_device_size(BlueFS::BDEV_DB) * 95 / 100,
         _get_block_device_size(BlueFS::BDEV_SLOW) * 95 / 100));
+    vselector->update_from_config(cct);
   }
 
   _init_alloc();
index a7e12e02e737a4c486b37c7675522378f30640b7..730b6913ee4b5ce01de3bffcf2339b8f9828f9d9 100644 (file)
@@ -98,6 +98,13 @@ public:
 
   virtual ~BlueFSVolumeSelector() {
   }
+
+  /**
+  *  Update config parameters from the config database.
+  *
+  */
+  virtual void update_from_config(CephContext* cct) = 0;
+
   /**
   *  Method to learn a hint (aka logic level discriminator)  specific for
   *  BlueFS log
@@ -860,10 +867,16 @@ public:
     vselector.reset(s);
   }
   void dump_volume_selector(std::ostream& sout) {
+    ceph_assert(vselector);
     vselector->dump(sout);
   }
+  void update_volume_selector_from_config() {
+    ceph_assert(vselector);
+    vselector->update_from_config(cct);
+  }
   void get_vselector_paths(const std::string& base,
                            BlueFSVolumeSelector::paths& res) const {
+    ceph_assert(vselector);
     return vselector->get_paths(base, res);
   }
 
@@ -950,6 +963,7 @@ public:
     uint64_t _slow_total)
     : wal_total(_wal_total), db_total(_db_total), slow_total(_slow_total) {}
 
+  void update_from_config(CephContext* cct) override {}
   void* get_hint_for_log() const override;
   void* get_hint_by_dir(std::string_view dirname) const override;
 
index 841f9509bc15d19c6953848ccac0018726abbec8..6323c91071314d83a842d0b2709322fe930fa06a 100644 (file)
@@ -5743,7 +5743,9 @@ std::vector<std::string> BlueStore::get_tracked_keys() const noexcept
     "bluestore_warn_on_no_per_pg_omap"s,
     "bluestore_max_defer_interval"s,
     "bluestore_onode_segment_size"s,
-    "bluestore_allocator_lookup_policy"s
+    "bluestore_allocator_lookup_policy"s,
+    "bluestore_volume_selection_reserved_factor"s,
+    "bluestore_volume_selection_reserved"s
   };
 }
 
@@ -5818,6 +5820,11 @@ void BlueStore::handle_conf_change(const ConfigProxy& conf,
   if (changed.count("bluestore_allocator_lookup_policy")) {
     _update_allocator_lookup_policy();
   }
+  if (changed.count("bluestore_volume_selection_reserved_factor") ||
+    changed.count("bluestore_volume_selection_reserved")) {
+    if (bluefs)
+      bluefs->update_volume_selector_from_config();
+  }
 }
 
 void BlueStore::_set_compression()
@@ -7662,20 +7669,16 @@ int BlueStore::_open_bluefs(bool create, bool read_only)
         bluefs->get_block_device_size(BlueFS::BDEV_DB) * 95 / 100,
         bluefs->get_block_device_size(BlueFS::BDEV_SLOW) * 95 / 100);
     } else {
-      double reserved_factor = cct->_conf->bluestore_volume_selection_reserved_factor;
-      vselector =
-        new RocksDBBlueFSVolumeSelector(
-          bluefs->get_block_device_size(BlueFS::BDEV_WAL) * 95 / 100,
-          bluefs->get_block_device_size(BlueFS::BDEV_DB) * 95 / 100,
-          bluefs->get_block_device_size(BlueFS::BDEV_SLOW) * 95 / 100,
-         rocks_opts.write_buffer_size * rocks_opts.max_write_buffer_number,
-          rocks_opts.max_bytes_for_level_base,
-          rocks_opts.max_bytes_for_level_multiplier,
-          reserved_factor,
-          cct->_conf->bluestore_volume_selection_reserved,
-          cct->_conf->bluestore_volume_selection_policy.find("use_some_extra")
-             == 0);
+      vselector = new RocksDBBlueFSVolumeSelector(
+       bluefs->get_block_device_size(BlueFS::BDEV_WAL) * 95 / 100,
+       bluefs->get_block_device_size(BlueFS::BDEV_DB) * 95 / 100,
+       bluefs->get_block_device_size(BlueFS::BDEV_SLOW) * 95 / 100,
+       rocks_opts.write_buffer_size * rocks_opts.max_write_buffer_number,
+       rocks_opts.max_bytes_for_level_base,
+       rocks_opts.max_bytes_for_level_multiplier,
+       cct->_conf->bluestore_volume_selection_policy.find("use_some_extra") == 0);
     }    
+    vselector->update_from_config(cct);
   }
   if (create) {
     bluefs->mkfs(fsid, bluefs_layout);
@@ -19740,9 +19743,7 @@ void RocksDBBlueFSVolumeSelector::dump(ostream& sout) {
 
 BlueFSVolumeSelector* RocksDBBlueFSVolumeSelector::clone_empty() const {
   RocksDBBlueFSVolumeSelector* ns =
-    new RocksDBBlueFSVolumeSelector(0, 0, 0,
-                                   0, 0, 0,
-                                   0, 0, false);
+    new RocksDBBlueFSVolumeSelector(0, 0, 0, 0, 0, 0, false);
   return ns;
 }
 
index 373eefe4caaf38903e249aa05a902e13700e2a4a..200ff3bab4a30906357e159321f6ebaaa12e1d8c 100644 (file)
@@ -4471,6 +4471,7 @@ class RocksDBBlueFSVolumeSelector : public BlueFSVolumeSelector
   uint64_t level0_size = 0;
   uint64_t level_base = 0;
   uint64_t level_multiplier = 0;
+  bool new_pol = false;
   size_t extra_level = 0;
   enum {
     OLD_POLICY,
@@ -4485,37 +4486,49 @@ public:
     uint64_t _level0_size,
     uint64_t _level_base,
     uint64_t _level_multiplier,
-    double reserved_factor,
-    uint64_t reserved,
-    bool new_pol)
-  {
+    bool _new_pol) {
+
     l_totals[LEVEL_LOG - LEVEL_FIRST] = 0; // not used at the moment
     l_totals[LEVEL_WAL - LEVEL_FIRST] = _wal_total;
     l_totals[LEVEL_DB - LEVEL_FIRST] = _db_total;
     l_totals[LEVEL_SLOW - LEVEL_FIRST] = _slow_total;
 
+    level0_size = _level0_size;
+    level_base = _level_base;
+    level_multiplier = _level_multiplier;
+
+    new_pol = _new_pol;
+  }
+
+  void update_from_config(CephContext* cct) override
+  {
     if (!new_pol) {
       return;
     }
+
+    db_avail4slow = 0;
+    extra_level = 0;
+    double reserved_factor =
+      cct->_conf->bluestore_volume_selection_reserved_factor;
+    uint64_t reserved = cct->_conf->bluestore_volume_selection_reserved;
+
+    auto db_total = l_totals[LEVEL_DB - LEVEL_FIRST];
     // Calculating how much extra space is available at DB volume.
     // Depending on the presence of explicit reserved size specification it might be either
     // * DB volume size - reserved
     // or
     // * DB volume size - sum_max_level_size(0, L-1) - max_level_size(L) * reserved_factor
     if (!reserved) {
-      level0_size = _level0_size;
-      level_base = _level_base;
-      level_multiplier = _level_multiplier;
-      uint64_t prev_levels = _level0_size;
-      uint64_t cur_level = _level_base;
+      uint64_t prev_levels = level0_size;
+      uint64_t cur_level = level_base;
       extra_level = 1;
       do {
-       uint64_t next_level = cur_level * _level_multiplier;
+       uint64_t next_level = cur_level * level_multiplier;
         uint64_t next_threshold = prev_levels + cur_level + next_level;
         ++extra_level;
-        if (_db_total <= next_threshold) {
+        if (db_total <= next_threshold) {
          uint64_t cur_threshold = prev_levels + cur_level * reserved_factor;
-          db_avail4slow = cur_threshold < _db_total ? _db_total - cur_threshold : 0;
+          db_avail4slow = cur_threshold < db_total ? db_total - cur_threshold : 0;
           break;
         } else {
           prev_levels += cur_level;
@@ -4523,7 +4536,7 @@ public:
         }
       } while (true);
     } else {
-      db_avail4slow = reserved < _db_total ? _db_total - reserved : 0;
+      db_avail4slow = reserved < db_total ? db_total - reserved : 0;
       extra_level = 0;
     }
   }
index 35987df4d4826ee6f158c52751d597edb393298a..113d47d3caa24841e40ee79a1d0edb748256c4d3 100644 (file)
@@ -22,10 +22,9 @@ TEST(rocksdb_bluefs_vselector, basic) {
     1ull << 30,
     level_base,
     level_multi,
-    g_ceph_context->_conf->bluestore_volume_selection_reserved_factor,
-    g_ceph_context->_conf->bluestore_volume_selection_reserved,
     g_ceph_context->_conf->bluestore_volume_selection_policy.find("use_some_extra")
       == 0);
+  selector.update_from_config(g_ceph_context);
 
   // taken from RocksDBBlueFSVolumeSelector::
   size_t log_bdev = 1; // LEVEL_LOG