From 41ecee36cb9ce986656bd6910bf56caf917b99d0 Mon Sep 17 00:00:00 2001 From: Igor Fedotov Date: Mon, 2 Jun 2025 18:12:28 +0300 Subject: [PATCH] os/bluestore: make vselector reserved* parameters applicable in run-time Hence one can change 'bluestore_volume_selection_reserved' and 'bluestore_volume_selection_reserved_factor' on the fly Signed-off-by: Igor Fedotov --- src/os/bluestore/BlueFS.cc | 2 + src/os/bluestore/BlueFS.h | 14 +++++++ src/os/bluestore/BlueStore.cc | 35 +++++++++-------- src/os/bluestore/BlueStore.h | 39 ++++++++++++------- .../objectstore/test_bluestore_vselector.cc | 3 +- 5 files changed, 61 insertions(+), 32 deletions(-) diff --git a/src/os/bluestore/BlueFS.cc b/src/os/bluestore/BlueFS.cc index a8f961074a507..0a577f24bf694 100644 --- a/src/os/bluestore/BlueFS.cc +++ b/src/os/bluestore/BlueFS.cc @@ -703,6 +703,7 @@ int BlueFS::mkfs(uuid_d osd_uuid, const bluefs_layout_t& layout) _get_block_device_size(BlueFS::BDEV_WAL) * 95 / 100, _get_block_device_size(BlueFS::BDEV_DB) * 95 / 100, _get_block_device_size(BlueFS::BDEV_SLOW) * 95 / 100)); + vselector->update_from_config(cct); } _init_logger(); @@ -1070,6 +1071,7 @@ int BlueFS::mount() _get_block_device_size(BlueFS::BDEV_WAL) * 95 / 100, _get_block_device_size(BlueFS::BDEV_DB) * 95 / 100, _get_block_device_size(BlueFS::BDEV_SLOW) * 95 / 100)); + vselector->update_from_config(cct); } _init_alloc(); diff --git a/src/os/bluestore/BlueFS.h b/src/os/bluestore/BlueFS.h index a7e12e02e737a..730b6913ee4b5 100644 --- a/src/os/bluestore/BlueFS.h +++ b/src/os/bluestore/BlueFS.h @@ -98,6 +98,13 @@ public: virtual ~BlueFSVolumeSelector() { } + + /** + * Update config parameters from the config database. + * + */ + virtual void update_from_config(CephContext* cct) = 0; + /** * Method to learn a hint (aka logic level discriminator) specific for * BlueFS log @@ -860,10 +867,16 @@ public: vselector.reset(s); } void dump_volume_selector(std::ostream& sout) { + ceph_assert(vselector); vselector->dump(sout); } + void update_volume_selector_from_config() { + ceph_assert(vselector); + vselector->update_from_config(cct); + } void get_vselector_paths(const std::string& base, BlueFSVolumeSelector::paths& res) const { + ceph_assert(vselector); return vselector->get_paths(base, res); } @@ -950,6 +963,7 @@ public: uint64_t _slow_total) : wal_total(_wal_total), db_total(_db_total), slow_total(_slow_total) {} + void update_from_config(CephContext* cct) override {} void* get_hint_for_log() const override; void* get_hint_by_dir(std::string_view dirname) const override; diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index 841f9509bc15d..6323c91071314 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -5743,7 +5743,9 @@ std::vector BlueStore::get_tracked_keys() const noexcept "bluestore_warn_on_no_per_pg_omap"s, "bluestore_max_defer_interval"s, "bluestore_onode_segment_size"s, - "bluestore_allocator_lookup_policy"s + "bluestore_allocator_lookup_policy"s, + "bluestore_volume_selection_reserved_factor"s, + "bluestore_volume_selection_reserved"s }; } @@ -5818,6 +5820,11 @@ void BlueStore::handle_conf_change(const ConfigProxy& conf, if (changed.count("bluestore_allocator_lookup_policy")) { _update_allocator_lookup_policy(); } + if (changed.count("bluestore_volume_selection_reserved_factor") || + changed.count("bluestore_volume_selection_reserved")) { + if (bluefs) + bluefs->update_volume_selector_from_config(); + } } void BlueStore::_set_compression() @@ -7662,20 +7669,16 @@ int BlueStore::_open_bluefs(bool create, bool read_only) bluefs->get_block_device_size(BlueFS::BDEV_DB) * 95 / 100, bluefs->get_block_device_size(BlueFS::BDEV_SLOW) * 95 / 100); } else { - double reserved_factor = cct->_conf->bluestore_volume_selection_reserved_factor; - vselector = - new RocksDBBlueFSVolumeSelector( - bluefs->get_block_device_size(BlueFS::BDEV_WAL) * 95 / 100, - bluefs->get_block_device_size(BlueFS::BDEV_DB) * 95 / 100, - bluefs->get_block_device_size(BlueFS::BDEV_SLOW) * 95 / 100, - rocks_opts.write_buffer_size * rocks_opts.max_write_buffer_number, - rocks_opts.max_bytes_for_level_base, - rocks_opts.max_bytes_for_level_multiplier, - reserved_factor, - cct->_conf->bluestore_volume_selection_reserved, - cct->_conf->bluestore_volume_selection_policy.find("use_some_extra") - == 0); + vselector = new RocksDBBlueFSVolumeSelector( + bluefs->get_block_device_size(BlueFS::BDEV_WAL) * 95 / 100, + bluefs->get_block_device_size(BlueFS::BDEV_DB) * 95 / 100, + bluefs->get_block_device_size(BlueFS::BDEV_SLOW) * 95 / 100, + rocks_opts.write_buffer_size * rocks_opts.max_write_buffer_number, + rocks_opts.max_bytes_for_level_base, + rocks_opts.max_bytes_for_level_multiplier, + cct->_conf->bluestore_volume_selection_policy.find("use_some_extra") == 0); } + vselector->update_from_config(cct); } if (create) { bluefs->mkfs(fsid, bluefs_layout); @@ -19740,9 +19743,7 @@ void RocksDBBlueFSVolumeSelector::dump(ostream& sout) { BlueFSVolumeSelector* RocksDBBlueFSVolumeSelector::clone_empty() const { RocksDBBlueFSVolumeSelector* ns = - new RocksDBBlueFSVolumeSelector(0, 0, 0, - 0, 0, 0, - 0, 0, false); + new RocksDBBlueFSVolumeSelector(0, 0, 0, 0, 0, 0, false); return ns; } diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h index 373eefe4caaf3..200ff3bab4a30 100644 --- a/src/os/bluestore/BlueStore.h +++ b/src/os/bluestore/BlueStore.h @@ -4471,6 +4471,7 @@ class RocksDBBlueFSVolumeSelector : public BlueFSVolumeSelector uint64_t level0_size = 0; uint64_t level_base = 0; uint64_t level_multiplier = 0; + bool new_pol = false; size_t extra_level = 0; enum { OLD_POLICY, @@ -4485,37 +4486,49 @@ public: uint64_t _level0_size, uint64_t _level_base, uint64_t _level_multiplier, - double reserved_factor, - uint64_t reserved, - bool new_pol) - { + bool _new_pol) { + l_totals[LEVEL_LOG - LEVEL_FIRST] = 0; // not used at the moment l_totals[LEVEL_WAL - LEVEL_FIRST] = _wal_total; l_totals[LEVEL_DB - LEVEL_FIRST] = _db_total; l_totals[LEVEL_SLOW - LEVEL_FIRST] = _slow_total; + level0_size = _level0_size; + level_base = _level_base; + level_multiplier = _level_multiplier; + + new_pol = _new_pol; + } + + void update_from_config(CephContext* cct) override + { if (!new_pol) { return; } + + db_avail4slow = 0; + extra_level = 0; + double reserved_factor = + cct->_conf->bluestore_volume_selection_reserved_factor; + uint64_t reserved = cct->_conf->bluestore_volume_selection_reserved; + + auto db_total = l_totals[LEVEL_DB - LEVEL_FIRST]; // Calculating how much extra space is available at DB volume. // Depending on the presence of explicit reserved size specification it might be either // * DB volume size - reserved // or // * DB volume size - sum_max_level_size(0, L-1) - max_level_size(L) * reserved_factor if (!reserved) { - level0_size = _level0_size; - level_base = _level_base; - level_multiplier = _level_multiplier; - uint64_t prev_levels = _level0_size; - uint64_t cur_level = _level_base; + uint64_t prev_levels = level0_size; + uint64_t cur_level = level_base; extra_level = 1; do { - uint64_t next_level = cur_level * _level_multiplier; + uint64_t next_level = cur_level * level_multiplier; uint64_t next_threshold = prev_levels + cur_level + next_level; ++extra_level; - if (_db_total <= next_threshold) { + if (db_total <= next_threshold) { uint64_t cur_threshold = prev_levels + cur_level * reserved_factor; - db_avail4slow = cur_threshold < _db_total ? _db_total - cur_threshold : 0; + db_avail4slow = cur_threshold < db_total ? db_total - cur_threshold : 0; break; } else { prev_levels += cur_level; @@ -4523,7 +4536,7 @@ public: } } while (true); } else { - db_avail4slow = reserved < _db_total ? _db_total - reserved : 0; + db_avail4slow = reserved < db_total ? db_total - reserved : 0; extra_level = 0; } } diff --git a/src/test/objectstore/test_bluestore_vselector.cc b/src/test/objectstore/test_bluestore_vselector.cc index 35987df4d4826..113d47d3caa24 100644 --- a/src/test/objectstore/test_bluestore_vselector.cc +++ b/src/test/objectstore/test_bluestore_vselector.cc @@ -22,10 +22,9 @@ TEST(rocksdb_bluefs_vselector, basic) { 1ull << 30, level_base, level_multi, - g_ceph_context->_conf->bluestore_volume_selection_reserved_factor, - g_ceph_context->_conf->bluestore_volume_selection_reserved, g_ceph_context->_conf->bluestore_volume_selection_policy.find("use_some_extra") == 0); + selector.update_from_config(g_ceph_context); // taken from RocksDBBlueFSVolumeSelector:: size_t log_bdev = 1; // LEVEL_LOG -- 2.47.3