From f6a9ee72d70176212985efca8ef9d1cf3a1c52a0 Mon Sep 17 00:00:00 2001 From: Samuel Just Date: Mon, 25 Apr 2022 12:29:49 -0700 Subject: [PATCH] bluestore: Revert "os/bluestore: Add CoDel to BlueStore for Bufferbloat mitigation" Merge caused: https://tracker.ceph.com/issues/55433 Signed-off-by: Samuel Just --- doc/dev/bluestore-codel.rst | 41 --- qa/objectstore_debug/bluestore-codel.yaml | 45 --- src/common/options/global.yaml.in | 69 ----- src/common/regression_utils.h | 127 -------- src/crimson/os/alienstore/CMakeLists.txt | 1 - src/os/CMakeLists.txt | 1 - src/os/bluestore/BlueStore.cc | 42 --- src/os/bluestore/BlueStore.h | 10 - src/os/bluestore/BlueStoreSlowFastCoDel.cc | 272 ------------------ src/os/bluestore/BlueStoreSlowFastCoDel.h | 128 --------- src/test/common/CMakeLists.txt | 5 - src/test/common/test_regression_utils.cc | 89 ------ src/test/os/CMakeLists.txt | 1 - src/test/os/bluestore/CMakeLists.txt | 15 - .../bluestore/TestBlueStoreSlowFastCoDel.cc | 191 ------------ 15 files changed, 1037 deletions(-) delete mode 100644 doc/dev/bluestore-codel.rst delete mode 100644 qa/objectstore_debug/bluestore-codel.yaml delete mode 100644 src/common/regression_utils.h delete mode 100644 src/os/bluestore/BlueStoreSlowFastCoDel.cc delete mode 100644 src/os/bluestore/BlueStoreSlowFastCoDel.h delete mode 100644 src/test/common/test_regression_utils.cc delete mode 100644 src/test/os/bluestore/CMakeLists.txt delete mode 100644 src/test/os/bluestore/TestBlueStoreSlowFastCoDel.cc diff --git a/doc/dev/bluestore-codel.rst b/doc/dev/bluestore-codel.rst deleted file mode 100644 index 4aa0154456b..00000000000 --- a/doc/dev/bluestore-codel.rst +++ /dev/null @@ -1,41 +0,0 @@ -============================================ -BlueStore Bufferbloat Mitigation Using CoDel -============================================ - - -Introduction ------------- -Bufferbloat happens when a frontend buffer too much data to a backend. -This can introduce latency spikes to the backend and compromise the -request schedulability of the frontend. - -BlueStore has the bufferbloat problem due to its large queue. All -write requests are submitted immediately to BlueStore to achieve high -performance. However, this can compromise request schedulability in OSD. -As a solution, the CoDel algorithm is implemented in the BlueStore as -an admission control system to control the amount of transaction -submitted to BlueStore. This mechanism will negatively impact the -throughput of BlueStore. However, a tradeoff parameter has been introduced -to control BlueStore throughput loss versus BlueStore latency decrease. - -Configurations --------------- -CoDel can be enabled using "*bluestore_codel*" config. The other important -config that needs to be set is "*bluestore_codel_throughput_latency_tradeoff*". -This config adjust the tradeoff between BlueStore throughput loss and -BlueStore latency decrease. This parameter defines the amount of throughput -loss in MB/s for one ms decrease in BlueStore latency. For example, a value -of 5 means that we are willing to lose maximum of 5 MB/s of throughput for -every 1 ms decrease in BlueStore latency. - -Experiments ------------ -For measuring the impact of BlueStore CoDel on BlueStore, we measured the -transaction latency inside the BlueStore (BlueStore latency) and BlueStore -throughput. We compared this measurements with measurements from Vanilla BlueStore. -These experiments shows that: - -1. The BlueStore CoDel can decrease the BlueStore latency by small and controllable -impact on throughput. -2. The BlueStore CoDel can react to workload changes to keep the desired tradeoff -between latency and throughput. diff --git a/qa/objectstore_debug/bluestore-codel.yaml b/qa/objectstore_debug/bluestore-codel.yaml deleted file mode 100644 index 40f6fc084e1..00000000000 --- a/qa/objectstore_debug/bluestore-codel.yaml +++ /dev/null @@ -1,45 +0,0 @@ -overrides: - thrashosds: - bdev_inject_crash: 2 - bdev_inject_crash_probability: .5 - ceph: - fs: xfs - conf: - osd: - osd objectstore: bluestore - bluestore block size: 96636764160 - debug bluestore: 20 - debug bluefs: 20 - debug rocksdb: 10 - bluestore fsck on mount: true - bluestore allocator: bitmap - # lower the full ratios since we can fill up a 100gb osd so quickly - mon osd full ratio: .9 - mon osd backfillfull_ratio: .85 - mon osd nearfull ratio: .8 - osd failsafe full ratio: .95 - # this doesn't work with failures bc the log writes are not atomic across the two backends - # bluestore bluefs env mirror: true - bdev enable discard: true - bdev async discard: true - bluestore codel: true - ceph-deploy: - fs: xfs - bluestore: yes - conf: - osd: - osd objectstore: bluestore - bluestore block size: 96636764160 - debug bluestore: 20 - debug bluefs: 20 - debug rocksdb: 10 - bluestore fsck on mount: true - # lower the full ratios since we can fill up a 100gb osd so quickly - mon osd full ratio: .9 - mon osd backfillfull_ratio: .85 - mon osd nearfull ratio: .8 - osd failsafe full ratio: .95 - bdev enable discard: true - bdev async discard: true - bluestore codel: true - diff --git a/src/common/options/global.yaml.in b/src/common/options/global.yaml.in index 66c180b7b24..bf20361ed06 100644 --- a/src/common/options/global.yaml.in +++ b/src/common/options/global.yaml.in @@ -6312,72 +6312,3 @@ options: default: 0 services: - mgr -- name: bluestore_codel - type: bool - level: advanced - desc: enable/disable bluestore SlowFastCodel - default: false - with_legacy: true -- name: bluestore_codel_throughput_latency_tradeoff - type: float - level: advanced - desc: adjust the tradeoff between throughput and bluestore latency in SlowFastCodel - long_desc: This parameter defines the amount of throughput loss (MB/s) for one ms - decrease in bluestore latency. (a value of 5 means that we are willing to lose - maximum of 5 MB/s of throughput for every 1 ms decrease in bluestore latency) - default: 5 - with_legacy: true -- name: bluestore_codel_initial_target_latency - type: float - level: advanced - desc: initial target latency for SlowFastCodel in ms - default: 5.0 - with_legacy: true -- name: bluestore_codel_slow_interval - type: float - level: advanced - desc: the interval of slow loop in SlowFastCodel in ms (this parameter should be larger that 'bluestore_codel_fast_interval') - default: 500.0 - with_legacy: true -- name: bluestore_codel_fast_interval - type: float - level: advanced - desc: the interval of the fast loop in SlowFastCodel in ms - default: 50.0 - with_legacy: true -- name: bluestore_codel_min_target_latency - type: float - level: advanced - desc: the minimum possible target latency in SlowFastCodel in ms - default: 1.0 - with_legacy: true -- name: bluestore_codel_max_target_latency - type: float - level: advanced - desc: the maximum possible target latency in SlowFastCodel in ms - default: 1000.0 - with_legacy: true -- name: bluestore_codel_initial_budget_bytes - type: size - level: advanced - desc: the initial bluestore throttle budget in SlowFastCodel - default: 100_K - with_legacy: true -- name: bluestore_codel_min_budget_bytes - type: size - level: advanced - desc: the minimum bluestore throttle budget in SlowFastCodel - default: 100_K - with_legacy: true -- name: bluestore_codel_budget_increment_bytes - type: size - level: advanced - desc: the increment size for opening the bluestore throttle in SlowFastCodel - default: 10_K - with_legacy: true -- name: bluestore_codel_regression_history_size - type: int - level: advanced - desc: number of the slow interval throughput and latency samples that SlowFastCodel keeps for regression - default: 100 - with_legacy: true diff --git a/src/common/regression_utils.h b/src/common/regression_utils.h deleted file mode 100644 index 8f2182949b7..00000000000 --- a/src/common/regression_utils.h +++ /dev/null @@ -1,127 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- - -#pragma once - -#include -#include -#include -#include - -#define Z_P 2.33 // z score for 99th percentile - - -namespace ceph { - /*** - * Calculate the inverse of a 2x2 matrix. - * @param matrix& m, an square 2x2 matrix - * @return the inverse of the m (m^-1) - */ - static boost::numeric::ublas::matrix - matrix_inverse(boost::numeric::ublas::matrix &m) { - assert(m.size1() == m.size2() && - "Can only calculate the inverse of square matrices"); - assert(m.size1() == 2 && m.size2() == 2 && "Only for 2x2 matrices"); - boost::numeric::ublas::matrix m_inverse(2, 2); - const double a = m(0, 0); - const double b = m(0, 1); - const double c = m(1, 0); - const double d = m(1, 1); - const double determinant = 1.0 / ((a * d) - (b * c)); - m_inverse(0, 0) = d * determinant; - m_inverse(0, 1) = -b * determinant; - m_inverse(1, 0) = -c * determinant; - m_inverse(1, 1) = a * determinant; - return m_inverse; - } - - /*** - * Find a logarithmic function in form of "y = a + b * ln(x)" which fits - * the given points (x_values and y_values). - * @param std::vector x_values, x values for sample points - * @param std::vector y_values, y values for sample points - * @param double theta[2], holds the a and b as output (theta[0] = a and theta[1] = b) - */ - static void regression( - const std::vector &x_values, - const std::vector &y_values, - double theta[2]) { - assert(x_values.size() == y_values.size() && - "x and y values vectors should have a same size."); - const int n = x_values.size(); - - boost::numeric::ublas::matrix y_m(n, 1); - for (int i = 0; i < n; i++) { - y_m(i, 0) = y_values[i]; - } - - boost::numeric::ublas::scalar_matrix sm(n, 2, 1); - boost::numeric::ublas::matrix x_new_m(sm); - for (int i = 0; i < n; i++) { - x_new_m(i, 0) = 1; - x_new_m(i, 1) = std::log(x_values[i]); - } - boost::numeric::ublas::matrix x_new_trans_m = boost::numeric::ublas::trans( - x_new_m); - boost::numeric::ublas::matrix x_new_trans_dot_x_new_m = boost::numeric::ublas::prod( - x_new_trans_m, x_new_m); - boost::numeric::ublas::matrix temp_1_m = matrix_inverse( - x_new_trans_dot_x_new_m); - boost::numeric::ublas::matrix temp_2_m = boost::numeric::ublas::prod( - x_new_trans_m, y_m); - boost::numeric::ublas::matrix theta_m = boost::numeric::ublas::prod( - temp_1_m, temp_2_m); - theta[0] = theta_m(0, 0); - theta[1] = theta_m(1, 0); - } - - /*** - * Finds the x location on a fitted logarithmic curve on sample points where - * the slope is equal to target_slope - * @param x_values, x values for sample points - * @param y_values, y values for sample points - * @param target_slope, the slope that we are looking for - * @return the x location where the slope of the curve is target_slope - */ - static double find_slope_on_curve( - const std::vector &x_values, - const std::vector &y_values, - double target_slope) { - assert(x_values.size() == y_values.size() && - "x and y values vectors should have a same size."); - assert(target_slope != 0 && - "The target slope of zero will result to a inf x, try a nonzero value."); - assert(target_slope >= 0 && - "The target slope for a logarithmic function should be positive."); - double theta[2]; // theta[0] + theta[1] * ln(x) - regression(x_values, y_values, - theta); // find the logarithmic function using regression - double target_x = theta[1] / - target_slope; // find the x where the slope is close to target_slope - return target_x; - } - - /*** - * Finds the mu and std parameters of the lognormal distribution from its mode - * and x boundaries. - * @param mode, the mode of the distribution. - * @param min_x, x lower boundary of distribution (zero percentile) - * @param max_x, x upper boundary of distribution (99th percentile) - * @param params, holds the calculated distribution parameters (mu and std) as - * output (params[0] = mu and params[1] = std) - */ - static void - find_log_normal_dist_params(double mode, double min_x, double max_x, - double params[2]) { - assert(min_x < max_x && "The min_x should be smaller than max_x"); - assert(mode >= min_x && mode < max_x && - "The mode should be between min_x and max_x"); - double max_x_normalized = max_x - min_x; - double mode_normalized = mode - min_x; - double std_dev = (-Z_P + std::sqrt( - Z_P * Z_P + 4 * std::log(max_x_normalized) - - 4 * std::log(mode_normalized))) / 2; - double mu = std::log(max_x_normalized) - Z_P * std_dev; - params[0] = mu; - params[1] = std_dev; - } -} diff --git a/src/crimson/os/alienstore/CMakeLists.txt b/src/crimson/os/alienstore/CMakeLists.txt index 9d59225c79b..f006ba33a85 100644 --- a/src/crimson/os/alienstore/CMakeLists.txt +++ b/src/crimson/os/alienstore/CMakeLists.txt @@ -55,7 +55,6 @@ set(alien_store_srcs ${PROJECT_SOURCE_DIR}/src/os/bluestore/BlueStore.cc ${PROJECT_SOURCE_DIR}/src/os/bluestore/simple_bitmap.cc ${PROJECT_SOURCE_DIR}/src/os/bluestore/bluestore_types.cc - ${PROJECT_SOURCE_DIR}/src/os/bluestore/BlueStoreSlowFastCoDel.cc ${PROJECT_SOURCE_DIR}/src/os/bluestore/fastbmap_allocator_impl.cc ${PROJECT_SOURCE_DIR}/src/os/bluestore/FreelistManager.cc ${PROJECT_SOURCE_DIR}/src/os/bluestore/HybridAllocator.cc diff --git a/src/os/CMakeLists.txt b/src/os/CMakeLists.txt index 9bb6be0db3d..204a29fea8c 100644 --- a/src/os/CMakeLists.txt +++ b/src/os/CMakeLists.txt @@ -29,7 +29,6 @@ if(WITH_BLUESTORE) bluestore/BlueStore.cc bluestore/simple_bitmap.cc bluestore/bluestore_types.cc - bluestore/BlueStoreSlowFastCoDel.cc bluestore/fastbmap_allocator_impl.cc bluestore/FreelistManager.cc bluestore/StupidAllocator.cc diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index 95f69752ce7..55d08cfbddf 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -4601,15 +4601,6 @@ BlueStore::BlueStore(CephContext *cct, _init_logger(); cct->_conf.add_observer(this); set_cache_shards(1); - if ( cct->_conf->bluestore_codel) { - codel = std::make_unique( - cct, [this](int64_t x) mutable { - this->throttle.reset_kv_throttle_max(x); - }, - [this]() mutable { - return this->throttle.get_kv_throttle_current(); - }); - } } BlueStore::~BlueStore() @@ -4675,17 +4666,6 @@ const char **BlueStore::get_tracked_conf_keys() const "bluestore_warn_on_no_per_pool_omap", "bluestore_warn_on_no_per_pg_omap", "bluestore_max_defer_interval", - "bluestore_codel", - "bluestore_codel_slow_interval", - "bluestore_codel_fast_interval", - "bluestore_codel_initial_target_latency", - "bluestore_codel_min_target_latency", - "bluestore_codel_max_target_latency", - "bluestore_codel_throughput_latency_tradeoff", - "bluestore_codel_initial_budget_bytes", - "bluestore_codel_min_budget_bytes", - "bluestore_codel_budget_increment_bytes", - "bluestore_codel_regression_history_size", NULL }; return KEYS; @@ -4744,9 +4724,6 @@ void BlueStore::handle_conf_change(const ConfigProxy& conf, changed.count("bluestore_throttle_deferred_bytes") || changed.count("bluestore_throttle_trace_rate")) { throttle.reset_throttle(conf); - if (codel) { - codel->reset_bluestore_budget(); - } } if (changed.count("bluestore_max_defer_interval")) { if (bdev) { @@ -4759,21 +4736,6 @@ void BlueStore::handle_conf_change(const ConfigProxy& conf, changed.count("osd_memory_expected_fragmentation")) { _update_osd_memory_options(); } - if (changed.count("bluestore_codel") || - changed.count("bluestore_codel_slow_interval") || - changed.count("bluestore_codel_fast_interval") || - changed.count("bluestore_codel_initial_target_latency") || - changed.count("bluestore_codel_min_target_latency") || - changed.count("bluestore_codel_max_target_latency") || - changed.count("bluestore_codel_throughput_latency_tradeoff") || - changed.count("bluestore_codel_initial_budget_bytes") || - changed.count("bluestore_codel_min_budget_bytes") || - changed.count("bluestore_codel_budget_increment_bytes") || - changed.count("bluestore_codel_regression_history_size")) { - if (codel) { - codel->on_config_changed(cct); - } - } } void BlueStore::_set_compression() @@ -12612,9 +12574,6 @@ void BlueStore::_txc_state_proc(TransContext *txc) case TransContext::STATE_KV_DONE: throttle.log_state_latency(*txc, logger, l_bluestore_state_kv_done_lat); - if (codel) { - codel->update_from_txc_info(txc->txc_state_proc_start, txc->bytes); - } if (txc->deferred_txn) { txc->set_state(TransContext::STATE_DEFERRED_QUEUED); _deferred_queue(txc); @@ -14086,7 +14045,6 @@ int BlueStore::queue_transactions( logger->inc(l_bluestore_txc); // execute (start) - txc->txc_state_proc_start = mono_clock::now(); _txc_state_proc(txc); if (bdev->is_smr()) { diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h index 045b94b6660..b1f409f7b22 100644 --- a/src/os/bluestore/BlueStore.h +++ b/src/os/bluestore/BlueStore.h @@ -51,7 +51,6 @@ #include "bluestore_types.h" #include "BlueFS.h" #include "common/EventTrace.h" -#include "BlueStoreSlowFastCoDel.h" #ifdef WITH_BLKIN #include "common/zipkin_trace.h" @@ -1725,7 +1724,6 @@ public: uint64_t seq = 0; ceph::mono_clock::time_point start; ceph::mono_clock::time_point last_stamp; - ceph::mono_clock::time_point txc_state_proc_start; uint64_t last_nid = 0; ///< if non-zero, highest new nid we allocated uint64_t last_blobid = 0; ///< if non-zero, highest new blobid we allocated @@ -1902,16 +1900,8 @@ public: trace_period_mcs = rate > 0 ? floor((1/rate) * 1000000.0) : 0; #endif } - int64_t get_kv_throttle_current() { - return throttle_bytes.get_current(); - } - void reset_kv_throttle_max(int64_t m) { - throttle_bytes.reset_max(m); - } } throttle; - std::unique_ptr codel; - typedef boost::intrusive::list< TransContext, boost::intrusive::member_hook< diff --git a/src/os/bluestore/BlueStoreSlowFastCoDel.cc b/src/os/bluestore/BlueStoreSlowFastCoDel.cc deleted file mode 100644 index e85a934ec82..00000000000 --- a/src/os/bluestore/BlueStoreSlowFastCoDel.cc +++ /dev/null @@ -1,272 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- - -#include "BlueStoreSlowFastCoDel.h" - -#include "common/regression_utils.h" - -BlueStoreSlowFastCoDel::BlueStoreSlowFastCoDel( - CephContext *_cct, - std::function _bluestore_budget_reset_callback, - std::function _get_kv_throttle_current) : - fast_timer(_cct, fast_timer_lock), - slow_timer(_cct, slow_timer_lock), - bluestore_budget_reset_callback(_bluestore_budget_reset_callback), - get_kv_throttle_current(_get_kv_throttle_current) { - on_config_changed(_cct); -} - -BlueStoreSlowFastCoDel::~BlueStoreSlowFastCoDel() { - { - std::lock_guard l1{fast_timer_lock}; - fast_timer.cancel_all_events(); - fast_timer.shutdown(); - } - - { - std::lock_guard l2{slow_timer_lock}; - slow_timer.cancel_all_events(); - slow_timer.shutdown(); - } - - regression_throughput_history.clear(); - regression_target_latency_history.clear(); -} - -void BlueStoreSlowFastCoDel::update_from_txc_info( - ceph::mono_clock::time_point txc_start_time, - uint64_t txc_bytes) { - std::lock_guard l(register_lock); - ceph::mono_clock::time_point now = ceph::mono_clock::now(); - int64_t latency = std::chrono::nanoseconds(now - txc_start_time).count(); - - if (activated && max_queue_length < get_kv_throttle_current()) { - max_queue_length = get_kv_throttle_current(); - } - if (min_latency == INITIAL_LATENCY_VALUE || latency < min_latency) { - min_latency = latency; - } - slow_interval_txc_cnt++; - slow_interval_registered_bytes += txc_bytes; -} - -void BlueStoreSlowFastCoDel::on_min_latency_violation() { - if (target_latency > 0) { - double diff = (double) (target_latency - min_latency); - auto error_ratio = std::abs(diff) / min_latency; - if (error_ratio > 0.5) { - error_ratio = 0.5; - } - bluestore_budget = std::max(bluestore_budget * (1 - error_ratio), - min_bluestore_budget * 1.0); - } -} - -void BlueStoreSlowFastCoDel::on_no_violation() { - if (bluestore_budget < max_queue_length * 1.5) { - bluestore_budget = bluestore_budget + bluestore_budget_increment; - } -} - -void BlueStoreSlowFastCoDel::on_config_changed(CephContext *cct) { - { - std::lock_guard l(register_lock); - - activated = cct->_conf->bluestore_codel; - target_slope = cct->_conf->bluestore_codel_throughput_latency_tradeoff; - slow_interval = ((int64_t) cct->_conf->bluestore_codel_slow_interval) * - 1000 * 1000; - initial_fast_interval = ((int64_t) - cct->_conf->bluestore_codel_fast_interval) * 1000 * 1000; - initial_target_latency = ((int64_t) - cct->_conf->bluestore_codel_initial_target_latency) * 1000 * 1000; - min_target_latency = ((int64_t) - cct->_conf->bluestore_codel_min_target_latency) * 1000 * 1000; - max_target_latency = ((int64_t) - cct->_conf->bluestore_codel_max_target_latency) * 1000 * 1000; - initial_bluestore_budget = cct->_conf->bluestore_codel_initial_budget_bytes; - min_bluestore_budget = cct->_conf->bluestore_codel_min_budget_bytes; - bluestore_budget_increment = - cct->_conf->bluestore_codel_budget_increment_bytes; - regression_history_size = - cct->_conf->bluestore_codel_regression_history_size; - - bluestore_budget = initial_bluestore_budget; - min_bluestore_budget = initial_bluestore_budget; - max_queue_length = min_bluestore_budget; - fast_interval = initial_fast_interval; - target_latency = initial_target_latency; - min_latency = INITIAL_LATENCY_VALUE; - slow_interval_registered_bytes = 0; - regression_throughput_history.clear(); - regression_target_latency_history.clear(); - slow_interval_start = ceph::mono_clock::zero(); - } - - { - std::lock_guard l1{fast_timer_lock}; - fast_timer.cancel_all_events(); - fast_timer.init(); - } - _fast_interval_process(); - { - std::lock_guard l2{slow_timer_lock}; - slow_timer.cancel_all_events(); - slow_timer.init(); - } - _slow_interval_process(); -} - -void BlueStoreSlowFastCoDel::reset_bluestore_budget() { - if (activated) { - bluestore_budget = std::max(min_bluestore_budget, bluestore_budget); - bluestore_budget_reset_callback(bluestore_budget); - } -} - -void BlueStoreSlowFastCoDel::_fast_interval_process() { - std::lock_guard l(register_lock); - if (target_latency != INITIAL_LATENCY_VALUE && - min_latency != INITIAL_LATENCY_VALUE) { - if (activated) { - if (_check_latency_violation()) { - // min latency violation - violation_count++; - _update_interval(); - on_min_latency_violation(); // handle the violation - } else { - // no latency violation - violation_count = 0; - fast_interval = initial_fast_interval; - on_no_violation(); - } - bluestore_budget = std::max(min_bluestore_budget, bluestore_budget); - bluestore_budget_reset_callback(bluestore_budget); - } - - // reset interval - min_latency = INITIAL_LATENCY_VALUE; - - on_fast_interval_finished(); - } - - auto codel_ctx = new LambdaContext( - [this](int r) { - _fast_interval_process(); - }); - auto interval_duration = std::chrono::nanoseconds(fast_interval); - fast_timer.add_event_after(interval_duration, codel_ctx); -} - -void BlueStoreSlowFastCoDel::_slow_interval_process() { - std::lock_guard l(register_lock); - ceph::mono_clock::time_point now = ceph::mono_clock::now(); - if (activated && !ceph::mono_clock::is_zero(slow_interval_start) - && slow_interval_txc_cnt > 0) { - double time_sec = nanosec_to_sec( - std::chrono::nanoseconds(now - slow_interval_start).count()); - - double slow_interval_throughput = - (slow_interval_registered_bytes * 1.0) / time_sec; - slow_interval_throughput = slow_interval_throughput / (1024.0 * 1024.0); - regression_target_latency_history.push_back( - nanosec_to_millisec(target_latency)); - regression_throughput_history.push_back(slow_interval_throughput); - if (regression_target_latency_history.size() > regression_history_size) { - regression_target_latency_history.erase( - regression_target_latency_history.begin()); - regression_throughput_history.erase( - regression_throughput_history.begin()); - } - std::vector targets; - std::vector throughputs; - double target_ms = nanosec_to_millisec(initial_target_latency); - // If there is sufficient number of points, use the regression to find the - // target_ms. Otherwise, target_ms will be initial_target_latency - if (regression_target_latency_history.size() >= regression_history_size) { - target_ms = ceph::find_slope_on_curve( - regression_target_latency_history, - regression_throughput_history, - target_slope); - } - - target_latency_without_noise = millisec_to_nanosec(target_ms); - target_latency_without_noise = std::max(target_latency_without_noise, - min_target_latency); - target_latency_without_noise = std::min(target_latency_without_noise, - max_target_latency); - target_ms = nanosec_to_millisec(target_latency_without_noise); - - // add log_normal noise - unsigned seed = std::chrono::system_clock::now().time_since_epoch().count(); - std::default_random_engine generator(seed); - double dist_params[2]; - double rnd_std_dev = 5; - ceph::find_log_normal_dist_params( - target_ms, - nanosec_to_millisec(min_target_latency), - target_ms * rnd_std_dev, - dist_params); - std::lognormal_distribution distribution(dist_params[0], - dist_params[1]); - - target_latency = millisec_to_nanosec(distribution(generator)); - target_latency += min_target_latency; - - if (target_latency < millisec_to_nanosec(target_ms)) { - std::uniform_real_distribution<> distr(0, 0.5); - target_latency = target_latency + - (target_latency - millisec_to_nanosec(target_ms)) * - distr(generator); - } - - if (target_latency != INITIAL_LATENCY_VALUE) { - target_latency = std::max(target_latency, min_target_latency); - target_latency = std::min(target_latency, max_target_latency); - } - - on_slow_interval_finished(); - } - - slow_interval_start = ceph::mono_clock::now(); - slow_interval_registered_bytes = 0; - slow_interval_txc_cnt = 0; - max_queue_length = min_bluestore_budget; - - auto codel_ctx = new LambdaContext( - [this](int r) { - _slow_interval_process(); - }); - auto interval_duration = std::chrono::nanoseconds(slow_interval); - slow_timer.add_event_after(interval_duration, codel_ctx); -} - - -/** -* check if the min latency violate the target -* @return true if min latency violate the target, false otherwise -*/ -bool BlueStoreSlowFastCoDel::_check_latency_violation() { - if (target_latency != INITIAL_LATENCY_VALUE && - min_latency != INITIAL_LATENCY_VALUE) { - if (min_latency > target_latency) { - return true; - } - } - return false; -} - -void BlueStoreSlowFastCoDel::_update_interval() { - auto sqrt = (int) std::round(std::sqrt(violation_count)); - fast_interval = initial_fast_interval / sqrt; - if (fast_interval <= 0) { - fast_interval = 1000; - } -} - -int64_t BlueStoreSlowFastCoDel::get_bluestore_budget() { - return bluestore_budget; -} - -int64_t BlueStoreSlowFastCoDel::get_target_latency() { - return target_latency; -} diff --git a/src/os/bluestore/BlueStoreSlowFastCoDel.h b/src/os/bluestore/BlueStoreSlowFastCoDel.h deleted file mode 100644 index 242260f00b4..00000000000 --- a/src/os/bluestore/BlueStoreSlowFastCoDel.h +++ /dev/null @@ -1,128 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- - -#pragma once - -#include - -#include "include/Context.h" -#include "common/Timer.h" -#include "common/ceph_time.h" - -class BlueStoreSlowFastCoDel { -public: - BlueStoreSlowFastCoDel( - CephContext *_cct, - std::function _bluestore_budget_reset_callback, - std::function _get_kv_throttle_current); - - virtual ~BlueStoreSlowFastCoDel(); - - void on_config_changed(CephContext *cct); - - void reset_bluestore_budget(); - - void update_from_txc_info( - ceph::mono_clock::time_point txc_start_time, - uint64_t txc_bytes); - - int64_t get_bluestore_budget(); - - int64_t get_target_latency(); - - bool is_activated(); - -protected: - static const int64_t INITIAL_LATENCY_VALUE = -1; - - /* config values */ - // Config value 'bluestore_codel',true if SlowFastCodel is activated - bool activated = false; - // Config value 'bluestore_codel_fast_interval', Initial interval for fast loop - int64_t initial_fast_interval = INITIAL_LATENCY_VALUE; - // Config value 'bluestore_codel_initial_target_latency', Initial target latency - // to start the algorithm - int64_t initial_target_latency = INITIAL_LATENCY_VALUE; - // Config value 'bluestore_codel_slow_interval', the interval for the slow loop - int64_t slow_interval = INITIAL_LATENCY_VALUE; - // Config value 'bluestore_codel_min_target_latency', min possible value for target - int64_t min_target_latency = INITIAL_LATENCY_VALUE; // in ns - // Config value 'bluestore_codel_max_target_latency', max possible value for target - int64_t max_target_latency = INITIAL_LATENCY_VALUE; // in ns - // Config value 'bluestore_codel_throughput_latency_tradeoff', define the - // tradeoff between throughput and latency (MB/s loss for every 1ms latency drop) - double target_slope = 5; - // Config value 'bluestore_codel_regression_history_size', regression history size - int64_t regression_history_size = 100; - // Config value 'bluestore_codel_min_budget_bytes', the minimum bluestore - // throttle budget - int64_t min_bluestore_budget = 102400; - // Config value 'bluestore_codel_initial_budget_bytes', the initial bluestore - // throttle budget - int64_t initial_bluestore_budget = 102400; - // Config value 'bluestore_codel_budget_increment_bytes', the increment size - // for opening the bluestore throttle - int64_t bluestore_budget_increment = 102400; - - /* internal state variables */ - // current interval for the fast loop - int64_t fast_interval = INITIAL_LATENCY_VALUE; - // current target latency that fast loop is using - int64_t target_latency = INITIAL_LATENCY_VALUE; - int64_t target_latency_without_noise = INITIAL_LATENCY_VALUE; - // min latency in the current fast interval - int64_t min_latency = INITIAL_LATENCY_VALUE; - int64_t violation_count = 0; - ceph::mutex fast_timer_lock = ceph::make_mutex("CoDel::fast_timer_lock"); - ceph::mutex slow_timer_lock = ceph::make_mutex("CoDel::slow_timer_lock"); - ceph::mutex register_lock = ceph::make_mutex("CoDel::register_lock"); - SafeTimer fast_timer; // fast loop timer - SafeTimer slow_timer; // slow loop timer - // marks the start of the current slow interval - ceph::mono_clock::time_point slow_interval_start = ceph::mono_clock::zero(); - // amount of bytes that has been processed in current slow interval - int64_t slow_interval_registered_bytes = 0; - // number of transactions that has been processed in current slow interval - int64_t slow_interval_txc_cnt = 0; - // target latency history for regression - std::vector regression_target_latency_history; - // throughput history for regression - std::vector regression_throughput_history; - int64_t bluestore_budget = 102400; // current bluestore throttle budget - // maximum amount of inflight data in current slow interval - int64_t max_queue_length = 102400; - std::function bluestore_budget_reset_callback; - std::function get_kv_throttle_current; - - void on_min_latency_violation(); - - void on_no_violation(); - - virtual void on_fast_interval_finished() {} - - virtual void on_slow_interval_finished() {} - -private: - - bool _check_latency_violation(); - - void _update_interval(); - - void _fast_interval_process(); - - void _slow_interval_process(); - - template - double millisec_to_nanosec(T ms) { - return ms * 1000.0 * 1000.0; - } - - template - double nanosec_to_millisec(T ns) { - return ns / (1000.0 * 1000.0); - } - - template - double nanosec_to_sec(T ns) { - return ns / (1000.0 * 1000.0 * 1000.0); - } -}; diff --git a/src/test/common/CMakeLists.txt b/src/test/common/CMakeLists.txt index 3b635406224..0e84b3d7228 100644 --- a/src/test/common/CMakeLists.txt +++ b/src/test/common/CMakeLists.txt @@ -155,11 +155,6 @@ add_executable(unittest_random add_ceph_unittest(unittest_random) target_link_libraries(unittest_random Boost::random) -# unittest_regression_utils -add_executable(unittest_regression_utils test_regression_utils.cc) -add_ceph_unittest(unittest_regression_utils) -target_link_libraries(unittest_regression_utils ceph-common global) - # unittest_throttle add_executable(unittest_throttle Throttle.cc diff --git a/src/test/common/test_regression_utils.cc b/src/test/common/test_regression_utils.cc deleted file mode 100644 index 6304e5d9ce8..00000000000 --- a/src/test/common/test_regression_utils.cc +++ /dev/null @@ -1,89 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab - -#include -#include -#include -#include "common/regression_utils.h" -#include -#include - -using namespace boost::numeric::ublas; - -std::vector generate_rand_vector(int size, int max_value) { - std::srand(std::time(0)); - std::vector rand_vec; - for (int i = 0; i < size; i++) { - double rand_value = std::rand() % max_value; - rand_vec.push_back(rand_value); - } - return rand_vec; -} - -matrix generate_rand_matrix(int size1, int size2, int max_value) { - std::srand(std::time(0)); - matrix rand_m(size1, size2); - for (int i = 0; i < size1; i++) { - for (int j = 0; j < size2; j++) { - double rand_value = std::rand() % max_value; - rand_m(i, j) = rand_value; - } - } - return rand_m; -} - -std::vector exp_vector(std::vector x) { - std::vector exp_vec; - for (int i = 0; i < x.size(); i++) { - exp_vec.push_back(std::exp(x[i])); - } - return exp_vec; -} - -bool is_almost_equal(double x1, double x2, double precision) { - if (std::abs(x1 - x2) < precision) { - return true; - } - return false; -} - -TEST(matrix_op, matrix_inverse) { - int matrix_size = 2; // has to be 2x2 - matrix random_square_m = generate_rand_matrix(matrix_size, matrix_size, 1000); - matrix random_square_m_inv = ceph::matrix_inverse(random_square_m); - // the inverse matrix should have the same size - ASSERT_EQ(random_square_m_inv.size1(), random_square_m.size1()); - ASSERT_EQ(random_square_m_inv.size2(), random_square_m.size2()); - matrix matrix_prod = prod(random_square_m, random_square_m_inv); - // the product should be an identity matrix - for ( int i = 0; i < matrix_prod.size1(); i++){ - for (int j = 0; j < matrix_prod.size2(); j++){ - if (i == j) { - ASSERT_TRUE(is_almost_equal(matrix_prod(i, j), 1, 1e-9)); // i == j -> 1 - } else { - ASSERT_TRUE(is_almost_equal(matrix_prod(i, j), 0, 1e-9)); // i <> j -> 0 - } - } - } -} - -TEST(regression, log_regression) { - // y = ln(x) - std::vector y = generate_rand_vector(200, 100); - std::vector x = exp_vector(y); - - double theta[2]; // y = theta[0] + theta[1] * ln(x) - ceph::regression(x, y, theta); - ASSERT_TRUE(is_almost_equal(theta[0], 0, 1e-9)); // theta[0] = 0 - ASSERT_TRUE(is_almost_equal(theta[1], 1, 1e-9)); // theta[1] = 1 -} - -TEST(regression, find_slope_location) { - // y = ln(x) - std::vector y = generate_rand_vector(200, 100); - std::vector x = exp_vector(y); - - double target_slope = 5; - double x_target = ceph::find_slope_on_curve(x, y, target_slope); - ASSERT_TRUE(is_almost_equal(x_target, 0.2, 1e-9)); // y'(0.2) = 5 -} diff --git a/src/test/os/CMakeLists.txt b/src/test/os/CMakeLists.txt index c7baa164476..35eb8f11780 100644 --- a/src/test/os/CMakeLists.txt +++ b/src/test/os/CMakeLists.txt @@ -5,4 +5,3 @@ add_executable(unittest_lfnindex add_ceph_unittest(unittest_lfnindex) target_link_libraries(unittest_lfnindex os global) -add_subdirectory(bluestore) diff --git a/src/test/os/bluestore/CMakeLists.txt b/src/test/os/bluestore/CMakeLists.txt deleted file mode 100644 index 86de05aaf45..00000000000 --- a/src/test/os/bluestore/CMakeLists.txt +++ /dev/null @@ -1,15 +0,0 @@ -# unittest_slow_fast_codel -add_executable(unittest_slow_fast_codel - TestBlueStoreSlowFastCoDel.cc - ) -add_ceph_unittest(unittest_slow_fast_codel) -target_link_libraries(unittest_slow_fast_codel - rados_test_stub - librados - global - radostest-cxx - GTest::GTest - ceph-common - os - global - ) diff --git a/src/test/os/bluestore/TestBlueStoreSlowFastCoDel.cc b/src/test/os/bluestore/TestBlueStoreSlowFastCoDel.cc deleted file mode 100644 index 04e3318014b..00000000000 --- a/src/test/os/bluestore/TestBlueStoreSlowFastCoDel.cc +++ /dev/null @@ -1,191 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "gtest/gtest.h" -#include "include/Context.h" - -#include "common/ceph_time.h" -#include "os/bluestore/BlueStoreSlowFastCoDel.h" - - -static int64_t milliseconds_to_nanoseconds(int64_t ms) { - return ms * 1000.0 * 1000.0; -} - -static double nanoseconds_to_milliseconds(int64_t ms) { - return ms / (1000.0 * 1000.0); -} - -class BlueStoreSlowFastCoDelMock : public BlueStoreSlowFastCoDel { -public: - BlueStoreSlowFastCoDelMock( - CephContext *_cct, - std::function _bluestore_budget_reset_callback, - std::function _get_kv_throttle_current, - std::mutex &_iteration_mutex, - std::condition_variable &_iteration_cond, - int64_t _target_latency, - int64_t _fast_interval, - int64_t _slow_interval, - double _target_slope - ) : BlueStoreSlowFastCoDel(_cct, _bluestore_budget_reset_callback, - _get_kv_throttle_current), - iteration_mutex(_iteration_mutex), iteration_cond(_iteration_cond), - test_target_latency(_target_latency), test_fast_interval(_fast_interval), - test_slow_interval(_slow_interval), test_target_slope(_target_slope) { - init_test(); - } - - void init_test() { - std::lock_guard l(register_lock); - activated = true; - target_slope = test_target_slope; - slow_interval = test_slow_interval; - initial_fast_interval = test_fast_interval; - min_target_latency = milliseconds_to_nanoseconds(1); - initial_target_latency = test_target_latency; - max_target_latency = milliseconds_to_nanoseconds(500); - initial_bluestore_budget = 100 * 1024; - min_bluestore_budget = 10 * 1024; - bluestore_budget_increment = 1024; - regression_history_size = 5; - bluestore_budget = initial_bluestore_budget; - min_bluestore_budget = initial_bluestore_budget; - max_queue_length = min_bluestore_budget; - fast_interval = initial_fast_interval; - target_latency = initial_target_latency; - min_latency = INITIAL_LATENCY_VALUE; - slow_interval_registered_bytes = 0; - regression_throughput_history.clear(); - regression_target_latency_history.clear(); - slow_interval_start = ceph::mono_clock::zero(); - } - - std::vector target_latency_vector; - -protected: - std::mutex &iteration_mutex; - std::condition_variable &iteration_cond; - int64_t test_target_latency; - int64_t test_fast_interval; - int64_t test_slow_interval; - double test_target_slope; - - void on_fast_interval_finished() override { - std::unique_lock locker(iteration_mutex); - iteration_cond.notify_one(); - } - - void on_slow_interval_finished() override { - target_latency_vector.push_back(target_latency); - } -}; - -class TestSlowFastCoDel : public ::testing::Test { -public: - CephContext *ceph_context = nullptr; - BlueStoreSlowFastCoDelMock *slow_fast_codel = nullptr; - int64_t test_throttle_budget = 0; - std::mutex iteration_mutex; - std::condition_variable iteration_cond; - int64_t target_latency = milliseconds_to_nanoseconds(50); - int64_t fast_interval = milliseconds_to_nanoseconds(100); - int64_t slow_interval = milliseconds_to_nanoseconds(400); - double target_slope = 1; - - std::vector target_latency_vector; - std::vector txc_size_vector; - - TestSlowFastCoDel() {} - - ~TestSlowFastCoDel() {} - - static void SetUpTestCase() {} - - static void TearDownTestCase() {} - - void SetUp() override { - ceph_context = (new CephContext(CEPH_ENTITY_TYPE_ANY))->get(); - } - - void create_bluestore_slow_fast_codel() { - slow_fast_codel = new BlueStoreSlowFastCoDelMock( - ceph_context, - [this](int64_t x) mutable { - this->test_throttle_budget = x; - }, - [this]() mutable { - return this->test_throttle_budget; - }, - iteration_mutex, - iteration_cond, - target_latency, - fast_interval, - slow_interval, - target_slope); - } - - void TearDown() override { - if (slow_fast_codel) - delete slow_fast_codel; - } - - void test_codel() { - int64_t max_iterations = 50; - int iteration_timeout = 1; // 1 sec - int txc_num = 4; - for (int iteration = 0; iteration < max_iterations; iteration++) { - std::unique_lock locker(iteration_mutex); - bool violation = iteration % 2 == 1; - auto budget_tmp = test_throttle_budget; - auto target = slow_fast_codel->get_target_latency(); - double target_throughput = - (target_slope * nanoseconds_to_milliseconds(target_latency)) * - std::log(nanoseconds_to_milliseconds(target) * 1.0); - int64_t txc_size = - (nanoseconds_to_milliseconds(slow_interval) * - target_throughput) / - (1000 * txc_num * (slow_interval / fast_interval)); - txc_size *= 1024 * 1024; - txc_size_vector.push_back(txc_size); - target_latency_vector.push_back(target); - for (int i = 0; i < txc_num; i++) { - auto time = ceph::mono_clock::now(); - if (violation) { - int rand_ms = std::rand() % 1000 + 1000; - int64_t time_diff = milliseconds_to_nanoseconds(rand_ms); - time = time - std::chrono::nanoseconds(target + time_diff); - } - slow_fast_codel->update_from_txc_info(time, txc_size); - } - if (iteration_cond.wait_for( - locker, std::chrono::seconds(iteration_timeout)) == - std::cv_status::timeout) { - ASSERT_TRUE(false) << "Test timeout."; - return; - } - if (violation) { - ASSERT_LT(test_throttle_budget, budget_tmp); - } else { - ASSERT_GT(test_throttle_budget, budget_tmp); - } - } - - ASSERT_TRUE(slow_fast_codel->target_latency_vector.size() > 0); - } -}; - -TEST_F(TestSlowFastCoDel, test1) { - create_bluestore_slow_fast_codel(); - test_codel(); -} -- 2.39.5