From 737a7f1cf6194d3b286657587a2123ee5b688a3e Mon Sep 17 00:00:00 2001 From: Sridhar Seshasayee Date: Fri, 23 Sep 2022 17:06:38 +0530 Subject: [PATCH] osd: Ignore osd bench results for mclock if it exceeds the set threshold The osd bench may in some cases show unrealistic results. The IOPS capacity reported is used for QoS specific calculations in the mClock scheduler. If the IOPS capacity is unrealistic, it could result in a performance impact to the ops handled by mClock scheduler. To avoid this, do not consider the results of the osd bench if it breaches a threshold set for hdd and ssd media. The existing default setting will be enforced as a safety measure. A cluster log warning is raised for the osd. Fixes: https://tracker.ceph.com/issues/57529 Signed-off-by: Sridhar Seshasayee --- src/common/options/osd.yaml.in | 26 ++++++++++++++++++++++++++ src/osd/OSD.cc | 33 ++++++++++++++++++++++++++++----- 2 files changed, 54 insertions(+), 5 deletions(-) diff --git a/src/common/options/osd.yaml.in b/src/common/options/osd.yaml.in index bc5e71f31b5..2f118073995 100644 --- a/src/common/options/osd.yaml.in +++ b/src/common/options/osd.yaml.in @@ -1165,6 +1165,32 @@ options: - osd_max_backfills flags: - runtime +- name: osd_mclock_iops_capacity_threshold_hdd + type: float + level: basic + desc: The threshold IOPs capacity (at 4KiB block size) beyond which to ignore + the OSD bench results for an OSD (for rotational media) + long_desc: This option specifies the threshold IOPS capacity for an OSD under + which the OSD bench results can be considered for QoS calculations. 
Only + considered for osd_op_queue = mclock_scheduler + fmt_desc: The threshold IOPS capacity (at 4KiB block size) beyond which to + ignore OSD bench results for an OSD (for rotational media) + default: 500 + flags: + - runtime +- name: osd_mclock_iops_capacity_threshold_ssd + type: float + level: basic + desc: The threshold IOPs capacity (at 4KiB block size) beyond which to ignore + the OSD bench results for an OSD (for solid state media) + long_desc: This option specifies the threshold IOPS capacity for an OSD under + which the OSD bench results can be considered for QoS calculations. Only + considered for osd_op_queue = mclock_scheduler + fmt_desc: The threshold IOPS capacity (at 4KiB block size) beyond which to + ignore OSD bench results for an OSD (for solid state media) + default: 80000 + flags: + - runtime # Set to true for testing. Users should NOT set this. # If set to true even after reading enough shards to # decode the object, any error will be reported. diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 9adf2c1d52b..f8642fbadab 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -9775,11 +9775,11 @@ void OSD::maybe_override_max_osd_capacity_for_qos() max_capacity_iops_config = "osd_mclock_max_capacity_iops_ssd"; } + double default_iops = 0.0; + double cur_iops = 0.0; if (!force_run_benchmark) { - double default_iops = 0.0; - // Get the current osd iops capacity - double cur_iops = cct->_conf.get_val(max_capacity_iops_config); + cur_iops = cct->_conf.get_val(max_capacity_iops_config); // Get the default max iops capacity auto val = cct->_conf.get_val_default(max_capacity_iops_config); @@ -9830,8 +9830,31 @@ void OSD::maybe_override_max_osd_capacity_for_qos() << " elapsed_sec: " << elapsed << dendl; - // Persist the iops value to the MON store. - mon_cmd_set_config(max_capacity_iops_config, std::to_string(iops)); + // Get the threshold IOPS set for the underlying hdd/ssd. 
+ double threshold_iops = 0.0; + if (store_is_rotational) { + threshold_iops = cct->_conf.get_val( + "osd_mclock_iops_capacity_threshold_hdd"); + } else { + threshold_iops = cct->_conf.get_val( + "osd_mclock_iops_capacity_threshold_ssd"); + } + + // Persist the iops value to the MON store or throw cluster warning + // if the measured iops exceeds the set threshold. If the iops exceed + // the threshold, the default value is used. + if (iops > threshold_iops) { + clog->warn() << "OSD bench result of " << std::to_string(iops) + << " IOPS exceeded the threshold limit of " + << std::to_string(threshold_iops) << " IOPS for osd." + << std::to_string(whoami) << ". IOPS capacity is unchanged" + << " at " << std::to_string(cur_iops) << " IOPS. The" + << " recommendation is to establish the osd's IOPS capacity" + << " using other benchmark tools (e.g. Fio) and then" + << " override osd_mclock_max_capacity_iops_[hdd|ssd]."; + } else { + mon_cmd_set_config(max_capacity_iops_config, std::to_string(iops)); + } } } -- 2.39.5