From: Sridhar Seshasayee Date: Tue, 21 Feb 2023 12:24:36 +0000 (+0530) Subject: osd: Retain overridden mClock recovery settings across osd restarts X-Git-Tag: v18.1.0~122^2~21 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=9d1a64cb54d2adf12cacfaedcec8d030518f55c7;p=ceph.git osd: Retain overridden mClock recovery settings across osd restarts Fix an issue where an overridden mClock recovery setting (set prior to an osd restart) could be lost after an osd restart. For e.g., consider that prior to an osd restart, the option 'osd_max_backfill' was successfully set to a value different from the mClock default. If the osd was restarted for some reason, the boot-up sequence was incorrectly resetting the backfill value to the mclock default within the async local/remote reservers. This fix ensures that no change is made if the current overriden value is different from the mClock default. Modify an existing standalone test to verify that the local and remote async reservers are updated to the desired number of backfills under normal conditions and also across osd restarts. Signed-off-by: Sridhar Seshasayee --- diff --git a/qa/standalone/misc/mclock-config.sh b/qa/standalone/misc/mclock-config.sh index d16cd45f43fa..6ba26af477b1 100755 --- a/qa/standalone/misc/mclock-config.sh +++ b/qa/standalone/misc/mclock-config.sh @@ -247,9 +247,9 @@ function TEST_backfill_limit_adjustment_mclock() { run_osd $dir 0 --osd_op_queue=mclock_scheduler || return 1 local backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \ - config get osd_max_backfills) + config get osd_max_backfills | jq .osd_max_backfills | bc) # Get default value - echo "$backfills" | grep --quiet 'osd_max_backfills' || return 1 + echo "osd_max_backfills: $backfills" || return 1 # Change the backfill limit without setting # osd_mclock_override_recovery_settings option. Verify that the backfill @@ -257,8 +257,16 @@ function TEST_backfill_limit_adjustment_mclock() { ceph config set osd.0 osd_max_backfills 20 || return 1 sleep 2 # Allow time for change to take effect local max_backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \ - config get osd_max_backfills) - test "$max_backfills" = "$backfills" || return 1 + config get osd_max_backfills | jq .osd_max_backfills | bc) + test $max_backfills = $backfills || return 1 + + # Verify local and async reserver settings are not changed + max_backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \ + dump_recovery_reservations | jq .local_reservations.max_allowed | bc) + test $max_backfills = $backfills || return 1 + max_backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \ + dump_recovery_reservations | jq .remote_reservations.max_allowed | bc) + test $max_backfills = $backfills || return 1 # Change backfills limit after setting osd_mclock_override_recovery_settings. # Verify that the backfills limit is modified. @@ -266,8 +274,35 @@ function TEST_backfill_limit_adjustment_mclock() { ceph config set osd.0 osd_max_backfills 20 || return 1 sleep 2 # Allow time for change to take effect max_backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \ - config get osd_max_backfills) - test "$max_backfills" = '{"osd_max_backfills":"20"}' || return 1 + config get osd_max_backfills | jq .osd_max_backfills | bc) + test $max_backfills = 20 || return 1 + + # Verify local and async reserver settings are changed + max_backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \ + dump_recovery_reservations | jq .local_reservations.max_allowed | bc) + test $max_backfills = 20 || return 1 + max_backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \ + dump_recovery_reservations | jq .remote_reservations.max_allowed | bc) + test $max_backfills = 20 || return 1 + + # Kill osd and bring it back up. + # Confirm that the backfill settings are retained. + kill_daemons $dir TERM osd || return 1 + ceph osd down 0 || return 1 + wait_for_osd down 0 || return 1 + activate_osd $dir 0 --osd-op-queue=mclock_scheduler || return 1 + + max_backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \ + config get osd_max_backfills | jq .osd_max_backfills | bc) + test $max_backfills = 20 || return 1 + + # Verify local and async reserver settings are changed + max_backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \ + dump_recovery_reservations | jq .local_reservations.max_allowed | bc) + test $max_backfills = 20 || return 1 + max_backfills=$(CEPH_ARGS='' ceph --format=json daemon $(get_asok_path osd.0) \ + dump_recovery_reservations | jq .remote_reservations.max_allowed | bc) + test $max_backfills = 20 || return 1 teardown $dir || return 1 } diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index f964e4179fd5..7d5f87344f9a 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -10052,17 +10052,28 @@ bool OSD::maybe_override_options_for_qos(const std::set *changed) } } } else { // if (changed != nullptr) (osd boot-up) - // Override the default recovery max active and max backfills to - // higher values based on the type of backing device (hdd/ssd). - // This section is executed only during osd boot-up. + /** + * This section is executed only during osd boot-up. + * Override the default recovery max active (hdd & ssd) and max backfills + * config options to either the mClock defaults or retain their respective + * overridden values before the osd was restarted. + */ for (auto opt : recovery_qos_defaults) { + /** + * Note: set_val_default doesn't overwrite an option if it was earlier + * set at a config level greater than CONF_DEFAULT. It doesn't return + * a status. With get_val(), the config subsystem is guaranteed to + * either return the overridden value (if any) or the default value. + */ cct->_conf.set_val_default(opt.first, std::to_string(opt.second)); + auto opt_val = cct->_conf.get_val(opt.first); + dout(1) << __func__ << " " + << opt.first << " set to " << opt_val + << dendl; if (opt.first == "osd_max_backfills") { - service.local_reserver.set_max(opt.second); - service.remote_reserver.set_max(opt.second); + service.local_reserver.set_max(opt_val); + service.remote_reserver.set_max(opt_val); } - dout(1) << __func__ << " Set default value for " << opt.first - << " to " << opt.second << dendl; } } return true;