From: sajibreadd Date: Thu, 29 Aug 2024 19:00:23 +0000 (+0600) Subject: osd_recovery_sleep_degraded, osd_recovery_sleep_degraded_ssd, osd_recovery_sleep_degr... X-Git-Tag: v19.2.3~79^2 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=2a556f06647c0ed2781dea0bab5c9632cc5c7418;p=ceph-ci.git osd_recovery_sleep_degraded, osd_recovery_sleep_degraded_ssd, osd_recovery_sleep_degraded_hdd and osd_recovery_sleep_degraded_hybrid added to the configuration to throttle data movement during recovery when a PG is degraded Fixes: https://tracker.ceph.com/issues/67700 Signed-off-by: Md Mahamudur Rahaman Sajib (cherry picked from commit f4360508cc3dded91a73c7b98c4626f827ba5d91) Conflicts: src/osd/OSD.cc --- diff --git a/doc/rados/configuration/mclock-config-ref.rst b/doc/rados/configuration/mclock-config-ref.rst index 58de3e54bfe..c1287a21f1b 100644 --- a/doc/rados/configuration/mclock-config-ref.rst +++ b/doc/rados/configuration/mclock-config-ref.rst @@ -292,6 +292,10 @@ sleep options are disabled (set to 0), - :confval:`osd_recovery_sleep_hdd` - :confval:`osd_recovery_sleep_ssd` - :confval:`osd_recovery_sleep_hybrid` +- :confval:`osd_recovery_sleep_degraded` +- :confval:`osd_recovery_sleep_degraded_hdd` +- :confval:`osd_recovery_sleep_degraded_ssd` +- :confval:`osd_recovery_sleep_degraded_hybrid` - :confval:`osd_scrub_sleep` - :confval:`osd_delete_sleep` - :confval:`osd_delete_sleep_hdd` diff --git a/doc/rados/configuration/osd-config-ref.rst b/doc/rados/configuration/osd-config-ref.rst index 5127b4b8cf1..9d028532448 100644 --- a/doc/rados/configuration/osd-config-ref.rst +++ b/doc/rados/configuration/osd-config-ref.rst @@ -431,6 +431,10 @@ perform well in a degraded state. .. confval:: osd_recovery_sleep_hdd .. confval:: osd_recovery_sleep_ssd .. confval:: osd_recovery_sleep_hybrid +.. confval:: osd_recovery_sleep_degraded +.. confval:: osd_recovery_sleep_degraded_hdd +.. confval:: osd_recovery_sleep_degraded_ssd +.. confval:: osd_recovery_sleep_degraded_hybrid .. 
confval:: osd_recovery_priority Tiering diff --git a/src/common/options/osd.yaml.in b/src/common/options/osd.yaml.in index cd6221f0cb5..233380d9733 100644 --- a/src/common/options/osd.yaml.in +++ b/src/common/options/osd.yaml.in @@ -140,6 +140,51 @@ options: - osd_recovery_sleep flags: - runtime +- name: osd_recovery_sleep_degraded + type: float + level: advanced + desc: Time in seconds to sleep before next recovery or backfill op when PGs are degraded. + This setting overrides _ssd, _hdd, and _hybrid if non-zero. + fmt_desc: Time in seconds to sleep before the next recovery or backfill op when PGs + are degraded. Increasing this value will slow down recovery ops while client + ops will be less impacted. + default: 0 + flags: + - runtime +- name: osd_recovery_sleep_degraded_hdd + type: float + level: advanced + desc: Time in seconds to sleep before next recovery or backfill op for HDDs + when PGs are degraded. + fmt_desc: Time in seconds to sleep before next recovery or backfill op + for HDDs when PGs are degraded. + default: 0.1 + flags: + - runtime +- name: osd_recovery_sleep_degraded_ssd + type: float + level: advanced + desc: Time in seconds to sleep before next recovery or backfill op for SSDs + when PGs are degraded. + fmt_desc: Time in seconds to sleep before the next recovery or backfill op + for SSDs when PGs are degraded. + default: 0 + see_also: + - osd_recovery_sleep_degraded + flags: + - runtime +- name: osd_recovery_sleep_degraded_hybrid + type: float + level: advanced + desc: Time in seconds to sleep before next recovery or backfill op when PGs + are degraded and data is on HDD and journal is on SSD + fmt_desc: Time in seconds to sleep before the next recovery or backfill op when + PGs are degraded and OSD data is on HDD and OSD journal / WAL+DB is on SSD. 
+ default: 0.025 + see_also: + - osd_recovery_sleep_degraded + flags: + - runtime - name: osd_snap_trim_sleep type: float level: advanced diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 852b87e8855..5c49bea0eaf 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -3665,6 +3665,21 @@ float OSD::get_osd_recovery_sleep() return cct->_conf->osd_recovery_sleep_hdd; } +float OSD::get_osd_recovery_sleep_degraded() { + float osd_recovery_sleep_degraded = + cct->_conf.get_val<double>("osd_recovery_sleep_degraded"); + if (osd_recovery_sleep_degraded > 0) { + return osd_recovery_sleep_degraded; + } + if (!store_is_rotational && !journal_is_rotational) { + return cct->_conf.get_val<double>("osd_recovery_sleep_degraded_ssd"); + } else if (store_is_rotational && !journal_is_rotational) { + return cct->_conf.get_val<double>("osd_recovery_sleep_degraded_hybrid"); + } else { + return cct->_conf.get_val<double>("osd_recovery_sleep_degraded_hdd"); + } +} + float OSD::get_osd_delete_sleep() { float osd_delete_sleep = cct->_conf.get_val<double>("osd_delete_sleep"); @@ -9541,9 +9556,12 @@ void OSD::do_recovery( * ops are scheduled after osd_recovery_sleep amount of time from the previous * recovery event's schedule time. This is done by adding a * recovery_requeue_callback event, which re-queues the recovery op using - * queue_recovery_after_sleep. + * queue_recovery_after_sleep. (osd_recovery_sleep_degraded will be + * used instead of osd_recovery_sleep when pg is degraded) */ - float recovery_sleep = get_osd_recovery_sleep(); + float recovery_sleep = pg->is_degraded() + ? 
get_osd_recovery_sleep_degraded() + : get_osd_recovery_sleep(); { std::lock_guard l(service.sleep_lock); if (recovery_sleep > 0 && service.recovery_needs_sleep) { @@ -9852,6 +9870,10 @@ const char** OSD::get_tracked_conf_keys() const "osd_recovery_sleep_hdd", "osd_recovery_sleep_ssd", "osd_recovery_sleep_hybrid", + "osd_recovery_sleep_degraded", + "osd_recovery_sleep_degraded_hdd", + "osd_recovery_sleep_degraded_ssd", + "osd_recovery_sleep_degraded_hybrid", "osd_delete_sleep", "osd_delete_sleep_hdd", "osd_delete_sleep_ssd", @@ -9919,7 +9941,11 @@ void OSD::handle_conf_change(const ConfigProxy& conf, changed.count("osd_recovery_sleep") || changed.count("osd_recovery_sleep_hdd") || changed.count("osd_recovery_sleep_ssd") || - changed.count("osd_recovery_sleep_hybrid")) { + changed.count("osd_recovery_sleep_hybrid") || + changed.count("osd_recovery_sleep_degraded") || + changed.count("osd_recovery_sleep_degraded_hdd") || + changed.count("osd_recovery_sleep_degraded_ssd") || + changed.count("osd_recovery_sleep_degraded_hybrid")) { maybe_override_sleep_options_for_qos(); } if (changed.count("osd_min_recovery_priority")) { @@ -10251,6 +10277,12 @@ void OSD::maybe_override_sleep_options_for_qos() cct->_conf.set_val("osd_recovery_sleep_ssd", std::to_string(0)); cct->_conf.set_val("osd_recovery_sleep_hybrid", std::to_string(0)); + // Disable recovery sleep for degraded PGs + cct->_conf.set_val("osd_recovery_sleep_degraded", std::to_string(0)); + cct->_conf.set_val("osd_recovery_sleep_degraded_hdd", std::to_string(0)); + cct->_conf.set_val("osd_recovery_sleep_degraded_ssd", std::to_string(0)); + cct->_conf.set_val("osd_recovery_sleep_degraded_hybrid", std::to_string(0)); + // Disable delete sleep cct->_conf.set_val("osd_delete_sleep", std::to_string(0)); cct->_conf.set_val("osd_delete_sleep_hdd", std::to_string(0)); diff --git a/src/osd/OSD.h b/src/osd/OSD.h index 51183bfe388..2d95bdd259c 100644 --- a/src/osd/OSD.h +++ b/src/osd/OSD.h @@ -2002,6 +2002,7 @@ private: int 
get_num_op_threads(); float get_osd_recovery_sleep(); + float get_osd_recovery_sleep_degraded(); float get_osd_delete_sleep(); float get_osd_snap_trim_sleep(); diff --git a/src/osd/PG.h b/src/osd/PG.h index 18337eddea2..4706daeb987 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -1291,6 +1291,7 @@ protected: public: int pg_stat_adjust(osd_stat_t *new_stat); + bool is_degraded() const { return recovery_state.is_degraded(); } protected: bool delete_needs_sleep = false; @@ -1314,7 +1315,6 @@ protected: bool is_backfill_unfound() const { return recovery_state.is_backfill_unfound(); } bool is_incomplete() const { return recovery_state.is_incomplete(); } bool is_clean() const { return recovery_state.is_clean(); } - bool is_degraded() const { return recovery_state.is_degraded(); } bool is_undersized() const { return recovery_state.is_undersized(); } bool is_scrubbing() const { return state_test(PG_STATE_SCRUBBING); } // Primary only bool is_remapped() const { return recovery_state.is_remapped(); }