From: sajibreadd Date: Thu, 29 Aug 2024 19:00:23 +0000 (+0600) Subject: osd_recovery_sleep_degraded, osd_recovery_sleep_degraded_ssd, osd_recovery_sleep_degr... X-Git-Tag: v20.3.0~381^2 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=f4360508cc3dded91a73c7b98c4626f827ba5d91;p=ceph.git osd_recovery_sleep_degraded, osd_recovery_sleep_degraded_ssd, osd_recovery_sleep_degraded_hdd added in the configuration to throttle the data movement while recovery when pg is degraded Fixes: https://tracker.ceph.com/issues/67700 Signed-off-by: Md Mahamudur Rahaman Sajib --- diff --git a/doc/rados/configuration/mclock-config-ref.rst b/doc/rados/configuration/mclock-config-ref.rst index 58de3e54bfef9..c1287a21f1b53 100644 --- a/doc/rados/configuration/mclock-config-ref.rst +++ b/doc/rados/configuration/mclock-config-ref.rst @@ -292,6 +292,10 @@ sleep options are disabled (set to 0), - :confval:`osd_recovery_sleep_hdd` - :confval:`osd_recovery_sleep_ssd` - :confval:`osd_recovery_sleep_hybrid` +- :confval:`osd_recovery_sleep_degraded` +- :confval:`osd_recovery_sleep_degraded_hdd` +- :confval:`osd_recovery_sleep_degraded_ssd` +- :confval:`osd_recovery_sleep_degraded_hybrid` - :confval:`osd_scrub_sleep` - :confval:`osd_delete_sleep` - :confval:`osd_delete_sleep_hdd` diff --git a/doc/rados/configuration/osd-config-ref.rst b/doc/rados/configuration/osd-config-ref.rst index 23efa797773aa..df527f99fbd2d 100644 --- a/doc/rados/configuration/osd-config-ref.rst +++ b/doc/rados/configuration/osd-config-ref.rst @@ -431,6 +431,10 @@ perform well in a degraded state. .. confval:: osd_recovery_sleep_hdd .. confval:: osd_recovery_sleep_ssd .. confval:: osd_recovery_sleep_hybrid +.. confval:: osd_recovery_sleep_degraded +.. confval:: osd_recovery_sleep_degraded_hdd +.. confval:: osd_recovery_sleep_degraded_ssd +.. confval:: osd_recovery_sleep_degraded_hybrid .. 
confval:: osd_recovery_priority Tiering diff --git a/src/common/options/osd.yaml.in b/src/common/options/osd.yaml.in index 2be1c08f934c8..4dc60d33ff01d 100644 --- a/src/common/options/osd.yaml.in +++ b/src/common/options/osd.yaml.in @@ -140,6 +140,51 @@ options: - osd_recovery_sleep flags: - runtime +- name: osd_recovery_sleep_degraded + type: float + level: advanced + desc: Time in seconds to sleep before next recovery or backfill op when PGs are degraded. + This setting overrides _ssd, _hdd, and _hybrid if non-zero. + fmt_desc: Time in seconds to sleep before the next recovery or backfill op when PGs + are degraded. Increasing this value will slow down recovery ops while client + ops will be less impacted. + default: 0 + flags: + - runtime +- name: osd_recovery_sleep_degraded_hdd + type: float + level: advanced + desc: Time in seconds to sleep before next recovery or backfill op for HDDs + when PGs are degraded. + fmt_desc: Time in seconds to sleep before next recovery or backfill op + for HDDs when PGs are degraded. + default: 0.1 + flags: + - runtime +- name: osd_recovery_sleep_degraded_ssd + type: float + level: advanced + desc: Time in seconds to sleep before next recovery or backfill op for SSDs + when PGs are degraded. + fmt_desc: Time in seconds to sleep before the next recovery or backfill op + for SSDs when PGs are degraded. + default: 0 + see_also: + - osd_recovery_sleep_degraded + flags: + - runtime +- name: osd_recovery_sleep_degraded_hybrid + type: float + level: advanced + desc: Time in seconds to sleep before next recovery or backfill op when PGs + are degraded and data is on HDD and journal is on SSD + fmt_desc: Time in seconds to sleep before the next recovery or backfill op when + PGs are degraded and OSD data is on HDD and OSD journal / WAL+DB is on SSD. 
+ default: 0.025 + see_also: + - osd_recovery_sleep_degraded + flags: + - runtime - name: osd_snap_trim_sleep type: float level: advanced diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index af3bd8fe0e8cf..54167bc1df33f 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -3681,6 +3681,21 @@ float OSD::get_osd_recovery_sleep() return cct->_conf->osd_recovery_sleep_hdd; } +float OSD::get_osd_recovery_sleep_degraded() { + float osd_recovery_sleep_degraded = + cct->_conf.get_val("osd_recovery_sleep_degraded"); + if (osd_recovery_sleep_degraded > 0) { + return osd_recovery_sleep_degraded; + } + if (!store_is_rotational && !journal_is_rotational) { + return cct->_conf.get_val("osd_recovery_sleep_degraded_ssd"); + } else if (store_is_rotational && !journal_is_rotational) { + return cct->_conf.get_val("osd_recovery_sleep_degraded_hybrid"); + } else { + return cct->_conf.get_val("osd_recovery_sleep_degraded_hdd"); + } +} + float OSD::get_osd_delete_sleep() { float osd_delete_sleep = cct->_conf.get_val("osd_delete_sleep"); @@ -9703,9 +9718,12 @@ void OSD::do_recovery( * ops are scheduled after osd_recovery_sleep amount of time from the previous * recovery event's schedule time. This is done by adding a * recovery_requeue_callback event, which re-queues the recovery op using - * queue_recovery_after_sleep. + * queue_recovery_after_sleep. (osd_recovery_sleep_degraded will be + * used instead of osd_recovery_sleep when pg is degraded) */ - float recovery_sleep = get_osd_recovery_sleep(); + float recovery_sleep = pg->is_degraded() + ? 
get_osd_recovery_sleep_degraded() + : get_osd_recovery_sleep(); { std::lock_guard l(service.sleep_lock); if (recovery_sleep > 0 && service.recovery_needs_sleep) { @@ -10014,6 +10032,10 @@ std::vector OSD::get_tracked_keys() const noexcept "osd_recovery_sleep_hdd"s, "osd_recovery_sleep_ssd"s, "osd_recovery_sleep_hybrid"s, + "osd_recovery_sleep_degraded"s, + "osd_recovery_sleep_degraded_hdd"s, + "osd_recovery_sleep_degraded_ssd"s, + "osd_recovery_sleep_degraded_hybrid"s, "osd_delete_sleep"s, "osd_delete_sleep_hdd"s, "osd_delete_sleep_ssd"s, @@ -10079,7 +10101,11 @@ void OSD::handle_conf_change(const ConfigProxy& conf, changed.count("osd_recovery_sleep") || changed.count("osd_recovery_sleep_hdd") || changed.count("osd_recovery_sleep_ssd") || - changed.count("osd_recovery_sleep_hybrid")) { + changed.count("osd_recovery_sleep_hybrid") || + changed.count("osd_recovery_sleep_degraded") || + changed.count("osd_recovery_sleep_degraded_hdd") || + changed.count("osd_recovery_sleep_degraded_ssd") || + changed.count("osd_recovery_sleep_degraded_hybrid")) { maybe_override_sleep_options_for_qos(); } if (changed.count("osd_min_recovery_priority")) { @@ -10411,6 +10437,12 @@ void OSD::maybe_override_sleep_options_for_qos() cct->_conf.set_val("osd_recovery_sleep_ssd", std::to_string(0)); cct->_conf.set_val("osd_recovery_sleep_hybrid", std::to_string(0)); + // Disable recovery sleep for pg degraded + cct->_conf.set_val("osd_recovery_sleep_degraded", std::to_string(0)); + cct->_conf.set_val("osd_recovery_sleep_degraded_hdd", std::to_string(0)); + cct->_conf.set_val("osd_recovery_sleep_degraded_ssd", std::to_string(0)); + cct->_conf.set_val("osd_recovery_sleep_degraded_hybrid", std::to_string(0)); + // Disable delete sleep cct->_conf.set_val("osd_delete_sleep", std::to_string(0)); cct->_conf.set_val("osd_delete_sleep_hdd", std::to_string(0)); diff --git a/src/osd/OSD.h b/src/osd/OSD.h index 0a4928fc20af6..1c8c4107b2767 100644 --- a/src/osd/OSD.h +++ b/src/osd/OSD.h @@ -2017,6 +2017,7 
@@ private: int get_num_op_threads(); float get_osd_recovery_sleep(); + float get_osd_recovery_sleep_degraded(); float get_osd_delete_sleep(); float get_osd_snap_trim_sleep(); diff --git a/src/osd/PG.h b/src/osd/PG.h index bb8caa36b9544..d27d8196ad346 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -1241,6 +1241,7 @@ protected: public: int pg_stat_adjust(osd_stat_t *new_stat); + bool is_degraded() const { return recovery_state.is_degraded(); } protected: bool delete_needs_sleep = false; @@ -1264,7 +1265,6 @@ protected: bool is_backfill_unfound() const { return recovery_state.is_backfill_unfound(); } bool is_incomplete() const { return recovery_state.is_incomplete(); } bool is_clean() const { return recovery_state.is_clean(); } - bool is_degraded() const { return recovery_state.is_degraded(); } bool is_undersized() const { return recovery_state.is_undersized(); } bool is_scrubbing() const { return state_test(PG_STATE_SCRUBBING); } // Primary only bool is_remapped() const { return recovery_state.is_remapped(); }