From: Leonid Chernin Date: Wed, 4 Mar 2026 11:35:07 +0000 (+0200) Subject: nvmeofgw: delay failback X-Git-Tag: v21.0.1~88^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=0933ace3856fde314751e65e6c1a41027a69e77a;p=ceph.git nvmeofgw: delay failback Fix an issue with ESX NVMe-oF initiators: to prevent I/O delays during GW failback, add a configurable failback delay. The failback process for a recovered GW is delayed by the configured number of seconds. The default value is 0 (no delay), because no delay is needed when the host is Linux. tracker: https://tracker.ceph.com/issues/75427 Signed-off-by: Leonid Chernin --- diff --git a/src/common/options/mon.yaml.in b/src/common/options/mon.yaml.in index 9e9554b60df0..b3c8cf015e13 100644 --- a/src/common/options/mon.yaml.in +++ b/src/common/options/mon.yaml.in @@ -127,6 +127,13 @@ options: default: 15_min services: - mon +- name: mon_nvmeofgw_failback_delay + type: secs + level: advanced + desc: Period in seconds to delay HA failback of the gateway + default: 0 + services: + - mon - name: mon_nvmeofgw_wrong_map_ignore_sec type: uint level: advanced diff --git a/src/mon/NVMeofGwMap.cc b/src/mon/NVMeofGwMap.cc index b921ccb5519f..2295673da6b0 100755 --- a/src/mon/NVMeofGwMap.cc +++ b/src/mon/NVMeofGwMap.cc @@ -461,7 +461,15 @@ void NVMeofGwMap::process_gw_map_ka( } else { //========= prepare to Failback to this GW ========= // find the GW that took over on the group st.ana_grp_id - find_failback_gw(gw_id, group_key, propose_pending); + std::chrono::seconds failback_delay = g_conf().get_val + ("mon_nvmeofgw_failback_delay"); + if (failback_delay == std::chrono::seconds{0}) { + find_failback_gw(gw_id, group_key, propose_pending); + } else { + st.delay_failbacks_ts = std::chrono::system_clock::now() + failback_delay; + dout(4) << "failback delay " << failback_delay + << " set for gw "<< gw_id << dendl; + } } } else if (st.availability == gw_availability_t::GW_AVAILABLE) { for (auto& state_itr: 
created_gws[group_key][gw_id].sm_state) { @@ -479,6 +487,8 @@ void NVMeofGwMap::process_gw_map_ka( void NVMeofGwMap::handle_abandoned_ana_groups(bool& propose) { propose = false; + std::chrono::system_clock::time_point now = + std::chrono::system_clock::now(); for (auto& group_state: created_gws) { auto& group_key = group_state.first; auto& gws_states = group_state.second; @@ -517,7 +527,13 @@ void NVMeofGwMap::handle_abandoned_ana_groups(bool& propose) gw_states_per_group_t::GW_STANDBY_STATE)) { // 2. Failback missed: Check this GW is Available and Standby and // no other GW is doing Failback to it + + if (state.delay_failbacks_ts < now) { find_failback_gw(gw_id, group_key, propose); + } else { + dout(4) << "failback not allowed for GW "<< gw_id + << " failback delay not expired yet" << dendl; + } } } if (propose) { diff --git a/src/mon/NVMeofGwMon.cc b/src/mon/NVMeofGwMon.cc index 785b629fc456..576c84044b15 100644 --- a/src/mon/NVMeofGwMon.cc +++ b/src/mon/NVMeofGwMon.cc @@ -208,6 +208,8 @@ void NVMeofGwMon::restore_pending_map_info(NVMeofGwMap & tmp_map) { } pending_map.created_gws[group_key][gw_id].last_gw_down_ts = gw_created_pair.second.last_gw_down_ts; + pending_map.created_gws[group_key][gw_id].delay_failbacks_ts = + gw_created_pair.second.delay_failbacks_ts; pending_map.created_gws[group_key][gw_id].last_gw_map_epoch_valid = gw_created_pair.second.last_gw_map_epoch_valid; pending_map.created_gws[group_key][gw_id].beacon_index = diff --git a/src/mon/NVMeofGwTypes.h b/src/mon/NVMeofGwTypes.h index cd22dcbc4fe6..7febc9a960ea 100755 --- a/src/mon/NVMeofGwTypes.h +++ b/src/mon/NVMeofGwTypes.h @@ -171,6 +171,8 @@ struct NvmeGwMonState { std::chrono::system_clock::now(); std::chrono::system_clock::time_point last_gw_down_ts = std::chrono::system_clock::now() - std::chrono::seconds(30); + std::chrono::system_clock::time_point delay_failbacks_ts = + std::chrono::system_clock::now(); NvmeGwMonState(): ana_grp_id(REDUNDANT_GW_ANA_GROUP_ID) {} 
NvmeGwMonState(NvmeAnaGrpId id)