]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
nvmeofgw: delay failback 67717/head
authorLeonid Chernin <leonidc@il.ibm.com>
Wed, 4 Mar 2026 11:35:07 +0000 (13:35 +0200)
committerLeonid Chernin <leonidc@il.ibm.com>
Tue, 10 Mar 2026 08:19:48 +0000 (10:19 +0200)
 Fix an issue with ESX NVMeof initiators:
 To prevent IO delays during GW failback, add a configurable failback
 delay. The failback process for a recovered GW is delayed by the
 configured number of seconds.
 The default value is 0 - no delay is needed when the host is Linux.

 tracker: https://tracker.ceph.com/issues/75427

Signed-off-by: Leonid Chernin <leonidc@il.ibm.com>
src/common/options/mon.yaml.in
src/mon/NVMeofGwMap.cc
src/mon/NVMeofGwMon.cc
src/mon/NVMeofGwTypes.h

index 9e9554b60df0c8c5bc65a34a3f9cf2416e2e953c..b3c8cf015e13cc6f7a27cd0529fceddfd1443d99 100644 (file)
@@ -127,6 +127,13 @@ options:
   default: 15_min
   services:
   - mon
+- name: mon_nvmeofgw_failback_delay
+  type: secs
+  level: advanced
+  desc: Period in seconds to delay HA failback of the gateway
+  default: 0
+  services:
+  - mon
 - name: mon_nvmeofgw_wrong_map_ignore_sec
   type: uint
   level: advanced
index b921ccb5519f7fa922b6d5c43c44d2c3360d5bca..2295673da6b0c9e551fc212e806b1a4212422eea 100755 (executable)
@@ -461,7 +461,15 @@ void NVMeofGwMap::process_gw_map_ka(
     } else {
       //========= prepare to Failback to this GW =========
       // find the GW that took over on the group st.ana_grp_id
-      find_failback_gw(gw_id, group_key, propose_pending);
+      std::chrono::seconds failback_delay = g_conf().get_val<std::chrono::seconds>
+                           ("mon_nvmeofgw_failback_delay");
+      if (failback_delay == std::chrono::seconds{0}) {
+        find_failback_gw(gw_id, group_key, propose_pending);
+      } else {
+        st.delay_failbacks_ts = std::chrono::system_clock::now() + failback_delay;
+        dout(4) << "failback delay " << failback_delay
+                << " set for gw "<< gw_id << dendl;
+      }
     }
   } else if (st.availability == gw_availability_t::GW_AVAILABLE) {
     for (auto& state_itr: created_gws[group_key][gw_id].sm_state) {
@@ -479,6 +487,8 @@ void NVMeofGwMap::process_gw_map_ka(
 void NVMeofGwMap::handle_abandoned_ana_groups(bool& propose)
 {
   propose = false;
+  std::chrono::system_clock::time_point now =
+           std::chrono::system_clock::now();
   for (auto& group_state: created_gws) {
     auto& group_key = group_state.first;
     auto& gws_states = group_state.second;
@@ -517,7 +527,13 @@ void NVMeofGwMap::handle_abandoned_ana_groups(bool& propose)
                  gw_states_per_group_t::GW_STANDBY_STATE)) {
        // 2. Failback missed: Check this GW is Available and Standby and
        // no other GW is doing Failback to it
+
+  if (state.delay_failbacks_ts < now) {
        find_failback_gw(gw_id, group_key, propose);
+  } else {
+    dout(4) << "failback not allowed for GW "<< gw_id
+            << " failback delay  not expired yet" << dendl;
+  }
       }
     }
     if (propose) {
index 785b629fc45699173906f99fc693c789597a42f0..576c84044b157de20f7a5cf04471e26fcd91e60d 100644 (file)
@@ -208,6 +208,8 @@ void NVMeofGwMon::restore_pending_map_info(NVMeofGwMap & tmp_map) {
       }
       pending_map.created_gws[group_key][gw_id].last_gw_down_ts =
           gw_created_pair.second.last_gw_down_ts;
+      pending_map.created_gws[group_key][gw_id].delay_failbacks_ts =
+          gw_created_pair.second.delay_failbacks_ts;
       pending_map.created_gws[group_key][gw_id].last_gw_map_epoch_valid =
          gw_created_pair.second.last_gw_map_epoch_valid;
       pending_map.created_gws[group_key][gw_id].beacon_index =
index cd22dcbc4fe6a4250295e8fb23ba017580d51063..7febc9a960eac6b1756d3519a77dda8a4f634d7e 100755 (executable)
@@ -171,6 +171,8 @@ struct NvmeGwMonState {
              std::chrono::system_clock::now();
   std::chrono::system_clock::time_point last_gw_down_ts =
              std::chrono::system_clock::now() - std::chrono::seconds(30);
+  std::chrono::system_clock::time_point delay_failbacks_ts =
+             std::chrono::system_clock::now();
   NvmeGwMonState(): ana_grp_id(REDUNDANT_GW_ANA_GROUP_ID) {}
 
   NvmeGwMonState(NvmeAnaGrpId id)