git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
Merge pull request #63003 from leonidc/fix_duplicate_entity_addr
author Samuel Just <sjust@redhat.com>
Wed, 11 Jun 2025 15:30:29 +0000 (08:30 -0700)
committer GitHub <noreply@github.com>
Wed, 11 Jun 2025 15:30:29 +0000 (08:30 -0700)
fix duplicated entity addr in the map during reboot of several GWs

Reviewed-by: Samuel Just <sjust@redhat.com>
src/common/options/mon.yaml.in
src/mon/NVMeofGwMap.cc
src/mon/NVMeofGwMon.cc
src/nvmeof/NVMeofGwMonitorClient.cc

Simple merge
index 12bd93cef7466f59c90e26ab4b3ce1f53d34224b,e959f6d20af8868204559c1cb7ea270270c4446a..80d5306051f7da29c6765368d9e228ccd5f2a270
@@@ -238,32 -237,27 +238,51 @@@ void  NVMeofGwMap::gw_performed_startup
    dout(4) << "GW  performed the full startup " << gw_id << dendl;
    propose_pending = true;
    increment_gw_epoch( group_key);
 +  auto &st = created_gws[group_key][gw_id];
 +  const auto skip_failovers_sec = g_conf().get_val<std::chrono::seconds>
 +    ("mon_nvmeofgw_skip_failovers_interval");
 +  const auto beacon_grace_sec =
 +    g_conf().get_val<std::chrono::seconds>("mon_nvmeofgw_beacon_grace");
 + /*
 +    This is a heuristic meant to identify a "cephadm redeploy" of the nvmeof GWs.
 +    We want to detect that a redeploy is in progress because that lets us prevent
 +    redundant failover and failback actions.
 +    It is very important to minimize fo/fb during redeploy: all GWs go down and come
 +    back up, and the amount of fo/fb that could be driven by that is large, which also
 +    triggers many changes on the hosts that are nvmeof-connected to the GWs, up to the
 +    point that a host can get stuck.
 +    The heuristic assumes that if a GW disappears and shows up again in less than
 +    REDEPLOY_TIMEOUT seconds, a redeploy may have started, so we do a failover for
 +    this GW but skip failovers for the next REDEPLOY_TIMEOUT, and the same again for
 +    the next GW that disappears, and so on.
 +    If this works as designed, then regardless of the number of GWs, a redeploy
 +    causes only one fo/fb. */
 +  if ((now - (st.last_gw_down_ts - beacon_grace_sec)) < skip_failovers_sec) {
 +    skip_failovers_for_group(group_key);
 +    dout(4) << "startup: set skip-failovers for gw " << gw_id << " group "
 +            << group_key << dendl;
 +  }
  }
  
+ void NVMeofGwMap::set_addr_vect(const NvmeGwId &gw_id,
+     const NvmeGroupKey& group_key, const entity_addr_t &addr) {
+   entity_addrvec_t addrvec(addr);
+   for (auto& gws_states: created_gws[group_key]) {
+     auto &state = gws_states.second;
+     auto &gw_found = gws_states.first;
+     if (state.addr_vect == addrvec && gw_found != gw_id) {
+       /* This can happen when several GWs restart simultaneously and
+        * get an entity_addr that differs from their previous one.
+        */
+       entity_addr_t a;
+       state.addr_vect = entity_addrvec_t(a); // clean up the duplicated address
+       dout(4) << "found duplicated addr vect in gw " << gw_found << dendl;
+     }
+   }
+   created_gws[group_key][gw_id].addr_vect = addrvec;
+   dout(10) << "Set addr vect " << addrvec << " for gw " << gw_id << dendl;
+ }
  void NVMeofGwMap::increment_gw_epoch(const NvmeGroupKey& group_key)
  {
    if (HAVE_FEATURE(mon->get_quorum_con_features(), NVMEOFHAMAP)) {
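For illustration only (not part of the commit): the skip-failovers condition in
gw_performed_startup() above compares the time since the GW was last seen going
down, minus the beacon grace, against mon_nvmeofgw_skip_failovers_interval. Below
is a minimal standalone sketch of that timing check; the option values and the
last-down timestamp are hypothetical stand-ins, not the monitor's actual types.

#include <chrono>
#include <iostream>

int main() {
  using namespace std::chrono;
  using Clock = steady_clock;

  // Hypothetical stand-ins for the mon options referenced in the diff.
  const seconds skip_failovers_sec{300};  // mon_nvmeofgw_skip_failovers_interval
  const seconds beacon_grace_sec{10};     // mon_nvmeofgw_beacon_grace

  // Hypothetical timestamp recorded when the GW's beacon was last lost.
  const Clock::time_point last_gw_down_ts = Clock::now() - seconds{42};
  const Clock::time_point now = Clock::now();

  // Same shape as the condition in the patch: subtracting the beacon grace from
  // the last-down timestamp credits the time needed to even notice the GW was gone.
  if ((now - (last_gw_down_ts - beacon_grace_sec)) < skip_failovers_sec) {
    std::cout << "GW came back quickly: likely a redeploy, skip further failovers\n";
  } else {
    std::cout << "GW was down too long: treat as a real failure\n";
  }
  return 0;
}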
Simple merge
Simple merge
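For illustration only (not part of the commit): set_addr_vect() in NVMeofGwMap.cc
clears any other GW entry in the same group that already holds the newly reported
address, so the map never keeps two GWs with the same entity address after several
GWs reboot with reshuffled addresses. A simplified, self-contained sketch of the
same dedup idea over a plain std::map; the types and values are hypothetical
stand-ins for Ceph's entity_addrvec_t and GW state.

#include <iostream>
#include <map>
#include <string>

// Hypothetical stand-ins for NvmeGwId and the GW state's addr_vect field.
using GwId = std::string;
struct GwState {
  std::string addr_vect;  // stand-in for entity_addrvec_t
};

// Assign 'addr' to 'gw_id', clearing the same address from any other GW entry.
void set_addr(std::map<GwId, GwState>& gws, const GwId& gw_id,
              const std::string& addr) {
  for (auto& [other_id, state] : gws) {
    if (other_id != gw_id && state.addr_vect == addr) {
      state.addr_vect.clear();  // drop the duplicated address, as in the patch
      std::cout << "found duplicated addr in gw " << other_id << "\n";
    }
  }
  gws[gw_id].addr_vect = addr;
}

int main() {
  std::map<GwId, GwState> gws{{"gw1", {"10.0.0.1:5500"}}, {"gw2", {"10.0.0.2:5500"}}};
  // gw2 reboots and comes back with gw1's old address; gw1's stale entry is cleared.
  set_addr(gws, "gw2", "10.0.0.1:5500");
  for (const auto& [id, st] : gws)
    std::cout << id << " -> '" << st.addr_vect << "'\n";
  return 0;
}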