From: Samuel Just
Date: Wed, 11 Jun 2025 15:30:29 +0000 (-0700)
Subject: Merge pull request #63003 from leonidc/fix_duplicate_entity_addr
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=8fb15d16eed476982fa79341a94d2719123c8781;p=ceph.git

Merge pull request #63003 from leonidc/fix_duplicate_entity_addr

fix duplicated entity addr in the map during reboot of several GWs

Reviewed-by: Samuel Just
---

8fb15d16eed476982fa79341a94d2719123c8781
diff --cc src/mon/NVMeofGwMap.cc
index 12bd93cef746,e959f6d20af8..80d5306051f7
--- a/src/mon/NVMeofGwMap.cc
+++ b/src/mon/NVMeofGwMap.cc
@@@ -238,32 -237,27 +238,51 @@@ void NVMeofGwMap::gw_performed_startup
    dout(4) << "GW performed the full startup " << gw_id << dendl;
    propose_pending = true;
    increment_gw_epoch( group_key);
+   auto &st = created_gws[group_key][gw_id];
+   const auto skip_failovers_sec = g_conf().get_val<std::chrono::seconds>
+     ("mon_nvmeofgw_skip_failovers_interval");
+   const auto beacon_grace_sec =
+     g_conf().get_val<std::chrono::seconds>("mon_nvmeofgw_beacon_grace");
+   /*
+    This is a heuristic meant to identify a "cephadm redeploy" of the
+    nvmeof GWs. We would like to detect that a redeploy is in progress,
+    because it helps us prevent redundant failover and failback actions.
+    It is very important to minimize fo/fb during redeploy: all GWs go
+    down and come back up again, and the amount of fo/fb that this can
+    drive is large. It also triggers a lot of changes on the hosts that
+    are nvmeof-connected to the GWs, even up to the point that a host
+    gets stuck.
+    This heuristic assumes that if a GW disappears and shows up again in
+    less than REDEPLOY_TIMEOUT seconds, a redeploy might have started, so
+    we do a failover for this GW but skip failovers for the next
+    REDEPLOY_TIMEOUT, and likewise for the next GW that disappears.
+    If it works as designed, then regardless of the number of GWs, a
+    redeploy will only cause one fo/fb. */
+   if ((now - (st.last_gw_down_ts - beacon_grace_sec)) < skip_failovers_sec) {
+     skip_failovers_for_group(group_key);
+     dout(4) << "startup: set skip-failovers for gw " << gw_id << " group "
+             << group_key << dendl;
+   }
  }
  
+ void NVMeofGwMap::set_addr_vect(const NvmeGwId &gw_id,
+     const NvmeGroupKey& group_key, const entity_addr_t &addr) {
+   entity_addrvec_t addrvec(addr);
+   for (auto& gws_states: created_gws[group_key]) {
+     auto &state = gws_states.second;
+     auto &gw_found = gws_states.first;
+     if (state.addr_vect == addrvec && gw_found != gw_id) {
+       /* This can happen when several GWs restart simultaneously and
+        * get an entity_addr that differs from their previous one.
+        */
+       entity_addr_t a;
+       state.addr_vect = entity_addrvec_t(a); // clean up the duplicated address
+       dout(4) << "found duplicated addr vect in gw " << gw_found << dendl;
+     }
+   }
+   created_gws[group_key][gw_id].addr_vect = addrvec;
+   dout(10) << "Set addr vect " << addrvec << " for gw " << gw_id << dendl;
+ }
+ 
  void NVMeofGwMap::increment_gw_epoch(const NvmeGroupKey& group_key)
  {
    if (HAVE_FEATURE(mon->get_quorum_con_features(), NVMEOFHAMAP)) {
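
A note on the timing check in gw_performed_startup(): below is a minimal
standalone sketch of the same comparison, using plain std::chrono in place
of Ceph's clock and config machinery. The interval values are hypothetical,
not the real config defaults; last_gw_down_ts in the real code is recorded
when the monitor declares the GW down, roughly beacon_grace seconds after
its last beacon, which appears to be why the grace period is subtracted
back out before comparing.

  // Sketch only: models the redeploy-detection check with std::chrono.
  // All constants here are assumptions, not Ceph defaults.
  #include <chrono>
  #include <iostream>

  int main() {
    using namespace std::chrono;
    const seconds skip_failovers_interval{300}; // assumed config value
    const seconds beacon_grace{10};             // assumed config value

    const auto gw_declared_down = steady_clock::now(); // ~last_gw_down_ts
    const auto now = gw_declared_down + seconds{42};   // GW back ~42s later

    // Estimate when the GW actually disappeared (declared-down time minus
    // the beacon grace), then check whether it returned quickly enough to
    // look like a redeploy rather than a real failure.
    if (now - (gw_declared_down - beacon_grace) < skip_failovers_interval) {
      std::cout << "likely redeploy: skip failovers for this group\n";
    } else {
      std::cout << "GW was gone too long: allow a normal failover\n";
    }
  }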
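
Similarly, here is a simplified model of the invariant set_addr_vect()
enforces: at most one GW in a group may own a given address, so a stale
duplicate left behind by a simultaneous restart is cleared before the new
owner is recorded. std::string stands in for the real NvmeGwId and
entity_addrvec_t types, and the sample addresses are made up.

  #include <iostream>
  #include <map>
  #include <string>

  // Assign addr to gw_id, clearing the address of any other GW that
  // still holds it (the stale entry the patch guards against).
  static void set_addr(std::map<std::string, std::string>& gws,
                       const std::string& gw_id, const std::string& addr) {
    for (auto& [other_id, other_addr] : gws) {
      if (other_addr == addr && other_id != gw_id) {
        other_addr.clear(); // duplicate found: drop the old mapping
        std::cout << "cleared duplicated addr on " << other_id << "\n";
      }
    }
    gws[gw_id] = addr;
  }

  int main() {
    std::map<std::string, std::string> gws;
    // Two GWs reboot; the second comes back with the address the first
    // used to have, so the stale entry on gw1 must be cleared.
    set_addr(gws, "gw1", "10.0.0.5");
    set_addr(gws, "gw2", "10.0.0.5");
    for (const auto& [id, addr] : gws)
      std::cout << id << " -> " << (addr.empty() ? "(none)" : addr) << "\n";
  }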