From: Leonid Chernin Date: Mon, 21 Apr 2025 13:56:07 +0000 (+0300) Subject: mon: fix duplicated entity addr in the map during reboot of several nvmeof GWs X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=b868dea3bf2f788062a176ff58b695483f6689d5;p=ceph.git mon: fix duplicated entity addr in the map during reboot of several nvmeof GWs Signed-off-by: Leonid Chernin (cherry picked from commit e55730c073697f66f35d4ee7beb4c9a7dd59dd65) --- diff --git a/src/common/options/mon.yaml.in b/src/common/options/mon.yaml.in index 06db74895b802..2c337fcea7072 100644 --- a/src/common/options/mon.yaml.in +++ b/src/common/options/mon.yaml.in @@ -111,6 +111,13 @@ options: default: 15_min services: - mon +- name: mon_nvmeofgw_wrong_map_ignore_sec + type: uint + level: advanced + desc: Period in seconds from MonClient startup to ignore wrong maps from Monitor + default: 15 + services: + - mon - name: mon_mgr_inactive_grace type: int level: advanced diff --git a/src/mon/NVMeofGwMap.cc b/src/mon/NVMeofGwMap.cc index 12bd93cef7466..80d5306051f7d 100755 --- a/src/mon/NVMeofGwMap.cc +++ b/src/mon/NVMeofGwMap.cc @@ -264,6 +264,25 @@ void NVMeofGwMap::gw_performed_startup(const NvmeGwId &gw_id, } } +void NVMeofGwMap::set_addr_vect(const NvmeGwId &gw_id, + const NvmeGroupKey& group_key, const entity_addr_t &addr) { + entity_addrvec_t addrvec(addr); + for (auto& gws_states: created_gws[group_key]) { + auto &state = gws_states.second; + auto &gw_found = gws_states.first; + if (state.addr_vect == addrvec && gw_found != gw_id) { + /* This can happen when several GWs restart simultaneously and + * they got entity_addr that differ from the previous one + */ + entity_addr_t a; + state.addr_vect = entity_addrvec_t(a);// cleanup duplicated address + dout(4) << "found duplicated addr vect in gw " << gw_found << dendl; + } + } + created_gws[group_key][gw_id].addr_vect = addrvec; + dout(10) << "Set addr vect " << addrvec << " for gw " << gw_id << dendl; +} + void
NVMeofGwMap::increment_gw_epoch(const NvmeGroupKey& group_key) { if (HAVE_FEATURE(mon->get_quorum_con_features(), NVMEOFHAMAP)) { diff --git a/src/mon/NVMeofGwMap.h b/src/mon/NVMeofGwMap.h index c3a95d082266f..015577f248ad7 100755 --- a/src/mon/NVMeofGwMap.h +++ b/src/mon/NVMeofGwMap.h @@ -87,6 +87,8 @@ public: const NvmeGroupKey& group_key, bool &map_modified); void gw_performed_startup(const NvmeGwId &gw_id, const NvmeGroupKey& group_key, bool &propose_pending); + void set_addr_vect(const NvmeGwId &gw_id, + const NvmeGroupKey& group_key, const entity_addr_t &addr_vect); void skip_failovers_for_group(const NvmeGroupKey& group_key); private: int do_delete_gw(const NvmeGwId &gw_id, const NvmeGroupKey& group_key); diff --git a/src/mon/NVMeofGwMon.cc b/src/mon/NVMeofGwMon.cc index e41060084eae9..5c5d4f7194fc2 100644 --- a/src/mon/NVMeofGwMon.cc +++ b/src/mon/NVMeofGwMon.cc @@ -680,8 +680,7 @@ bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op) false) { pending_map.created_gws[group_key][gw_id].performed_full_startup = true; pending_map.gw_performed_startup(gw_id, group_key, gw_propose); - pending_map.created_gws[group_key][gw_id].addr_vect = - entity_addrvec_t(con->get_peer_addr()); + pending_map.set_addr_vect(gw_id, group_key, con->get_peer_addr()); } LastBeacon lb = {gw_id, group_key}; last_beacon[lb] = now; //Update last beacon @@ -730,8 +729,7 @@ bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op) dout(4) << "Warning: entity addr need to set for GW client " << gw_id << " was " << pending_map.created_gws[group_key][gw_id].addr_vect << " now " << entity_addrvec_t(con->get_peer_addr()) << dendl; - pending_map.created_gws[group_key][gw_id].addr_vect = - entity_addrvec_t(con->get_peer_addr()); + pending_map.set_addr_vect(gw_id, group_key, con->get_peer_addr()); gw_propose = true; } // deep copy the whole nonce map of this GW diff --git a/src/nvmeof/NVMeofGwMonitorClient.cc b/src/nvmeof/NVMeofGwMonitorClient.cc index ae5562a0718b7..0b798c370a2e7 100644 --- 
a/src/nvmeof/NVMeofGwMonitorClient.cc +++ b/src/nvmeof/NVMeofGwMonitorClient.cc @@ -39,6 +39,7 @@ NVMeofGwMonitorClient::NVMeofGwMonitorClient(int argc, const char **argv) : osdmap_epoch(0), gwmap_epoch(0), last_map_time(std::chrono::steady_clock::now()), + reset_timestamp(std::chrono::steady_clock::now()), monc{g_ceph_context, poolctx}, client_messenger(Messenger::create(g_ceph_context, "async", entity_name_t::CLIENT(-1), "client", getpid())), objecter{g_ceph_context, client_messenger.get(), &monc, poolctx}, @@ -304,18 +305,32 @@ void NVMeofGwMonitorClient::shutdown() void NVMeofGwMonitorClient::handle_nvmeof_gw_map(ceph::ref_t<MNVMeofGwMap> nmap) { - last_map_time = std::chrono::steady_clock::now(); // record time of last monitor message + auto now = std::chrono::steady_clock::now(); + last_map_time = now; // record time of last monitor message auto &new_map = nmap->get_map(); gwmap_epoch = nmap->get_gwmap_epoch(); auto group_key = std::make_pair(pool, group); dout(10) << "handle nvmeof gw map: " << new_map << dendl; - + uint64_t reset_elapsed_seconds = + std::chrono::duration_cast<std::chrono::seconds>(now - reset_timestamp).count(); NvmeGwClientState old_gw_state; + uint64_t ignore_wrong_map_interval_sec = + g_conf().get_val<uint64_t>("mon_nvmeofgw_wrong_map_ignore_sec"); auto got_old_gw_state = get_gw_state("old map", map, group_key, name, old_gw_state); NvmeGwClientState new_gw_state; auto got_new_gw_state = get_gw_state("new map", new_map, group_key, name, new_gw_state); + /* It is possible that a wrong second map is sent by the monitor in rare cases when several GWs reboot + * and the entity_address of the monitor client changes. So the Monitor may send the unicast map to the wrong destination + * since this "old" address still appears in its map. It is an asynchronous process in the monitor, so it is better to protect + * from this scenario by silently ignoring the wrong map.
This can happen just in the first several seconds after restart + */ + if ( (reset_elapsed_seconds < ignore_wrong_map_interval_sec) && + !got_new_gw_state && got_old_gw_state) { + dout(4) << "Wrong map received, Ignore it" << dendl; + return; + } // ensure that the gateway state has not vanished ceph_assert(got_new_gw_state || !got_old_gw_state); diff --git a/src/nvmeof/NVMeofGwMonitorClient.h b/src/nvmeof/NVMeofGwMonitorClient.h index dc5fcbce2b2ce..546fff27db7eb 100644 --- a/src/nvmeof/NVMeofGwMonitorClient.h +++ b/src/nvmeof/NVMeofGwMonitorClient.h @@ -45,6 +45,9 @@ private: epoch_t gwmap_epoch; // last received gw map epoch std::chrono::time_point<std::chrono::steady_clock> last_map_time; // used to panic on disconnect + std::chrono::time_point<std::chrono::steady_clock> + reset_timestamp; // used to bypass some validations + bool first_beacon = true; // init gw ssl opts void init_gw_ssl_opts();