]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mon: fix duplicated entity addr in the map during reboot of several nvvmeof GWs 64257/head
authorLeonid Chernin <leonidc@il.ibm.com>
Mon, 21 Apr 2025 13:56:07 +0000 (16:56 +0300)
committerLeonid Chernin <leonidc@il.ibm.com>
Mon, 30 Jun 2025 08:08:28 +0000 (11:08 +0300)
Signed-off-by: Leonid Chernin <leonidc@il.ibm.com>
(cherry picked from commit e55730c073697f66f35d4ee7beb4c9a7dd59dd65)

src/common/options/mon.yaml.in
src/mon/NVMeofGwMap.cc
src/mon/NVMeofGwMap.h
src/mon/NVMeofGwMon.cc
src/nvmeof/NVMeofGwMonitorClient.cc
src/nvmeof/NVMeofGwMonitorClient.h

index 06db74895b80290d631f6cbde98d0bd1dd4f2a05..2c337fcea70721b73abd548bb9657fcd15300bb1 100644 (file)
@@ -111,6 +111,13 @@ options:
   default: 15_min
   services:
   - mon
+- name: mon_nvmeofgw_wrong_map_ignore_sec
+  type: uint
+  level: advanced
+  desc: Period in seconds from MonClient startup to ignore wrong maps from Monitor
+  default: 15
+  services:
+  - mon
 - name: mon_mgr_inactive_grace
   type: int
   level: advanced
index 12bd93cef7466f59c90e26ab4b3ce1f53d34224b..80d5306051f7da29c6765368d9e228ccd5f2a270 100755 (executable)
@@ -264,6 +264,25 @@ void  NVMeofGwMap::gw_performed_startup(const NvmeGwId &gw_id,
   }
 }
 
+void NVMeofGwMap::set_addr_vect(const NvmeGwId &gw_id,
+    const NvmeGroupKey& group_key, const entity_addr_t &addr) { // record addr for gw_id; clear it from any other GW in the group
+  entity_addrvec_t addrvec(addr); // address vector built from the single given address
+  for (auto& gws_states: created_gws[group_key]) {
+     auto &state = gws_states.second;
+     auto &gw_found = gws_states.first;
+     if (state.addr_vect == addrvec && gw_found != gw_id) {
+      /* A different GW in this group still records this address. This can
+       * happen when several GWs restart simultaneously and get an
+       * entity_addr that differs from their previous one. */
+       entity_addr_t a;
+       state.addr_vect = entity_addrvec_t(a);// reset the duplicate to a default-constructed address
+       dout(4) << "found duplicated addr vect in gw " << gw_found << dendl;
+     }
+  }
+  created_gws[group_key][gw_id].addr_vect = addrvec; // finally store the address for gw_id itself
+  dout(10) << "Set addr vect " << addrvec << " for gw " << gw_id << dendl;
+}
+
 void NVMeofGwMap::increment_gw_epoch(const NvmeGroupKey& group_key)
 {
   if (HAVE_FEATURE(mon->get_quorum_con_features(), NVMEOFHAMAP)) {
index c3a95d082266fd5536e01efdded1b81fe5134eac..015577f248ad7bf97ecdbe4fe818ea2d0266f050 100755 (executable)
@@ -87,6 +87,8 @@ public:
        const NvmeGroupKey& group_key, bool &map_modified);
   void gw_performed_startup(const NvmeGwId &gw_id,
        const NvmeGroupKey& group_key, bool &propose_pending);
+  void set_addr_vect(const NvmeGwId &gw_id,
+      const NvmeGroupKey& group_key, const entity_addr_t &addr_vect);
   void skip_failovers_for_group(const NvmeGroupKey& group_key);
 private:
   int  do_delete_gw(const NvmeGwId &gw_id, const NvmeGroupKey& group_key);
index e41060084eae95a9d6b42ef0cc88d196c4bc0896..5c5d4f7194fc2a24fac45c6cc1ae9bd9b97a7bdb 100644 (file)
@@ -680,8 +680,7 @@ bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op)
        false) {
        pending_map.created_gws[group_key][gw_id].performed_full_startup = true;
        pending_map.gw_performed_startup(gw_id, group_key, gw_propose);
-       pending_map.created_gws[group_key][gw_id].addr_vect =
-           entity_addrvec_t(con->get_peer_addr());
+       pending_map.set_addr_vect(gw_id, group_key, con->get_peer_addr());
       }
       LastBeacon lb = {gw_id, group_key};
       last_beacon[lb] = now; //Update last beacon
@@ -730,8 +729,7 @@ bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op)
     dout(4) << "Warning: entity addr need to set for GW client " << gw_id
       << " was " <<  pending_map.created_gws[group_key][gw_id].addr_vect
       << " now " << entity_addrvec_t(con->get_peer_addr()) << dendl;
-    pending_map.created_gws[group_key][gw_id].addr_vect =
-      entity_addrvec_t(con->get_peer_addr());
+    pending_map.set_addr_vect(gw_id, group_key, con->get_peer_addr());
     gw_propose = true;
   }
   // deep copy the whole nonce map of this GW
index ae5562a0718b775def7c130edf7e363f32e8e7cc..0b798c370a2e764f68008fff03c04bdd6a8d847e 100644 (file)
@@ -39,6 +39,7 @@ NVMeofGwMonitorClient::NVMeofGwMonitorClient(int argc, const char **argv) :
   osdmap_epoch(0),
   gwmap_epoch(0),
   last_map_time(std::chrono::steady_clock::now()),
+  reset_timestamp(std::chrono::steady_clock::now()),
   monc{g_ceph_context, poolctx},
   client_messenger(Messenger::create(g_ceph_context, "async", entity_name_t::CLIENT(-1), "client", getpid())),
   objecter{g_ceph_context, client_messenger.get(), &monc, poolctx},
@@ -304,18 +305,32 @@ void NVMeofGwMonitorClient::shutdown()
 
 void NVMeofGwMonitorClient::handle_nvmeof_gw_map(ceph::ref_t<MNVMeofGwMap> nmap)
 {
-  last_map_time = std::chrono::steady_clock::now(); // record time of last monitor message
+  auto now = std::chrono::steady_clock::now();
+  last_map_time = now; // record time of last monitor message
 
   auto &new_map = nmap->get_map();
   gwmap_epoch = nmap->get_gwmap_epoch();
   auto group_key = std::make_pair(pool, group);
   dout(10) << "handle nvmeof gw map: " << new_map << dendl;
-
+  uint64_t reset_elapsed_seconds =
+      std::chrono::duration_cast<std::chrono::seconds>(now - reset_timestamp).count();
   NvmeGwClientState old_gw_state;
+  uint64_t ignore_wrong_map_interval_sec =
+       g_conf().get_val<uint64_t>("mon_nvmeofgw_wrong_map_ignore_sec");
   auto got_old_gw_state = get_gw_state("old map", map, group_key, name, old_gw_state); 
   NvmeGwClientState new_gw_state;
   auto got_new_gw_state = get_gw_state("new map", new_map, group_key, name, new_gw_state); 
 
+  /* In rare cases, when several GWs reboot at once and the entity address of a monitor client changes,
+  * the monitor may send a unicast map to the wrong destination because the "old" address still appears
+  * in its map. The monitor handles this asynchronously, so it is better to protect against this scenario
+  * by silently ignoring the wrong map. This can happen only in the first several seconds after a restart.
+  */
+  if ( (reset_elapsed_seconds < ignore_wrong_map_interval_sec) &&
+        !got_new_gw_state && got_old_gw_state) {
+    dout(4) << "Wrong map received, Ignore it" << dendl;
+    return;
+  }
   // ensure that the gateway state has not vanished
   ceph_assert(got_new_gw_state || !got_old_gw_state);
 
index dc5fcbce2b2ce45c969545b1f39c8814d906816f..546fff27db7eb21c1593c5f763bc2eb957963e0a 100644 (file)
@@ -45,6 +45,9 @@ private:
   epoch_t     gwmap_epoch;  // last received gw map epoch
   std::chrono::time_point<std::chrono::steady_clock>
               last_map_time; // used to panic on disconnect
+  std::chrono::time_point<std::chrono::steady_clock>
+                reset_timestamp; // used to bypass some validations
+
   bool first_beacon = true;
   // init gw ssl opts
   void init_gw_ssl_opts();