]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mon: fix duplicated entity addr in the map during reboot of several nvmeof GWs 63003/head
authorLeonid Chernin <leonidc@il.ibm.com>
Mon, 21 Apr 2025 13:56:07 +0000 (16:56 +0300)
committerLeonid Chernin <leonidc@il.ibm.com>
Thu, 8 May 2025 06:23:26 +0000 (09:23 +0300)
Signed-off-by: Leonid Chernin <leonidc@il.ibm.com>
src/common/options/mon.yaml.in
src/mon/NVMeofGwMap.cc
src/mon/NVMeofGwMap.h
src/mon/NVMeofGwMon.cc
src/nvmeof/NVMeofGwMonitorClient.cc
src/nvmeof/NVMeofGwMonitorClient.h

index db2b6f6afccd9e241fac8bdebe434a3b836cab6f..337162ad427a06b0e2f9b39f2b1be068f5d326e1 100644 (file)
@@ -110,6 +110,13 @@ options:
   default: 15_min
   services:
   - mon
+- name: mon_nvmeofgw_wrong_map_ignore_sec
+  type: uint
+  level: advanced
+  desc: Period in seconds from MonClient startup to ignore wrong maps from Monitor
+  default: 15
+  services:
+  - mon
 - name: mon_mgr_inactive_grace
   type: int
   level: advanced
index 8a9113a3a26b67ec4ed736a102a9d0294fef840c..e959f6d20af8868204559c1cb7ea270270c4446a 100755 (executable)
@@ -239,6 +239,25 @@ void  NVMeofGwMap::gw_performed_startup(const NvmeGwId &gw_id,
   increment_gw_epoch( group_key);
 }
 
+void NVMeofGwMap::set_addr_vect(const NvmeGwId &gw_id,
+    const NvmeGroupKey& group_key, const entity_addr_t &addr) { // store addr for gw_id; reset it on any other GW of the group
+  entity_addrvec_t addrvec(addr); // wrap the single address for comparison with the stored addr_vect
+  for (auto& gws_states: created_gws[group_key]) {
+     auto &state = gws_states.second;
+     auto &gw_found = gws_states.first;
+     if (state.addr_vect == addrvec && gw_found != gw_id) {
+      /* A different GW of this group still holds the address being assigned
+       * to gw_id. This can happen when several GWs restart simultaneously
+       * and get entity addresses that differ from their previous ones. */
+       entity_addr_t a; // default-constructed address used as the reset value
+       state.addr_vect = entity_addrvec_t(a); // clear the duplicated address on the other GW
+       dout(4) << "found duplicated addr vect in gw " << gw_found << dendl;
+     }
+  }
+  created_gws[group_key][gw_id].addr_vect = addrvec; // finally record the address for gw_id itself
+  dout(10) << "Set addr vect " << addrvec << " for gw " << gw_id << dendl;
+}
+
 void NVMeofGwMap::increment_gw_epoch(const NvmeGroupKey& group_key)
 {
   if (HAVE_FEATURE(mon->get_quorum_con_features(), NVMEOFHAMAP)) {
index c3a95d082266fd5536e01efdded1b81fe5134eac..015577f248ad7bf97ecdbe4fe818ea2d0266f050 100755 (executable)
@@ -87,6 +87,8 @@ public:
        const NvmeGroupKey& group_key, bool &map_modified);
   void gw_performed_startup(const NvmeGwId &gw_id,
        const NvmeGroupKey& group_key, bool &propose_pending);
+  void set_addr_vect(const NvmeGwId &gw_id,
+      const NvmeGroupKey& group_key, const entity_addr_t &addr_vect);
   void skip_failovers_for_group(const NvmeGroupKey& group_key);
 private:
   int  do_delete_gw(const NvmeGwId &gw_id, const NvmeGroupKey& group_key);
index 1e5efad1810dacc93adc1a732f618f5f4bea56e2..64877f474bee9ab775922810d08e1cda07b26a79 100644 (file)
@@ -678,8 +678,7 @@ bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op)
        false) {
        pending_map.created_gws[group_key][gw_id].performed_full_startup = true;
        pending_map.gw_performed_startup(gw_id, group_key, gw_propose);
-       pending_map.created_gws[group_key][gw_id].addr_vect =
-           entity_addrvec_t(con->get_peer_addr());
+       pending_map.set_addr_vect(gw_id, group_key, con->get_peer_addr());
       }
       LastBeacon lb = {gw_id, group_key};
       last_beacon[lb] = now; //Update last beacon
@@ -728,8 +727,7 @@ bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op)
     dout(4) << "Warning: entity addr need to set for GW client " << gw_id
       << " was " <<  pending_map.created_gws[group_key][gw_id].addr_vect
       << " now " << entity_addrvec_t(con->get_peer_addr()) << dendl;
-    pending_map.created_gws[group_key][gw_id].addr_vect =
-      entity_addrvec_t(con->get_peer_addr());
+    pending_map.set_addr_vect(gw_id, group_key, con->get_peer_addr());
     gw_propose = true;
   }
   // deep copy the whole nonce map of this GW
index a48b85d960520dda886c3baab1cb2956bd516653..686b79719db2df3f172eec44efda5a98bb1c0f78 100644 (file)
@@ -39,6 +39,7 @@ NVMeofGwMonitorClient::NVMeofGwMonitorClient(int argc, const char **argv) :
   osdmap_epoch(0),
   gwmap_epoch(0),
   last_map_time(std::chrono::steady_clock::now()),
+  reset_timestamp(std::chrono::steady_clock::now()),
   monc{g_ceph_context, poolctx},
   client_messenger(Messenger::create(g_ceph_context, "async", entity_name_t::CLIENT(-1), "client", getpid())),
   objecter{g_ceph_context, client_messenger.get(), &monc, poolctx},
@@ -305,18 +306,32 @@ void NVMeofGwMonitorClient::shutdown()
 
 void NVMeofGwMonitorClient::handle_nvmeof_gw_map(ceph::ref_t<MNVMeofGwMap> nmap)
 {
-  last_map_time = std::chrono::steady_clock::now(); // record time of last monitor message
+  auto now = std::chrono::steady_clock::now();
+  last_map_time = now; // record time of last monitor message
 
   auto &new_map = nmap->get_map();
   gwmap_epoch = nmap->get_gwmap_epoch();
   auto group_key = std::make_pair(pool, group);
   dout(10) << "handle nvmeof gw map: " << new_map << dendl;
-
+  uint64_t reset_elapsed_seconds =
+      std::chrono::duration_cast<std::chrono::seconds>(now - reset_timestamp).count();
   NvmeGwClientState old_gw_state;
+  uint64_t ignore_wrong_map_interval_sec =
+       g_conf().get_val<uint64_t>("mon_nvmeofgw_wrong_map_ignore_sec");
   auto got_old_gw_state = get_gw_state("old map", map, group_key, name, old_gw_state); 
   NvmeGwClientState new_gw_state;
   auto got_new_gw_state = get_gw_state("new map", new_map, group_key, name, new_gw_state); 
 
+  /* In rare cases, when several GWs reboot and the entity address of the monitor client changes, the
+   * monitor may send a second, wrong map: the unicast map can go to the wrong destination because the
+   * "old" address still appears in the monitor's map. This is an asynchronous process in the monitor, so
+   * it is safer to silently ignore the wrong map. This can happen only in the first seconds after a restart.
+   */
+  if ( (reset_elapsed_seconds < ignore_wrong_map_interval_sec) &&
+        !got_new_gw_state && got_old_gw_state) {
+    dout(4) << "Wrong map received, Ignore it" << dendl;
+    return;
+  }
   // ensure that the gateway state has not vanished
   ceph_assert(got_new_gw_state || !got_old_gw_state);
 
index dc5fcbce2b2ce45c969545b1f39c8814d906816f..546fff27db7eb21c1593c5f763bc2eb957963e0a 100644 (file)
@@ -45,6 +45,9 @@ private:
   epoch_t     gwmap_epoch;  // last received gw map epoch
   std::chrono::time_point<std::chrono::steady_clock>
               last_map_time; // used to panic on disconnect
+  std::chrono::time_point<std::chrono::steady_clock>
+                reset_timestamp; // used to bypass some validations
+
   bool first_beacon = true;
   // init gw ssl opts
   void init_gw_ssl_opts();