git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
nvmeofgw: prevent map corruption while processing beacons from deleted gws
author: Leonid Chernin <leonidc@il.ibm.com>
Mon, 8 Dec 2025 20:54:44 +0000 (22:54 +0200)
committer: Leonid Chernin <leonidc@il.ibm.com>
Wed, 29 Apr 2026 13:33:51 +0000 (16:33 +0300)
Fix a race that corrupts the map when a deleted GW keeps sending beacons
after its data was removed from the pending map but still exists in the main map.
Process beacons only if the GW's data exists in both maps
(main map and pending map); otherwise, ignore the beacons.

fixes: https://tracker.ceph.com/issues/74160

Signed-off-by: Leonid Chernin <leonidc@il.ibm.com>
(cherry picked from commit 3636dc3598b7022bff623d686cbee83ed288d966)

src/mon/NVMeofGwMon.cc

index 2b5bddc11fdf116b401cfa47d7c95ad7b5e52634..064b616458735aa88560bbe2764399e7bb6c3465 100644 (file)
@@ -908,6 +908,10 @@ bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op)
                               NVMEOFHAMAP);
   auto& group_gws = map.created_gws[group_key];
   auto gw = group_gws.find(gw_id);
+  auto& pend_gws =  pending_map.created_gws[group_key];
+  auto pend_gw = pend_gws.find(gw_id);
+
+  bool gw_exists = (gw != group_gws.end() && (pend_gw != pend_gws.end()));
   const BeaconSubsystems& sub = m->get_subsystems();
   auto now = ceph::coarse_mono_clock::now();
   int beacons_till_ack =
@@ -917,7 +921,7 @@ bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op)
 
   check_beacon_timeout(now, gw_propose);
   if (avail == gw_availability_t::GW_CREATED) {
-    if (gw == group_gws.end()) {
+    if (!gw_exists) {
       gw_created = false;
       dout(10) << "Warning: GW " << gw_id << " group_key " << group_key
               << " was not found in the  map.created_gws "
@@ -954,7 +958,7 @@ bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op)
   // gw already created
   } else { // first GW beacon should come with avail = Created
     // if GW reports Avail/Unavail but in monitor's database it is Unavailable
-    if (gw != group_gws.end()) {
+    if (gw_exists) {
       correct_sequence = pending_map.put_gw_beacon_sequence_number
            (gw_id, version, group_key, sequence, stored_sequence);
       // it means it did not perform "exit" after failover was set by
@@ -989,7 +993,7 @@ bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op)
     }
   }
   // Beacon from GW in !Created state but it does not appear in the map
-  if (gw == group_gws.end()) {
+  if (!gw_exists) {
     dout(4) << "GW that does not appear in the map sends beacon, ignore "
        << gw_id << dendl;
     mon.no_reply(op);