git-server-git.apps.pok.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
nvmeofgw: prevent map corruption while processing beacons from deleted gws wip-vallari-test-7Jan-centos9-only
author: Leonid Chernin <leonidc@il.ibm.com>
Mon, 8 Dec 2025 20:54:44 +0000 (22:54 +0200)
committer: Vallari Agrawal <vallari.agrawal@ibm.com>
Wed, 7 Jan 2026 13:45:35 +0000 (19:15 +0530)
Fix a race that corrupts the map when a deleted GW keeps sending beacons
after its data was removed from the pending map but still exists in the main map.
Process beacons only if the GW's data exists in both maps
(the main map and the pending map); otherwise, just ignore the beacons.

Fixes: https://tracker.ceph.com/issues/74160

Signed-off-by: Leonid Chernin <leonidc@il.ibm.com>
src/mon/NVMeofGwMon.cc

index 9809b9eff610d8caaae79f1e7d4ab2d8b4a0e8dc..b4c5113ff376d6264207cf8ac552fea6d1dbcff0 100644 (file)
@@ -902,6 +902,10 @@ bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op)
                               NVMEOFHAMAP);
   auto& group_gws = map.created_gws[group_key];
   auto gw = group_gws.find(gw_id);
+  auto& pend_gws =  pending_map.created_gws[group_key];
+  auto pend_gw = pend_gws.find(gw_id);
+
+  bool gw_exists = (gw != group_gws.end() && (pend_gw != pend_gws.end()));
   const BeaconSubsystems& sub = m->get_subsystems();
   auto now = ceph::coarse_mono_clock::now();
   int beacons_till_ack =
@@ -910,7 +914,7 @@ bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op)
   bool send_ack =  false;
 
   if (avail == gw_availability_t::GW_CREATED) {
-    if (gw == group_gws.end()) {
+    if (!gw_exists) {
       gw_created = false;
       dout(10) << "Warning: GW " << gw_id << " group_key " << group_key
               << " was not found in the  map.created_gws "
@@ -947,7 +951,7 @@ bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op)
   // gw already created
   } else { // first GW beacon should come with avail = Created
     // if GW reports Avail/Unavail but in monitor's database it is Unavailable
-    if (gw != group_gws.end()) {
+    if (gw_exists) {
       correct_sequence = pending_map.put_gw_beacon_sequence_number
            (gw_id, version, group_key, sequence, stored_sequence);
       // it means it did not perform "exit" after failover was set by
@@ -982,7 +986,7 @@ bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op)
     }
   }
   // Beacon from GW in !Created state but it does not appear in the map
-  if (gw == group_gws.end()) {
+  if (!gw_exists) {
     dout(4) << "GW that does not appear in the map sends beacon, ignore "
        << gw_id << dendl;
     mon.no_reply(op);