]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
nvmeofgw: prevent map corruption while processing beacons from deleted gws reef-nvmeof
author Leonid Chernin <leonidc@il.ibm.com>
Mon, 8 Dec 2025 20:54:44 +0000 (22:54 +0200)
committer Leonid Chernin <leonidc@il.ibm.com>
Tue, 23 Dec 2025 11:01:42 +0000 (13:01 +0200)
Fix a race that corrupts the map when a deleted GW keeps sending beacons
after its data was removed from the pending map but still exists in the main map.
Process a beacon only if the GW's data exists in both maps
(main map and pending map); otherwise, ignore the beacon.

fixes: https://tracker.ceph.com/issues/74160

Signed-off-by: Leonid Chernin <leonidc@il.ibm.com>
src/mon/NVMeofGwMon.cc

index 9cb77a24183d1b060297fd0a2559caa30c6f0470..fa9c008f0264b4b4f69a8ee8d590f19af77765e3 100644 (file)
@@ -423,10 +423,14 @@ bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op){
     NVMeofGwMap ack_map;
     auto& group_gws = map.Created_gws[group_key];
     auto gw = group_gws.find(gw_id);
+    auto& pend_gws =  pending_map.Created_gws[group_key];
+    auto pend_gw = pend_gws.find(gw_id);
+
+    bool gw_exists = (gw != group_gws.end() && (pend_gw != pend_gws.end()));
     const BeaconSubsystems& sub = m->get_subsystems();
 
     if (avail == GW_AVAILABILITY_E::GW_CREATED){
-        if (gw == group_gws.end()) {
+        if (!gw_exists) {
            gw_created = false;
            dout(10) << "Warning: GW " << gw_id << " group_key " << group_key << " was not found in the  map.Created_gws "<< map.Created_gws <<dendl;
            goto set_propose;
@@ -446,7 +450,7 @@ bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op){
         }
     }
     else { // gw already created
-        if (gw != group_gws.end()) // if GW reports Available but in monitor's database it is Unavailable
+        if (gw_exists) // if GW reports Available but in monitor's database it is Unavailable
                                    // it means it did not perform "exit" after failover was set by NVMeofGWMon
            if( pending_map.Created_gws[group_key][gw_id].availability == GW_AVAILABILITY_E::GW_UNAVAILABLE  &&
                pending_map.Created_gws[group_key][gw_id].performed_full_startup == false &&
@@ -461,7 +465,7 @@ bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op){
     }
 
     // At this stage the gw has to be in the Created_gws
-    if(gw == group_gws.end()){
+    if(!gw_exists){
         dout(1) << "Error : Administratively deleted GW sends beacon " << gw_id <<dendl;
         goto false_return; // not sending ack to this beacon
     }