]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
mon/NVMeofGwMap: add delay to NVMEOF_GATEWAY_DELETING warning
author Vallari Agrawal <vallari.agrawal@ibm.com>
Wed, 25 Dec 2024 05:01:21 +0000 (10:31 +0530)
committer Vallari Agrawal <vallari.agrawal@ibm.com>
Tue, 7 Jan 2025 06:54:12 +0000 (12:24 +0530)
Instead of immediately triggering, have this healthcheck trigger
after some time has elapsed. This delay can be configured by
mon_nvmeofgw_delete_grace.

Track the time when gateways go into DELETING state in a new
member var (of NVMeofGwMon) 'gws_deleting_time'.

Signed-off-by: Vallari Agrawal <vallari.agrawal@ibm.com>
src/mon/NVMeofGwMap.cc
src/mon/NVMeofGwMap.h
src/mon/NVMeofGwMon.h

index fb5e5a4a91079c886c45461d39c00ed5eda7583f..2d2735f1e7c2e9893459314e4def8083ba7c202f 100755 (executable)
@@ -171,6 +171,8 @@ int NVMeofGwMap::cfg_delete_gw(
             << state.availability <<  " Resulting GW availability: "
             << state.availability  << dendl;
         state.subsystems.clear();//ignore subsystems of this GW
+        utime_t now = ceph_clock_now();
+        mon->nvmegwmon()->gws_deleting_time[group_key][gw_id] = now;
         return 0;
       }
     }
@@ -895,11 +897,12 @@ struct CMonRequestProposal : public Context {
   }
 };
 
-void NVMeofGwMap::get_health_checks(health_check_map_t *checks) const 
+void NVMeofGwMap::get_health_checks(health_check_map_t *checks) 
 {
   list<string> singleGatewayDetail;
   list<string> gatewayDownDetail;
   list<string> gatewayInDeletingDetail;
+  int deleting_gateways = 0;
   for (const auto& created_map_pair: created_gws) {
     const auto& group_key = created_map_pair.first;
     auto& group = group_key.second;
@@ -917,12 +920,36 @@ void NVMeofGwMap::get_health_checks(health_check_map_t *checks) const
         ss << "NVMeoF Gateway '" << gw_id << "' is unavailable." ;
         gatewayDownDetail.push_back(ss.str());
       } else if (gw_created.availability == gw_availability_t::GW_DELETING) {
-        ostringstream ss;
-        ss << "NVMeoF Gateway '" << gw_id << "' is in deleting state." ;
-        gatewayInDeletingDetail.push_back(ss.str());
+        deleting_gateways++;
+        utime_t now = ceph_clock_now();
+        bool found_deleting_time = false;
+        auto gws_deleting_time = mon->nvmegwmon()->gws_deleting_time;
+        auto group_it = gws_deleting_time.find(group_key);
+        if (group_it != gws_deleting_time.end()) {
+          auto& gw_map = group_it->second;
+          auto gw_it = gw_map.find(gw_id);
+          if (gw_it != gw_map.end()) {
+            found_deleting_time = true;
+            utime_t delete_time = gw_it->second;
+            if ((now - delete_time) > g_conf().get_val<std::chrono::seconds>("mon_nvmeofgw_delete_grace").count()) {
+              ostringstream ss;
+              ss << "NVMeoF Gateway '" << gw_id << "' is in deleting state.";
+              gatewayInDeletingDetail.push_back(ss.str());
+            }
+          }
+        }
+        if (!found_deleting_time) {
+          // DELETING gateway not found in gws_deleting_time, set timeout now 
+          mon->nvmegwmon()->gws_deleting_time[group_key][gw_id] = now; 
+        }
       }
     }
   }
+  if (deleting_gateways == 0) {
+    // no gateway in GW_DELETING state currently, flush old gws_deleting_time
+    mon->nvmegwmon()->gws_deleting_time.clear();
+  }
+
   if (!singleGatewayDetail.empty()) {
     ostringstream ss;
     ss << singleGatewayDetail.size() << " group(s) have only 1 nvmeof gateway"
index 5f6577330123ad56d96d564b9ff21dead0000b5d..85fd62b3a07d8e09a98e6d620e8a1f2709affd18 100755 (executable)
@@ -144,7 +144,7 @@ public:
     DECODE_FINISH(bl);
   }
 
-  void get_health_checks(health_check_map_t *checks) const;
+  void get_health_checks(health_check_map_t *checks);
 };
 
 #include "NVMeofGwSerialize.h"
index 7fae8b766a5e7fe46d0bd6f4b73eb2f80bf0ff59..d7f5fd89cde688000912e858a8d6a2aea01e4cd1 100644 (file)
@@ -82,6 +82,8 @@ public:
   void check_subs(bool type);
   void check_sub(Subscription *sub);
 
+  std::map<NvmeGroupKey, std::map<NvmeGwId, utime_t>> gws_deleting_time;
+
 private:
   void synchronize_last_beacon();
   void process_gw_down(const NvmeGwId &gw_id,