From: Vallari Agrawal Date: Wed, 25 Dec 2024 05:01:21 +0000 (+0530) Subject: mon/NVMeofGwMap: add delay to NVMEOF_GATEWAY_DELETING warning X-Git-Tag: v20.0.0~439^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=56cf5122d9755c5de198bd46f1670a8f20a0e3e2;p=ceph.git mon/NVMeofGwMap: add delay to NVMEOF_GATEWAY_DELETING warning Instead of immediately triggering, have this healthcheck trigger after some time has elapsed. This delay can be configured by mon_nvmeofgw_delete_grace. Track the time when gateways go into DELETING state in a new member var (of NVMeofGwMon) 'gws_deleting_time'. Signed-off-by: Vallari Agrawal --- diff --git a/src/mon/NVMeofGwMap.cc b/src/mon/NVMeofGwMap.cc index fb5e5a4a9107..2d2735f1e7c2 100755 --- a/src/mon/NVMeofGwMap.cc +++ b/src/mon/NVMeofGwMap.cc @@ -171,6 +171,8 @@ int NVMeofGwMap::cfg_delete_gw( << state.availability << " Resulting GW availability: " << state.availability << dendl; state.subsystems.clear();//ignore subsystems of this GW + utime_t now = ceph_clock_now(); + mon->nvmegwmon()->gws_deleting_time[group_key][gw_id] = now; return 0; } } @@ -895,11 +897,12 @@ struct CMonRequestProposal : public Context { } }; -void NVMeofGwMap::get_health_checks(health_check_map_t *checks) const +void NVMeofGwMap::get_health_checks(health_check_map_t *checks) { list singleGatewayDetail; list gatewayDownDetail; list gatewayInDeletingDetail; + int deleting_gateways = 0; for (const auto& created_map_pair: created_gws) { const auto& group_key = created_map_pair.first; auto& group = group_key.second; @@ -917,12 +920,36 @@ void NVMeofGwMap::get_health_checks(health_check_map_t *checks) const ss << "NVMeoF Gateway '" << gw_id << "' is unavailable." ; gatewayDownDetail.push_back(ss.str()); } else if (gw_created.availability == gw_availability_t::GW_DELETING) { - ostringstream ss; - ss << "NVMeoF Gateway '" << gw_id << "' is in deleting state." 
; - gatewayInDeletingDetail.push_back(ss.str()); + deleting_gateways++; + utime_t now = ceph_clock_now(); + bool found_deleting_time = false; + auto gws_deleting_time = mon->nvmegwmon()->gws_deleting_time; + auto group_it = gws_deleting_time.find(group_key); + if (group_it != gws_deleting_time.end()) { + auto& gw_map = group_it->second; + auto gw_it = gw_map.find(gw_id); + if (gw_it != gw_map.end()) { + found_deleting_time = true; + utime_t delete_time = gw_it->second; + if ((now - delete_time) > g_conf().get_val("mon_nvmeofgw_delete_grace").count()) { + ostringstream ss; + ss << "NVMeoF Gateway '" << gw_id << "' is in deleting state."; + gatewayInDeletingDetail.push_back(ss.str()); + } + } + } + if (!found_deleting_time) { + // DELETING gateway not found in gws_deleting_time, set timeout now + mon->nvmegwmon()->gws_deleting_time[group_key][gw_id] = now; + } } } } + if (deleting_gateways == 0) { + // no gateway in GW_DELETING state currently, flush old gws_deleting_time + mon->nvmegwmon()->gws_deleting_time.clear(); + } + if (!singleGatewayDetail.empty()) { ostringstream ss; ss << singleGatewayDetail.size() << " group(s) have only 1 nvmeof gateway" diff --git a/src/mon/NVMeofGwMap.h b/src/mon/NVMeofGwMap.h index 5f6577330123..85fd62b3a07d 100755 --- a/src/mon/NVMeofGwMap.h +++ b/src/mon/NVMeofGwMap.h @@ -144,7 +144,7 @@ public: DECODE_FINISH(bl); } - void get_health_checks(health_check_map_t *checks) const; + void get_health_checks(health_check_map_t *checks); }; #include "NVMeofGwSerialize.h" diff --git a/src/mon/NVMeofGwMon.h b/src/mon/NVMeofGwMon.h index 7fae8b766a5e..d7f5fd89cde6 100644 --- a/src/mon/NVMeofGwMon.h +++ b/src/mon/NVMeofGwMon.h @@ -82,6 +82,8 @@ public: void check_subs(bool type); void check_sub(Subscription *sub); + std::map> gws_deleting_time; + private: void synchronize_last_beacon(); void process_gw_down(const NvmeGwId &gw_id,