From 1cad0401cfc74482db4118199d9bca5e3c7b254d Mon Sep 17 00:00:00 2001 From: Vallari Agrawal Date: Tue, 1 Oct 2024 00:54:36 +0530 Subject: [PATCH] mon: add nvmeof healthchecks Add NVMeofGwMap::get_health_checks which raises NVMEOF_SINGLE_GATEWAY if any of the groups have 1 gateway. In NVMeofGwMon, call `encode_health` and `load_health` to register healthchecks. This will add nvmeof healthchecks to "ceph health" output. Signed-off-by: Vallari Agrawal --- doc/rados/operations/health-checks.rst | 12 ++++++++++++ src/mon/NVMeofGwMap.cc | 25 +++++++++++++++++++++++++ src/mon/NVMeofGwMap.h | 5 +++++ src/mon/NVMeofGwMon.cc | 6 ++++++ 4 files changed, 48 insertions(+) diff --git a/doc/rados/operations/health-checks.rst b/doc/rados/operations/health-checks.rst index 81dafdf03e987..bbce958bebe70 100644 --- a/doc/rados/operations/health-checks.rst +++ b/doc/rados/operations/health-checks.rst @@ -1633,6 +1633,18 @@ We encourage you to fix this by making the weights even on both dividing buckets This can be done by making sure the combined weight of the OSDs on each dividing bucket are the same. +NVMeoF Gateway +-------------- + +NVMEOF_SINGLE_GATEWAY +__________________________________ + +One of the gateway groups has only one gateway. This is not ideal because it makes +high availability (HA) impossible with a single gateway in a group. This can lead to +problems with failover and failback operations for the NVMeoF gateway. + +It's recommended to have multiple NVMeoF gateways in a group. 
+ Miscellaneous ------------- diff --git a/src/mon/NVMeofGwMap.cc b/src/mon/NVMeofGwMap.cc index c01ea9e710321..c350622b7db63 100755 --- a/src/mon/NVMeofGwMap.cc +++ b/src/mon/NVMeofGwMap.cc @@ -16,7 +16,9 @@ #include "NVMeofGwMon.h" #include "NVMeofGwMap.h" #include "OSDMonitor.h" +#include "mon/health_check.h" +using std::list; using std::map; using std::make_pair; using std::ostream; @@ -879,6 +881,29 @@ struct CMonRequestProposal : public Context { } }; +void NVMeofGwMap::get_health_checks(health_check_map_t *checks) const +{ + list detail; + for (const auto& created_map_pair: created_gws) { + const auto& group_key = created_map_pair.first; + auto& group = group_key.second; + const NvmeGwMonStates& gw_created_map = created_map_pair.second; + if ( gw_created_map.size() == 1) { + ostringstream ss; + ss << "NVMeoF Gateway Group '" << group << "' has 1 gateway." ; + detail.push_back(ss.str()); + } + } + if (!detail.empty()) { + ostringstream ss; + ss << detail.size() << " group(s) have only 1 nvmeof gateway" + << "; HA is not possible with single gateway."; + auto& d = checks->add("NVMEOF_SINGLE_GATEWAY", HEALTH_WARN, + ss.str(), detail.size()); + d.detail.swap(detail); + } +} + int NVMeofGwMap::blocklist_gw( const NvmeGwId &gw_id, const NvmeGroupKey& group_key, NvmeAnaGrpId grpid, epoch_t &epoch, bool failover) diff --git a/src/mon/NVMeofGwMap.h b/src/mon/NVMeofGwMap.h index 267d85b10f918..5f6577330123a 100755 --- a/src/mon/NVMeofGwMap.h +++ b/src/mon/NVMeofGwMap.h @@ -27,6 +27,9 @@ #include "NVMeofGwTypes.h" using ceph::coarse_mono_clock; + +class health_check_map_t; + class Monitor; /*-------------------*/ class NVMeofGwMap @@ -140,6 +143,8 @@ public: decode(fsm_timers, bl); DECODE_FINISH(bl); } + + void get_health_checks(health_check_map_t *checks) const; }; #include "NVMeofGwSerialize.h" diff --git a/src/mon/NVMeofGwMon.cc b/src/mon/NVMeofGwMon.cc index d9e936e27df34..25c143819645c 100644 --- a/src/mon/NVMeofGwMon.cc +++ b/src/mon/NVMeofGwMon.cc @@ -176,6 
+176,11 @@ void NVMeofGwMon::encode_pending(MonitorDBStore::TransactionRef t) << HAVE_FEATURE(mon.get_quorum_con_features(), NVMEOFHA) << dendl; put_version(t, pending_map.epoch, bl); put_last_committed(t, pending_map.epoch); + + //health + health_check_map_t checks; + pending_map.get_health_checks(&checks); + encode_health(checks, t); } void NVMeofGwMon::update_from_paxos(bool *need_bootstrap) @@ -188,6 +193,7 @@ void NVMeofGwMon::update_from_paxos(bool *need_bootstrap) bufferlist bl; int err = get_version(version, bl); ceph_assert(err == 0); + load_health(); auto p = bl.cbegin(); map.decode(p); -- 2.39.5