From: Vallari Agrawal Date: Mon, 30 Sep 2024 19:24:36 +0000 (+0530) Subject: mon: add nvmeof healthchecks X-Git-Tag: v20.0.0~665^2~2 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=1cad0401cfc74482db4118199d9bca5e3c7b254d;p=ceph.git mon: add nvmeof healthchecks Add NVMeofGwMap::get_health_checks which raises NVMEOF_SINGLE_GATEWAY if any of the groups have 1 gateway. In NVMeofGwMon, call `encode_health` and `load_health` to register healthchecks. This will add nvmeof healthchecks to "ceph health" output. Signed-off-by: Vallari Agrawal --- diff --git a/doc/rados/operations/health-checks.rst b/doc/rados/operations/health-checks.rst index 81dafdf03e987..bbce958bebe70 100644 --- a/doc/rados/operations/health-checks.rst +++ b/doc/rados/operations/health-checks.rst @@ -1633,6 +1633,18 @@ We encourage you to fix this by making the weights even on both dividing buckets This can be done by making sure the combined weight of the OSDs on each dividing bucket are the same. +NVMeoF Gateway +-------------- + +NVMEOF_SINGLE_GATEWAY +__________________________________ + +One of the gateway groups has only one gateway. This is not ideal because it makes +high availability (HA) impossible with a single gateway in a group. This can lead to +problems with failover and failback operations for the NVMeoF gateway. + +It's recommended to have multiple NVMeoF gateways in a group. 
+
 Miscellaneous
 -------------
 
diff --git a/src/mon/NVMeofGwMap.cc b/src/mon/NVMeofGwMap.cc
index c01ea9e710321..c350622b7db63 100755
--- a/src/mon/NVMeofGwMap.cc
+++ b/src/mon/NVMeofGwMap.cc
@@ -16,7 +16,9 @@
 #include "NVMeofGwMon.h"
 #include "NVMeofGwMap.h"
 #include "OSDMonitor.h"
+#include "mon/health_check.h"
 
+using std::list;
 using std::map;
 using std::make_pair;
 using std::ostream;
@@ -879,6 +881,42 @@ struct CMonRequestProposal : public Context {
   }
 };
 
+/**
+ * Populate @p checks with NVMeoF gateway health warnings.
+ *
+ * Scans created_gws and, for every group whose gateway map holds exactly
+ * one entry, records a detail line naming that group. When at least one
+ * such group exists, a single NVMEOF_SINGLE_GATEWAY check is added at
+ * HEALTH_WARN, carrying a summary, the number of affected groups as its
+ * count, and the per-group lines as its detail.
+ *
+ * @param checks  health check map that receives the warning; nothing is
+ *                added when no group has exactly one gateway.
+ */
+void NVMeofGwMap::get_health_checks(health_check_map_t *checks) const
+{
+  list<std::string> detail;
+  for (const auto& created_map_pair: created_gws) {
+    const auto& group_key = created_map_pair.first;
+    auto& group = group_key.second;
+    const NvmeGwMonStates& gw_created_map = created_map_pair.second;
+    if ( gw_created_map.size() == 1) {
+      ostringstream ss;
+      ss << "NVMeoF Gateway Group '" << group << "' has 1 gateway." ;
+      detail.push_back(ss.str());
+    }
+  }
+  if (!detail.empty()) {
+    ostringstream ss;
+    ss << detail.size() << " group(s) have only 1 nvmeof gateway"
+       << "; HA is not possible with single gateway.";
+    auto& d = checks->add("NVMEOF_SINGLE_GATEWAY", HEALTH_WARN,
+                          ss.str(), detail.size());
+    // hand the collected lines to the check by swapping, not copying
+    d.detail.swap(detail);
+  }
+}
+
 int NVMeofGwMap::blocklist_gw(
   const NvmeGwId &gw_id, const NvmeGroupKey& group_key,
   NvmeAnaGrpId grpid, epoch_t &epoch, bool failover)
diff --git a/src/mon/NVMeofGwMap.h b/src/mon/NVMeofGwMap.h
index 267d85b10f918..5f6577330123a 100755
--- a/src/mon/NVMeofGwMap.h
+++ b/src/mon/NVMeofGwMap.h
@@ -27,6 +27,9 @@
 #include "NVMeofGwTypes.h"
 
 using ceph::coarse_mono_clock;
+
+class health_check_map_t;
+
 class Monitor;
 /*-------------------*/
 class NVMeofGwMap
@@ -140,6 +143,8 @@ public:
     decode(fsm_timers, bl);
     DECODE_FINISH(bl);
   }
+
+  void get_health_checks(health_check_map_t *checks) const;
 };
 
 #include "NVMeofGwSerialize.h"
diff --git a/src/mon/NVMeofGwMon.cc b/src/mon/NVMeofGwMon.cc
index d9e936e27df34..25c143819645c 100644
--- a/src/mon/NVMeofGwMon.cc
+++ b/src/mon/NVMeofGwMon.cc
@@ -176,6
+176,11 @@ void NVMeofGwMon::encode_pending(MonitorDBStore::TransactionRef t) << HAVE_FEATURE(mon.get_quorum_con_features(), NVMEOFHA) << dendl; put_version(t, pending_map.epoch, bl); put_last_committed(t, pending_map.epoch); + + //health + health_check_map_t checks; + pending_map.get_health_checks(&checks); + encode_health(checks, t); } void NVMeofGwMon::update_from_paxos(bool *need_bootstrap) @@ -188,6 +193,7 @@ void NVMeofGwMon::update_from_paxos(bool *need_bootstrap) bufferlist bl; int err = get_version(version, bl); ceph_assert(err == 0); + load_health(); auto p = bl.cbegin(); map.decode(p);