]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mon: add nvmeof healthchecks
authorVallari Agrawal <val.agl002@gmail.com>
Mon, 30 Sep 2024 19:24:36 +0000 (00:54 +0530)
committerVallari Agrawal <val.agl002@gmail.com>
Tue, 15 Oct 2024 12:41:48 +0000 (18:11 +0530)
Add NVMeofGwMap::get_health_checks which raises
NVMEOF_SINGLE_GATEWAY if any of the groups have
1 gateway.

In NVMeofGwMon, call `encode_health` and `load_health`
to register healthchecks. This will add nvmeof healthchecks
to "ceph health" output.

Signed-off-by: Vallari Agrawal <val.agl002@gmail.com>
doc/rados/operations/health-checks.rst
src/mon/NVMeofGwMap.cc
src/mon/NVMeofGwMap.h
src/mon/NVMeofGwMon.cc

index 81dafdf03e9876aa235159c62612a9b69b1af8bf..bbce958bebe70888885dd1b5d48d4ca4867f084c 100644 (file)
@@ -1633,6 +1633,18 @@ We encourage you to fix this by making the weights even on both dividing buckets
 This can be done by making sure the combined weight of the OSDs on each dividing
 bucket are the same.
 
+NVMeoF Gateway
+--------------
+
+NVMEOF_SINGLE_GATEWAY
+__________________________________
+
+One of the gateway groups has only one gateway. This is not ideal because it makes
+high availability (HA) impossible with a single gateway in a group. This can lead to
+problems with failover and failback operations for the NVMeoF gateway.
+
+It's recommended to have multiple NVMeoF gateways in a group.
+
 Miscellaneous
 -------------
 
index c01ea9e71032163bcfb8b648e324352e0a825cbd..c350622b7db637534d6a245289b5d52ba5a7f599 100755 (executable)
@@ -16,7 +16,9 @@
 #include "NVMeofGwMon.h"
 #include "NVMeofGwMap.h"
 #include "OSDMonitor.h"
+#include "mon/health_check.h"
 
+using std::list;
 using std::map;
 using std::make_pair;
 using std::ostream;
@@ -879,6 +881,29 @@ struct CMonRequestProposal : public Context {
   }
 };
 
+void NVMeofGwMap::get_health_checks(health_check_map_t *checks) const 
+{ // Adds an NVMEOF_SINGLE_GATEWAY warning to *checks when any entry of created_gws holds exactly one gateway.
+  list<string> detail; // one human-readable line per single-gateway group
+  for (const auto& created_map_pair: created_gws) {
+    const auto& group_key = created_map_pair.first;
+    auto& group = group_key.second; // second element of the key is what gets printed as the group name
+    const NvmeGwMonStates& gw_created_map = created_map_pair.second;
+    if ( gw_created_map.size() == 1) { // only groups with exactly one gateway are flagged; empty groups are not
+      ostringstream ss;
+      ss << "NVMeoF Gateway Group '" << group << "' has 1 gateway." ;
+      detail.push_back(ss.str());
+    }
+  }
+  if (!detail.empty()) { // nothing is added to *checks when no group was flagged
+    ostringstream ss;
+    ss << detail.size() << " group(s) have only 1 nvmeof gateway"
+      << "; HA is not possible with single gateway.";
+    auto& d = checks->add("NVMEOF_SINGLE_GATEWAY", HEALTH_WARN,
+        ss.str(), detail.size());
+    d.detail.swap(detail); // hand the per-group lines to the check; the local list is left with d.detail's old contents
+  }
+}
+
 int NVMeofGwMap::blocklist_gw(
   const NvmeGwId &gw_id, const NvmeGroupKey& group_key,
   NvmeAnaGrpId grpid, epoch_t &epoch, bool failover)
index 267d85b10f9184435b3d849a30db17af64ff3fe3..5f6577330123ad56d96d564b9ff21dead0000b5d 100755 (executable)
@@ -27,6 +27,9 @@
 #include "NVMeofGwTypes.h"
 
 using ceph::coarse_mono_clock;
+
+class health_check_map_t;
+
 class Monitor;
 /*-------------------*/
 class NVMeofGwMap
@@ -140,6 +143,8 @@ public:
     decode(fsm_timers, bl);
     DECODE_FINISH(bl);
   }
+
+  void get_health_checks(health_check_map_t *checks) const;
 };
 
 #include "NVMeofGwSerialize.h"
index d9e936e27df34a8f292b7c711062b2fa1b19beb8..25c143819645caeb095fe1b809fba1536fd948a3 100644 (file)
@@ -176,6 +176,11 @@ void NVMeofGwMon::encode_pending(MonitorDBStore::TransactionRef t)
           << HAVE_FEATURE(mon.get_quorum_con_features(), NVMEOFHA) << dendl;
   put_version(t, pending_map.epoch, bl);
   put_last_committed(t, pending_map.epoch);
+
+  //health
+  health_check_map_t checks;
+  pending_map.get_health_checks(&checks);
+  encode_health(checks, t);
 }
 
 void NVMeofGwMon::update_from_paxos(bool *need_bootstrap)
@@ -188,6 +193,7 @@ void NVMeofGwMon::update_from_paxos(bool *need_bootstrap)
     bufferlist bl;
     int err = get_version(version, bl);
     ceph_assert(err == 0);
+    load_health();
 
     auto p = bl.cbegin();
     map.decode(p);