git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
monitoring: add tests for 2 new nvmeof alerts 60404/head
author Vallari Agrawal <vallari.agrawal@ibm.com>
Wed, 30 Oct 2024 14:04:01 +0000 (19:34 +0530)
committer Vallari Agrawal <vallari.agrawal@ibm.com>
Mon, 11 Nov 2024 17:43:44 +0000 (23:13 +0530)
Add test for alerts NVMeoFMissingListener and
NVMeoFZeroListenerSubsystem to test_alerts.yml.

Signed-off-by: Vallari Agrawal <vallari.agrawal@ibm.com>
monitoring/ceph-mixin/tests_alerts/test_alerts.yml

index a269ff742271b35159cd639aa98b46f704cff146..6bcaa53b851131ac4835a1501dcece600c10cdb3 100644 (file)
@@ -2522,6 +2522,75 @@ tests:
         exp_annotations:
           summary: "The number of clients connected to nqn1 is too high on cluster mycluster"
           description: "The supported limit for clients connecting to a subsystem is 32"
+ # NVMeoFMissingListener: node-1 reports 0 listeners for nqn1 while node-2 and node-3 report 1
+ - interval: 1m
+   input_series:
+    - series: 'ceph_nvmeof_subsystem_listener_count{nqn="nqn1", instance="node-1:9100"}'
+      values: '0+0x10'  # 11 samples of 0, one per minute from 0m to 10m
+    - series: 'ceph_nvmeof_subsystem_listener_count{nqn="nqn1", instance="node-2:9100"}'
+      values: '1+0x10'  # 11 samples of 1
+    - series: 'ceph_nvmeof_subsystem_listener_count{nqn="nqn1", instance="node-3:9100"}'
+      values: '1+0x10'  # 11 samples of 1
+    - series: 'ceph_nvmeof_gateway_info{addr="1.1.1.1", instance="node-1:9100"}'
+      values: '1+0x20'
+    - series: 'ceph_nvmeof_gateway_info{addr="1.1.1.2", instance="node-2:9100"}'
+      values: '1+0x20'
+    - series: 'ceph_nvmeof_gateway_info{addr="1.1.1.3", instance="node-3:9100"}'
+      values: '1+0x20'
+    - series: 'ceph_nvmeof_gateway_info{addr="1.1.1.4", instance="node-4:9100"}'
+      values: '1+0x20'
+   promql_expr_test:
+     - expr: ceph_nvmeof_subsystem_listener_count == 0 and on(nqn) sum(ceph_nvmeof_subsystem_listener_count) by (nqn) > 0
+       eval_time: 1m
+       exp_samples:  # only node-1 matches: its count is 0 while the nqn1 sum is 2
+         - labels: '{__name__="ceph_nvmeof_subsystem_listener_count", instance="node-1:9100", nqn="nqn1"}'
+           value: 0
+   alert_rule_test:
+    - eval_time: 10m  # last timestamp covered by the listener_count series above
+      alertname: NVMeoFMissingListener
+      exp_alerts:
+      - exp_labels:
+          instance: node-1:9100
+          nqn: nqn1
+          severity: warning
+          type: ceph_default
+        exp_annotations:
+          summary: "No listener added for node-1:9100 NVMe-oF Gateway to nqn1 subsystem"
+          description: "For every subsystem, each gateway should have a listener to balance traffic between gateways."
+
+ # NVMeoFZeroListenerSubsystem: nqn1 never gets a listener; nqn2 starts at 0 and gains listeners from 1m on
+ - interval: 1m
+   input_series:
+    - series: 'ceph_nvmeof_subsystem_listener_count{nqn="nqn1"}'
+      values: '0+0x10'  # 0 at every step from 0m through the 10m alert eval_time
+    - series: 'ceph_nvmeof_subsystem_listener_count{nqn="nqn2"}'
+      values: '0 1 1 1 2 2 3 4'
+    - series: 'ceph_nvmeof_gateway_info{addr="1.1.1.1"}'
+      values: '1+0x20'
+    - series: 'ceph_nvmeof_gateway_info{addr="1.1.1.2"}'
+      values: '1+0x20'
+    - series: 'ceph_nvmeof_gateway_info{addr="1.1.1.3"}'
+      values: '1+0x20'
+    - series: 'ceph_nvmeof_gateway_info{addr="1.1.1.4"}'
+      values: '1+0x20'
+   promql_expr_test:
+     - expr: ceph_nvmeof_subsystem_listener_count == 0
+       eval_time: 1m
+       exp_samples:  # nqn2 is already 1 at 1m, so only nqn1 is returned
+         - labels: '{__name__="ceph_nvmeof_subsystem_listener_count",nqn="nqn1"}'
+           value: 0
+   alert_rule_test:
+    - eval_time: 10m
+      alertname: NVMeoFZeroListenerSubsystem
+      exp_alerts:
+      - exp_labels:
+          nqn: nqn1
+          severity: warning
+          type: ceph_default
+        exp_annotations:
+          summary: "No listeners added to nqn1 subsystem"
+          description: "NVMeoF gateway configuration incomplete; one of the subsystems have zero listeners."
 
  # NVMeoFHighHostCPU
  - interval: 1m