]> git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
monitoring: add 2 nvmeof alerts to prometheus_alerts.yml
author Vallari Agrawal <val.agl002@gmail.com>
Tue, 8 Oct 2024 21:07:48 +0000 (02:37 +0530)
committer Vallari Agrawal <vallari.agrawal@ibm.com>
Mon, 11 Nov 2024 11:53:03 +0000 (17:23 +0530)
- `NVMeoFMissingListener`: trigger if a gateway has no listener in a
     subsystem that has listeners on other gateways
- `NVMeoFZeroListenerSubsystem`: trigger if a subsystem has no listeners

Signed-off-by: Vallari Agrawal <vallari.agrawal@ibm.com>
monitoring/ceph-mixin/prometheus_alerts.yml

index ba6a6ded0a36bb392819c60259e9d0ff4bfcc5c7..805ecb1188aff29584b51449cd243cd29bbd13a2 100644 (file)
@@ -837,6 +837,24 @@ groups:
         labels:
           severity: "warning"
           type: "ceph_default"
+      - alert: "NVMeoFMissingListener"  # per-series check: a listener count of 0 inside a subsystem (nqn) whose total is non-zero
+        annotations:
+          description: "For every subsystem, each gateway should have a listener to balance traffic between gateways."
+          summary: "No listener added for {{ $labels.instance }} NVMe-oF Gateway to {{ $labels.nqn }} subsystem"
+        expr: "ceph_nvmeof_subsystem_listener_count == 0 and on(nqn) sum(ceph_nvmeof_subsystem_listener_count) by (nqn) > 0"  # keeps zero-count series whose nqn-wide sum is > 0, so subsystems with no listeners at all do not match
+        for: "10m"  # condition must hold continuously for 10 minutes before the alert fires
+        labels:
+          severity: "warning"
+          type: "ceph_default"
+      - alert: "NVMeoFZeroListenerSubsystem"  # subsystem-level check: listener counts summed per nqn equal 0
+        annotations:
+          description: "NVMeoF gateway configuration incomplete; one of the subsystems has zero listeners."
+          summary: "No listeners added to {{ $labels.nqn }} subsystem"
+        expr: "sum(ceph_nvmeof_subsystem_listener_count) by (nqn) == 0"  # aggregation keeps only the nqn label, which the summary template reads
+        for: "10m"  # condition must hold continuously for 10 minutes before the alert fires
+        labels:
+          severity: "warning"
+          type: "ceph_default"
       - alert: "NVMeoFHighHostCPU"
         annotations:
           description: "High CPU on a gateway host can lead to CPU contention and performance degradation"