From: Vallari Agrawal
Date: Wed, 30 Oct 2024 14:02:41 +0000 (+0530)
Subject: monitoring: add 2 new nvmeof alerts
X-Git-Tag: testing/wip-vshankar-testing-20241118.055430-debug~11^2~1
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=7994fea43630b052fabb79c39241927dc7bb6678;p=ceph-ci.git

monitoring: add 2 new nvmeof alerts

Add NVMeoFMissingListener and NVMeoFZeroListenerSubsystem alerts
to prometheus_alerts.libsonnet.

Signed-off-by: Vallari Agrawal
---

Notes (not part of the commit):

  What the hunk below adds, as read from the rule expressions themselves:

  * NVMeoFMissingListener (warning, held for 10m) selects every
    ceph_nvmeof_subsystem_listener_count series whose value is 0 and keeps
    it only when, matching on the nqn label, the sum of that metric over
    the same nqn is greater than 0. The summary template reads the
    instance and nqn labels of the surviving series.

  * NVMeoFZeroListenerSubsystem (warning, held for 10m) fires when the sum
    of ceph_nvmeof_subsystem_listener_count grouped by nqn equals 0. Only
    the nqn label survives the aggregation, and the summary uses only that.

  NOTE(review): this file had been flattened onto a single line; the line
  breaks were restored from the patch structure (the hunk header counts
  match: 3 + 3 context lines, 20 added lines). The leading indentation of
  the hunk lines could not be recovered and is reconstructed as 2-space
  nesting -- confirm against the target file before applying, or apply
  with whitespace-insensitive matching.

  NOTE(review): the NVMeoFZeroListenerSubsystem description says "one of
  the subsystems have"; "has" would be grammatical. The string is left
  byte-identical here because the alert text may be mirrored in generated
  rule files -- verify before rewording.

diff --git a/monitoring/ceph-mixin/prometheus_alerts.libsonnet b/monitoring/ceph-mixin/prometheus_alerts.libsonnet
index cde1a736f8c..30b6b07d463 100644
--- a/monitoring/ceph-mixin/prometheus_alerts.libsonnet
+++ b/monitoring/ceph-mixin/prometheus_alerts.libsonnet
@@ -935,6 +935,26 @@
             description: 'The supported limit for clients connecting to a subsystem is %(NVMeoFHighClientCount)d' % $._config,
           },
         },
+        {
+          alert: 'NVMeoFMissingListener',
+          'for': '10m',
+          expr: 'ceph_nvmeof_subsystem_listener_count == 0 and on(nqn) sum(ceph_nvmeof_subsystem_listener_count) by (nqn) > 0',
+          labels: { severity: 'warning', type: 'ceph_default' },
+          annotations: {
+            summary: 'No listener added for {{ $labels.instance }} NVMe-oF Gateway to {{ $labels.nqn }} subsystem',
+            description: 'For every subsystem, each gateway should have a listener to balance traffic between gateways.',
+          },
+        },
+        {
+          alert: 'NVMeoFZeroListenerSubsystem',
+          'for': '10m',
+          expr: 'sum(ceph_nvmeof_subsystem_listener_count) by (nqn) == 0',
+          labels: { severity: 'warning', type: 'ceph_default' },
+          annotations: {
+            summary: 'No listeners added to {{ $labels.nqn }} subsystem',
+            description: 'NVMeoF gateway configuration incomplete; one of the subsystems have zero listeners.',
+          },
+        },
         {
           alert: 'NVMeoFHighHostCPU',
           'for': '10m',