From: Vallari Agrawal Date: Mon, 3 Feb 2025 18:24:50 +0000 (+0530) Subject: monitoring: add alert NVMeoFMaxGatewayGroups X-Git-Tag: v20.0.0~262^2~1 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=ab4a1ddcd89b9d647d1ec167469884f488842130;p=ceph.git monitoring: add alert NVMeoFMaxGatewayGroups Add alert NVMeoFMaxGatewayGroups to prometheus_alerts.yml and prometheus_alerts.libsonnet. This alert indicates that the maximum number of NVMeoF gateway groups in a cluster has been exceeded. Signed-off-by: Vallari Agrawal --- diff --git a/monitoring/ceph-mixin/prometheus_alerts.libsonnet b/monitoring/ceph-mixin/prometheus_alerts.libsonnet index 5d1ab49b53340..f3e06a76f1f7f 100644 --- a/monitoring/ceph-mixin/prometheus_alerts.libsonnet +++ b/monitoring/ceph-mixin/prometheus_alerts.libsonnet @@ -885,6 +885,16 @@ description: 'You may create many gateways in a gateway group, but %(NVMeoFMaxGatewaysPerGroup)d is the tested limit' % $._config, }, }, + { + alert: 'NVMeoFMaxGatewayGroups', + 'for': '1m', + expr: 'count(count by (group, cluster) (ceph_nvmeof_gateway_info)) by (cluster) > %.2f' % [$._config.NVMeoFMaxGatewayGroups], + labels: { severity: 'warning', type: 'ceph_default' }, + annotations: { + summary: 'Max gateway groups exceeded%(cluster)s' % $.MultiClusterSummary(), + description: 'You may create many gateway groups, but %(NVMeoFMaxGatewayGroups)d is the tested limit' % $._config, + }, + }, { alert: 'NVMeoFSingleGatewayGroup', 'for': '5m', diff --git a/monitoring/ceph-mixin/prometheus_alerts.yml b/monitoring/ceph-mixin/prometheus_alerts.yml index 7c0da4d51a4cf..f8bcd4ca0e12e 100644 --- a/monitoring/ceph-mixin/prometheus_alerts.yml +++ b/monitoring/ceph-mixin/prometheus_alerts.yml @@ -792,6 +792,15 @@ groups: labels: severity: "warning" type: "ceph_default" + - alert: "NVMeoFMaxGatewayGroups" + annotations: + description: "You may create many gateway groups, but 4 is the tested limit" + summary: "Max gateway groups exceeded on cluster {{ $labels.cluster 
}}" + expr: "count(count by (group, cluster) (ceph_nvmeof_gateway_info)) by (cluster) > 4.00" + for: "1m" + labels: + severity: "warning" + type: "ceph_default" - alert: "NVMeoFSingleGatewayGroup" annotations: description: "Although a single member gateway group is valid, it should only be used for test purposes"