git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
monitoring: add alert NVMeoFMaxGatewayGroups
author: Vallari Agrawal <vallari.agrawal@ibm.com>
Mon, 3 Feb 2025 18:24:50 +0000 (23:54 +0530)
committer: Vallari Agrawal <vallari.agrawal@ibm.com>
Mon, 3 Feb 2025 18:24:50 +0000 (23:54 +0530)
Add alert NVMeoFMaxGatewayGroups to prometheus_alerts.yml
and prometheus_alerts.libsonnet.

This alert indicates when the maximum number of NVMeoF gateway
groups has been exceeded in a cluster.

Signed-off-by: Vallari Agrawal <vallari.agrawal@ibm.com>
monitoring/ceph-mixin/prometheus_alerts.libsonnet
monitoring/ceph-mixin/prometheus_alerts.yml

index 5d1ab49b53340e4d7eef16dddc300e174c48b7ca..f3e06a76f1f7fd1eed671d0bbb9f9670a24d08ac 100644 (file)
             description: 'You may create many gateways in a gateway group, but %(NVMeoFMaxGatewaysPerGroup)d is the tested limit' % $._config,
           },
         },
+        {
+          alert: 'NVMeoFMaxGatewayGroups',
+          'for': '1m',
+          expr: 'count(count by (group, cluster) (ceph_nvmeof_gateway_info)) by (cluster) > %.2f' % [$._config.NVMeoFMaxGatewayGroups],
+          labels: { severity: 'warning', type: 'ceph_default' },
+          annotations: {
+            summary: 'Max gateway groups exceeded%(cluster)s' % $.MultiClusterSummary(),
+            description: 'You may create many gateway groups, but %(NVMeoFMaxGatewayGroups)d is the tested limit' % $._config,
+          },
+        },
         {
           alert: 'NVMeoFSingleGatewayGroup',
           'for': '5m',
index 7c0da4d51a4cf0eff8a864475f1ce283d6d70000..f8bcd4ca0e12eaeb829cd8a2d3955a92556a8826 100644 (file)
@@ -792,6 +792,15 @@ groups:
         labels:
           severity: "warning"
           type: "ceph_default"
+      - alert: "NVMeoFMaxGatewayGroups"
+        annotations:
+          description: "You may create many gateway groups, but 4 is the tested limit"
+          summary: "Max gateway groups exceeded on cluster {{ $labels.cluster }}"
+        expr: "count(count by (group, cluster) (ceph_nvmeof_gateway_info)) by (cluster) > 4.00"
+        for: "1m"
+        labels:
+          severity: "warning"
+          type: "ceph_default"
       - alert: "NVMeoFSingleGatewayGroup"
         annotations:
           description: "Although a single member gateway group is valid, it should only be used for test purposes"