git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
monitoring: fix NVMeoFSubsystemNamespaceLimit
author: Vallari Agrawal <vallari.agrawal@ibm.com>
Wed, 26 Feb 2025 16:01:54 +0000 (21:31 +0530)
committer: Vallari Agrawal <vallari.agrawal@ibm.com>
Wed, 26 Feb 2025 16:01:54 +0000 (21:31 +0530)
The alert is not triggered as expected; change the query
to fix that.

BZ: https://bugzilla.redhat.com/show_bug.cgi?id=2282348

Signed-off-by: Vallari Agrawal <vallari.agrawal@ibm.com>
monitoring/ceph-mixin/prometheus_alerts.libsonnet
monitoring/ceph-mixin/prometheus_alerts.yml

index f3e06a76f1f7fd1eed671d0bbb9f9670a24d08ac..2546a676222719787bdb8738cbec558e6ff606c9 100644 (file)
         {
           alert: 'NVMeoFSubsystemNamespaceLimit',
           'for': '1m',
-          expr: '(count by(nqn, cluster) (ceph_nvmeof_subsystem_namespace_metadata)) >= ceph_nvmeof_subsystem_namespace_limit',
+          expr: '(count by(nqn, cluster, instance) (ceph_nvmeof_subsystem_namespace_metadata)) >= on(nqn, instance) group_right(cluster) ceph_nvmeof_subsystem_namespace_limit',
           labels: { severity: 'warning', type: 'ceph_default' },
           annotations: {
             summary: '{{ $labels.nqn }} subsystem has reached its maximum number of namespaces%(cluster)s' % $.MultiClusterSummary(),
index f8bcd4ca0e12eaeb829cd8a2d3955a92556a8826..33636f4ef41396749f5120fc89d7d3bcf9ba6d4d 100644 (file)
@@ -760,7 +760,7 @@ groups:
         annotations:
           description: "Subsystems have a max namespace limit defined at creation time. This alert means that no more namespaces can be added to {{ $labels.nqn }}"
           summary: "{{ $labels.nqn }} subsystem has reached its maximum number of namespaces on cluster {{ $labels.cluster }}"
-        expr: "(count by(nqn, cluster) (ceph_nvmeof_subsystem_namespace_metadata)) >= ceph_nvmeof_subsystem_namespace_limit"
+        expr: "(count by(nqn, cluster, instance) (ceph_nvmeof_subsystem_namespace_metadata)) >= on(nqn, instance) group_right(cluster) ceph_nvmeof_subsystem_namespace_limit"
         for: "1m"
         labels:
           severity: "warning"