git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
monitoring: fix NVMeoFMultipleNamespacesOfRBDImage 69297/head
author Vallari Agrawal <vallari.agrawal@ibm.com>
Fri, 5 Jun 2026 05:14:51 +0000 (10:44 +0530)
committer Vallari Agrawal <vallari.agrawal@ibm.com>
Fri, 5 Jun 2026 05:18:08 +0000 (10:48 +0530)
Do not trigger the NVMeoFMultipleNamespacesOfRBDImage alert
when the same pool/image name is used in multiple NVMeoF namespaces,
if they are in different RADOS namespaces (rados_namespace_name).

These are valid repetitions of a pool/image name:
- mypool/rados_ns1/myimage1
- mypool/rados_ns2/myimage1
- mypool/myimage1 (default rados namespace)

Fixes: https://tracker.ceph.com/issues/77128
Signed-off-by: Vallari Agrawal <vallari.agrawal@ibm.com>
monitoring/ceph-mixin/prometheus_alerts.libsonnet
monitoring/ceph-mixin/prometheus_alerts.yml
monitoring/ceph-mixin/tests_alerts/test_alerts.yml

index 9c1c3db4375e1ba5a5af81d96be33310c33474b6..6173a9dd781c85005ee296985a9884dcdcfd4869 100644 (file)
         {
           alert: 'NVMeoFMultipleNamespacesOfRBDImage',
           'for': '1m',
-          expr: 'count by(pool_name, rbd_name) (count by(bdev_name, pool_name, rbd_name) (ceph_nvmeof_bdev_metadata and on (bdev_name) ceph_nvmeof_subsystem_namespace_metadata)) > 1',
+          expr: 'count by(pool_name, rbd_name, rados_namespace_name) (count by(bdev_name, pool_name, rbd_name, rados_namespace_name) (ceph_nvmeof_bdev_metadata * on (bdev_name, instance, cluster) group_left(rados_namespace_name) ceph_nvmeof_subsystem_namespace_metadata)) > 1',
           labels: { severity: 'warning', type: 'ceph_default' },
           annotations: {
-            summary: 'RBD image {{ $labels.pool_name }}/{{ $labels.rbd_name }} cannot be reused for multiple NVMeoF namespace ',
+            summary: 'RBD image {{ $labels.pool_name }}/{{ if $labels.rados_namespace_name }}{{ $labels.rados_namespace_name }}/{{ end }}{{ $labels.rbd_name }} cannot be reused for multiple NVMeoF namespaces',
             description: 'Each NVMeoF namespace must have a unique RBD pool and image, across all different gateway groups.',
           },
         },
index 8a6c411a2b63d9b2147e8d5256ce5b6905cff3f9..f2aa4defabde0022c65d35cfe366aaea6b32b159 100644 (file)
@@ -804,8 +804,8 @@ groups:
       - alert: "NVMeoFMultipleNamespacesOfRBDImage"
         annotations:
           description: "Each NVMeoF namespace must have a unique RBD pool and image, across all different gateway groups."
-          summary: "RBD image {{ $labels.pool_name }}/{{ $labels.rbd_name }} cannot be reused for multiple NVMeoF namespace "
-        expr: "count by(pool_name, rbd_name) (count by(bdev_name, pool_name, rbd_name) (ceph_nvmeof_bdev_metadata and on (bdev_name) ceph_nvmeof_subsystem_namespace_metadata)) > 1"
+          summary: "RBD image {{ $labels.pool_name }}/{{ if $labels.rados_namespace_name }}{{ $labels.rados_namespace_name }}/{{ end }}{{ $labels.rbd_name }} cannot be reused for multiple NVMeoF namespaces"
+        expr: "count by(pool_name, rbd_name, rados_namespace_name) (count by(bdev_name, pool_name, rbd_name, rados_namespace_name) (ceph_nvmeof_bdev_metadata * on (bdev_name, instance, cluster) group_left(rados_namespace_name) ceph_nvmeof_subsystem_namespace_metadata)) > 1"
         for: "1m"
         labels:
           severity: "warning"
@@ -995,4 +995,3 @@ groups:
           oid: "1.3.6.1.4.1.50495.1.2.1.15.2"
           severity: "warning"
           type: "ceph_default"
-
index 5578544c934f70a5070ea88a1b90f613d4c2181b..6a4c2cd207622ec3873394123d75c96b18037fe3 100644 (file)
@@ -2278,19 +2278,27 @@ tests:
 # NVMeoFMultipleNamespacesOfRBDImage
  - interval: 1m
    input_series:
-    - series: 'ceph_nvmeof_bdev_metadata{bdev_name="bdev1", instance="ceph-nvme-vm1", pool_name="mypool", rbd_name="myimage1"}'
+    - series: 'ceph_nvmeof_bdev_metadata{bdev_name="bdev1", instance="ceph-nvme-vm1", cluster="mycluster", pool_name="mypool", rbd_name="myimage1"}'
       values: '1x10'
-    - series: 'ceph_nvmeof_bdev_metadata{bdev_name="bdev1", instance="ceph-nvme-vm2", pool_name="mypool", rbd_name="myimage1"}'
+    - series: 'ceph_nvmeof_bdev_metadata{bdev_name="bdev1", instance="ceph-nvme-vm2", cluster="mycluster", pool_name="mypool", rbd_name="myimage1"}'
       values: '1x10'
-    - series: 'ceph_nvmeof_bdev_metadata{bdev_name="bdev2", instance="ceph-nvme-vm1", pool_name="mypool", rbd_name="myimage2"}'
+    - series: 'ceph_nvmeof_bdev_metadata{bdev_name="bdev2", instance="ceph-nvme-vm1", cluster="mycluster", pool_name="mypool", rbd_name="myimage2"}'
       values: '1x10'
-    - series: 'ceph_nvmeof_bdev_metadata{bdev_name="bdev2", instance="ceph-nvme-vm2", pool_name="mypool", rbd_name="myimage2"}'
+    - series: 'ceph_nvmeof_bdev_metadata{bdev_name="bdev2", instance="ceph-nvme-vm2", cluster="mycluster", pool_name="mypool", rbd_name="myimage2"}'
       values: '1x10'
-    - series: 'ceph_nvmeof_bdev_metadata{bdev_name="bdev3", instance="ceph-nvme-vm1", pool_name="mypool", rbd_name="myimage1"}'
+    - series: 'ceph_nvmeof_bdev_metadata{bdev_name="bdev3", instance="ceph-nvme-vm1", cluster="mycluster", pool_name="mypool", rbd_name="myimage1"}'
       values: '1x10'
-    - series: 'ceph_nvmeof_bdev_metadata{bdev_name="bdev3", instance="ceph-nvme-vm2", pool_name="mypool", rbd_name="myimage1"}'
+    - series: 'ceph_nvmeof_bdev_metadata{bdev_name="bdev3", instance="ceph-nvme-vm2", cluster="mycluster", pool_name="mypool", rbd_name="myimage1"}'
       values: '1x10'
-    - series: 'ceph_nvmeof_bdev_metadata{bdev_name="bdev4", instance="ceph-nvme-vm1", pool_name="mypool", rbd_name="myimage1"}' # bdev with no ns
+    - series: 'ceph_nvmeof_bdev_metadata{bdev_name="bdev4", instance="ceph-nvme-vm1", cluster="mycluster", pool_name="mypool", rbd_name="myimage1"}' # bdev with no ns
+      values: '1x10'
+    - series: 'ceph_nvmeof_bdev_metadata{bdev_name="bdev5", instance="ceph-nvme-vm1", cluster="mycluster", pool_name="mypool", rbd_name="myimage5"}'
+      values: '1x10'
+    - series: 'ceph_nvmeof_bdev_metadata{bdev_name="bdev5", instance="ceph-nvme-vm2", cluster="mycluster", pool_name="mypool", rbd_name="myimage5"}'
+      values: '1x10'
+    - series: 'ceph_nvmeof_bdev_metadata{bdev_name="bdev6", instance="ceph-nvme-vm1", cluster="mycluster", pool_name="mypool", rbd_name="myimage5"}'
+      values: '1x10'
+    - series: 'ceph_nvmeof_bdev_metadata{bdev_name="bdev6", instance="ceph-nvme-vm2", cluster="mycluster", pool_name="mypool", rbd_name="myimage5"}'
       values: '1x10'
     - series: 'ceph_nvmeof_subsystem_namespace_metadata{nqn="nqn1", nsid="1", bdev_name="bdev1", instance="ceph-nvme-vm1", cluster="mycluster"}'
       values: '1x10'
@@ -2304,8 +2312,16 @@ tests:
       values: '1x10'
     - series: 'ceph_nvmeof_subsystem_namespace_metadata{nqn="nqn2", nsid="1", bdev_name="bdev3", instance="ceph-nvme-vm2", cluster="mycluster"}'
       values: '1x10'
+    - series: 'ceph_nvmeof_subsystem_namespace_metadata{nqn="nqn3", nsid="1", bdev_name="bdev5", instance="ceph-nvme-vm1", cluster="mycluster", rados_namespace_name="rados1"}'
+      values: '1x10'
+    - series: 'ceph_nvmeof_subsystem_namespace_metadata{nqn="nqn3", nsid="1", bdev_name="bdev5", instance="ceph-nvme-vm2", cluster="mycluster", rados_namespace_name="rados1"}'
+      values: '1x10'
+    - series: 'ceph_nvmeof_subsystem_namespace_metadata{nqn="nqn3", nsid="2", bdev_name="bdev6", instance="ceph-nvme-vm1", cluster="mycluster", rados_namespace_name="rados2"}'
+      values: '1x10'
+    - series: 'ceph_nvmeof_subsystem_namespace_metadata{nqn="nqn3", nsid="2", bdev_name="bdev6", instance="ceph-nvme-vm2", cluster="mycluster", rados_namespace_name="rados2"}'
+      values: '1x10'
    promql_expr_test:
-     - expr: count by(pool_name, rbd_name) (count by(bdev_name, pool_name, rbd_name) (ceph_nvmeof_bdev_metadata and on (bdev_name) ceph_nvmeof_subsystem_namespace_metadata)) > 1
+     - expr: count by(pool_name, rbd_name, rados_namespace_name) (count by(bdev_name, pool_name, rbd_name, rados_namespace_name) (ceph_nvmeof_bdev_metadata * on (bdev_name, instance, cluster) group_left(rados_namespace_name) ceph_nvmeof_subsystem_namespace_metadata)) > 1
        eval_time: 1m
        exp_samples:
          - labels: '{pool_name="mypool", rbd_name="myimage1"}'
@@ -2320,7 +2336,7 @@ tests:
           severity: warning
           type: ceph_default
         exp_annotations:
-          summary: "RBD image mypool/myimage1 cannot be reused for multiple NVMeoF namespace "
+          summary: "RBD image mypool/myimage1 cannot be reused for multiple NVMeoF namespaces"
           description: "Each NVMeoF namespace must have a unique RBD pool and image, across all different gateway groups."
 
  # NVMeoFTooManyGateways