git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/dashboard: Compare values of MTU alert by device
author Patrick Seidensal <pseidensal@suse.com>
Wed, 23 Mar 2022 13:53:58 +0000 (14:53 +0100)
committer Patrick Seidensal <pseidensal@suse.com>
Mon, 28 Mar 2022 11:38:15 +0000 (13:38 +0200)
Fixes: https://tracker.ceph.com/issues/55004
Signed-off-by: Patrick Seidensal <pseidensal@suse.com>
monitoring/ceph-mixin/prometheus_alerts.yml

index fc38678f99dd56dbc0e4b4822a8f2d1debc4a470..578596f4af0bc1b6e01c1f6e1bec8df818b4fa15 100644 (file)
@@ -704,7 +704,18 @@ groups:
             rate of the past 48 hours.
 
       - alert: CephNodeInconsistentMTU
-        expr: node_network_mtu_bytes{device!="lo"} * (node_network_up{device!="lo"} > 0) != on() group_left() (quantile(0.5, node_network_mtu_bytes{device!="lo"}))
+        expr: |
+          node_network_mtu_bytes * (node_network_up{device!="lo"} > 0) ==
+            scalar(
+              max by (device) (node_network_mtu_bytes * (node_network_up{device!="lo"} > 0)) !=
+                quantile by (device) (.5, node_network_mtu_bytes * (node_network_up{device!="lo"} > 0))
+            )
+          or
+          node_network_mtu_bytes * (node_network_up{device!="lo"} > 0) ==
+            scalar(
+              min by (device) (node_network_mtu_bytes * (node_network_up{device!="lo"} > 0)) !=
+                quantile by (device) (.5, node_network_mtu_bytes * (node_network_up{device!="lo"} > 0))
+            )
         labels:
           severity: warning
           type: ceph_default
@@ -712,7 +723,7 @@ groups:
           summary: MTU settings across Ceph hosts are inconsistent
           description: >
             Node {{ $labels.instance }} has a different MTU size ({{ $value }})
-            than the median value on device {{ $labels.device }}.
+            than the median of devices named {{ $labels.device }}.
 
   - name: pools
     rules: