git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/dashboard: Compare values of MTU alert by device
author Patrick Seidensal <pseidensal@suse.com>
Wed, 23 Mar 2022 13:53:58 +0000 (14:53 +0100)
committer Patrick Seidensal <pseidensal@suse.com>
Thu, 7 Apr 2022 14:24:31 +0000 (16:24 +0200)
Fixes: https://tracker.ceph.com/issues/55004
Signed-off-by: Patrick Seidensal <pseidensal@suse.com>
(cherry picked from commit 3821548a37373f87109ab0dac7f3ee2d8f3ead99)

monitoring/prometheus/alerts/ceph_default_alerts.yml

index d9e6e35637f92deb1b0da841a05b0438b71bb44b..eadb05a05a8201ec1e7209bb60c6b47f2296b6b8 100644 (file)
@@ -704,7 +704,18 @@ groups:
             rate of the past 48 hours.
 
       - alert: CephNodeInconsistentMTU
-        expr: node_network_mtu_bytes{device!="lo"} * (node_network_up{device!="lo"} > 0) != on() group_left() (quantile(0.5, node_network_mtu_bytes{device!="lo"}))
+        expr: |
+          node_network_mtu_bytes * (node_network_up{device!="lo"} > 0) ==
+            scalar(
+              max by (device) (node_network_mtu_bytes * (node_network_up{device!="lo"} > 0)) !=
+                quantile by (device) (.5, node_network_mtu_bytes * (node_network_up{device!="lo"} > 0))
+            )
+          or
+          node_network_mtu_bytes * (node_network_up{device!="lo"} > 0) ==
+            scalar(
+              min by (device) (node_network_mtu_bytes * (node_network_up{device!="lo"} > 0)) !=
+                quantile by (device) (.5, node_network_mtu_bytes * (node_network_up{device!="lo"} > 0))
+            )
         labels:
           severity: warning
           type: ceph_default
@@ -712,7 +723,7 @@ groups:
           summary: MTU settings across Ceph hosts are inconsistent
           description: >
             Node {{ $labels.instance }} has a different MTU size ({{ $value }})
-            than the median value on device {{ $labels.device }}.
+            than the median of devices named {{ $labels.device }}.
 
   - name: pools
     rules: