From 4f9bd92e46f941d06ce699d8ac9a94aa95fbea23 Mon Sep 17 00:00:00 2001 From: Aashish Sharma Date: Thu, 2 Sep 2021 11:57:57 +0530 Subject: [PATCH] mgr/dashboard: Incorrect MTU mismatch warning The MTU mismatch warning was being fired even for NICs that are in the down state. This PR intends to fix this issue. Fixes: https://tracker.ceph.com/issues/52028 Signed-off-by: Aashish Sharma (cherry picked from commit 58d635455d1f59921d5ad821168f31b6f937588a) --- .../prometheus/alerts/ceph_default_alerts.yml | 2 +- monitoring/prometheus/alerts/test_alerts.yml | 20 ++++++++++++++++--- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/monitoring/prometheus/alerts/ceph_default_alerts.yml b/monitoring/prometheus/alerts/ceph_default_alerts.yml index d4a0b8209e8aa..71fc864cddf7d 100644 --- a/monitoring/prometheus/alerts/ceph_default_alerts.yml +++ b/monitoring/prometheus/alerts/ceph_default_alerts.yml @@ -233,7 +233,7 @@ groups: rate of the past 48 hours. - alert: MTU Mismatch - expr: node_network_mtu_bytes{device!="lo"} != on() group_left() (quantile(0.5, node_network_mtu_bytes{device!="lo"})) + expr: node_network_mtu_bytes{device!="lo"} * (node_network_up{device!="lo"} > 0) != on() group_left() (quantile(0.5, node_network_mtu_bytes{device!="lo"})) labels: severity: warning type: ceph_default diff --git a/monitoring/prometheus/alerts/test_alerts.yml b/monitoring/prometheus/alerts/test_alerts.yml index 8bc35aa2643ae..913c207339b1c 100644 --- a/monitoring/prometheus/alerts/test_alerts.yml +++ b/monitoring/prometheus/alerts/test_alerts.yml @@ -680,13 +680,27 @@ tests: - series: 'node_network_mtu_bytes{device="eth4",instance="node-exporter", job="node-exporter"}' values: '9000 9000 9000 9000 9000' + - series: 'node_network_up{device="eth0",instance="node-exporter", + job="node-exporter"}' + values: '0 0 0 0 0' + - series: 'node_network_up{device="eth1",instance="node-exporter", + job="node-exporter"}' + values: '0 0 0 0 0' + - series: 
'node_network_up{device="eth2",instance="node-exporter", + job="node-exporter"}' + values: '1 1 1 1 1' + - series: 'node_network_up{device="eth3",instance="node-exporter", + job="node-exporter"}' + values: '0 0 0 0 0' + - series: 'node_network_up{device="eth4",instance="node-exporter", + job="node-exporter"}' + values: '1 1 1 1 1' promql_expr_test: - - expr: node_network_mtu_bytes{device!="lo"} != on() group_left() + - expr: node_network_mtu_bytes{device!="lo"} * (node_network_up{device!="lo"} > 0) != on() group_left() (quantile(0.5, node_network_mtu_bytes{device!="lo"})) eval_time: 1m exp_samples: - - labels: '{__name__="node_network_mtu_bytes", device="eth4", - instance="node-exporter", job="node-exporter"}' + - labels: '{device="eth4", instance="node-exporter", job="node-exporter"}' value: 9000 alert_rule_test: - eval_time: 1m -- 2.39.5