From b57ae2edab454f48745030dc6b14e269e4c313a7 Mon Sep 17 00:00:00 2001 From: Aashish Sharma Date: Thu, 25 Mar 2021 11:25:37 +0530 Subject: [PATCH] mgr/dashboard: Simplify some complex calculations in test_alerts.yml run-promtool-unittests is failing because of differences in floating-point values in some complex calculations. This PR simplifies those calculations to fix the issue. Fixes: https://tracker.ceph.com/issues/49952 Signed-off-by: Aashish Sharma (cherry picked from commit 8d2f39e6c568afb6880689160212bcc93057e194) --- monitoring/prometheus/alerts/test_alerts.yml | 51 +++++++++----------- 1 file changed, 23 insertions(+), 28 deletions(-) diff --git a/monitoring/prometheus/alerts/test_alerts.yml b/monitoring/prometheus/alerts/test_alerts.yml index cc246954388f..fe450e343afb 100644 --- a/monitoring/prometheus/alerts/test_alerts.yml +++ b/monitoring/prometheus/alerts/test_alerts.yml @@ -22,7 +22,6 @@ tests: - exp_labels: instance: ceph:9283 job: ceph - oid: 1.3.6.1.4.1.50495.15.1.2.2.1 type: ceph_default severity: critical exp_annotations: @@ -50,7 +49,6 @@ tests: - exp_labels: instance: ceph:9283 job: ceph - oid: 1.3.6.1.4.1.50495.15.1.2.2.2 type: ceph_default severity: warning exp_annotations: @@ -96,7 +94,6 @@ tests: alertname: low monitor quorum count exp_alerts: - exp_labels: - oid: 1.3.6.1.4.1.50495.15.1.2.3.1 type: ceph_default severity: critical exp_annotations: @@ -150,7 +147,6 @@ tests: alertname: 10% OSDs down exp_alerts: - exp_labels: - oid: 1.3.6.1.4.1.50495.15.1.2.4.1 type: ceph_default severity: critical exp_annotations: @@ -201,7 +197,6 @@ tests: alertname: OSD down exp_alerts: - exp_labels: - oid: 1.3.6.1.4.1.50495.15.1.2.4.2 type: ceph_default severity: warning exp_annotations: @@ -228,7 +223,7 @@ tests: - series: 'ceph_osd_stat_bytes_used{ceph_daemon="osd.2",instance="ceph:9283" ,job="ceph"}' values: '1076310016 1076310016 1076310016 1076310016 1076310016 - 106447810032' + 100856561909.76' - series: 
'ceph_osd_stat_bytes{ceph_daemon="osd.0",instance="ceph:9283" ,job="ceph"}' values: '108447916032 108447916032 108447916032 108447916032 108447916032 @@ -280,7 +275,7 @@ tests: exp_samples: - labels: '{ceph_daemon="osd.2",hostname="ceph",instance="ceph:9283", job="ceph"}' - value: 9.815569899986845E+01 + value: 9.3E+01 alert_rule_test: - eval_time: 10m alertname: OSDs near full @@ -290,12 +285,11 @@ tests: hostname: ceph instance: ceph:9283 job: ceph - oid: 1.3.6.1.4.1.50495.15.1.2.4.3 type: ceph_default severity: critical exp_annotations: description: > - OSD osd.2 on ceph is dangerously full: 98.16% + OSD osd.2 on ceph is dangerously full: 93% # flapping OSD - interval: 1s @@ -347,7 +341,6 @@ tests: hostname: ceph instance: ceph:9283 job: ceph - oid: 1.3.6.1.4.1.50495.15.1.2.4.4 severity: warning type: ceph_default exp_annotations: @@ -361,13 +354,16 @@ tests: input_series: - series: 'ceph_osd_numpg{ceph_daemon="osd.0",instance="ceph:9283", job="ceph"}' - values: '169 169 169 169 169 169' + values: '100 100 100 100 100 160' - series: 'ceph_osd_numpg{ceph_daemon="osd.1",instance="ceph:9283", job="ceph"}' - values: '169 169 169 169 169 90' + values: '100 100 100 100 100 320' - series: 'ceph_osd_numpg{ceph_daemon="osd.2",instance="ceph:9283", job="ceph"}' - values: '169 169 169 169 169 169' + values: '100 100 100 100 100 160' + - series: 'ceph_osd_numpg{ceph_daemon="osd.3",instance="ceph:9283", + job="ceph"}' + values: '100 100 100 100 100 160' - series: 'ceph_osd_metadata{back_iface="eth0",ceph_daemon="osd.0", ceph_version="ceph version 17.0.0-189-g3558fd72 (3558fd7291855971aa6481a2ade468ad61fbb346) pacific (dev)", @@ -389,6 +385,13 @@ tests: hostname="ceph",instance="ceph:9283",job="ceph",objectstore="bluestore", public_addr="172.20.0.2"}' values: '1 1 1 1 1 1' + - series: 'ceph_osd_metadata{back_iface="eth0",ceph_daemon="osd.3", + ceph_version="ceph version 17.0.0-189-g3558fd72 + (3558fd7291855971aa6481a2ade468ad61fbb346) pacific (dev)", + 
cluster_addr="172.20.0.2",device_class="hdd",front_iface="eth0", + hostname="ceph",instance="ceph:9283",job="ceph",objectstore="bluestore", + public_addr="172.20.0.2"}' + values: '1 1 1 1 1 1' promql_expr_test: - expr: | abs( @@ -402,7 +405,7 @@ tests: exp_samples: - labels: '{ceph_daemon="osd.1", hostname="ceph", instance="ceph:9283", job="ceph"}' - value: 3.691588785046729E-01 + value: 6E-01 alert_rule_test: - eval_time: 10m alertname: high pg count deviation @@ -412,7 +415,6 @@ tests: hostname: ceph instance: ceph:9283 job: ceph - oid: 1.3.6.1.4.1.50495.15.1.2.4.5 severity: warning type: ceph_default exp_annotations: @@ -461,7 +463,6 @@ tests: instance: ceph:9283 job: ceph name: device_health_metrics - oid: 1.3.6.1.4.1.50495.15.1.2.7.1 pool_id: 3 severity: critical type: ceph_default @@ -516,7 +517,6 @@ tests: instance: ceph:9283 job: ceph name: device_health_metrics - oid: 1.3.6.1.4.1.50495.15.1.2.7.2 pool_id: 3 severity: warning type: ceph_default @@ -534,7 +534,7 @@ tests: --live-home",fstype="ext4",instance="node-exporter",job="node-exporter", mountpoint="/"}' values: '35336400896 35336400896 35336400896 35336400896 35336400896 - 3533640089 3533640089' + 3525385519.104 3533640089' - series: 'node_filesystem_size_bytes{device="/dev/mapper/fedora_localhost --live-home",fstype="ext4",instance="node-exporter",job="node-exporter", mountpoint="/"}' @@ -548,7 +548,7 @@ tests: - labels: '{device="/dev/mapper/fedora_localhost --live-home", fstype="ext4", instance="node-exporter", job="node-exporter", mountpoint="/"}' - value: 4.8112390362092565E+00 + value: 4.8E+00 alert_rule_test: - eval_time: 10m alertname: root volume full @@ -559,7 +559,6 @@ tests: instance: node-exporter job: node-exporter mountpoint: / - oid: 1.3.6.1.4.1.50495.15.1.2.8.1 severity: critical type: ceph_default exp_annotations: @@ -601,7 +600,6 @@ tests: device: eth0 instance: node-exporter job: node-exporter - oid: 1.3.6.1.4.1.50495.15.1.2.8.2 severity: warning type: ceph_default exp_annotations: 
@@ -644,7 +642,6 @@ tests: device: eth0 instance: node-exporter job: node-exporter - oid: 1.3.6.1.4.1.50495.15.1.2.8.3 severity: warning type: ceph_default exp_annotations: @@ -702,7 +699,7 @@ tests: - series: 'ceph_pool_stored{instance="ceph:9283",job="ceph",pool_id="2"}' values: '1850 1850 1850 1850 1850 1850 1850' - series: 'ceph_pool_stored{instance="ceph:9283",job="ceph",pool_id="3"}' - values: '10628706304000 10628706304000 23524 23524 23524 23524 23524 23524 + values: '900 900 23524 23524 23524 23524 23524 23524 23524' - series: 'ceph_pool_max_avail{instance="ceph:9283",job="ceph",pool_id="1"}' values: '106287063040 106287063040 106287063040 106287063040 106287063040 @@ -711,8 +708,7 @@ tests: values: '106287063040 106287063040 106287063040 106287063040 106287063040 106287063040 106287063040' - series: 'ceph_pool_max_avail{instance="ceph:9283",job="ceph",pool_id="3"}' - values: '106287063040 1 106287063040 106287063040 106287063040 - 106287063040 106287063040' + values: '37.5 37.5 37.5 37.5 37.5 37.5 37.5' - series: 'ceph_pool_metadata{instance="ceph:9283",job="ceph", name="device_health_metrics",pool_id="1"}' values: '1 1 1 1 1 1 1 1 1' @@ -731,7 +727,7 @@ tests: exp_samples: - labels: '{instance="ceph:9283", job="ceph", name="default.rgw.log", pool_id="3"}' - value: 9.999999999999059E+01 + value: 9.6E+01 alert_rule_test: - eval_time: 2m alertname: pool full @@ -740,12 +736,11 @@ tests: instance: ceph:9283 job: ceph name: default.rgw.log - oid: 1.3.6.1.4.1.50495.15.1.2.9.1 pool_id: 3 severity: critical type: ceph_default exp_annotations: - description: Pool default.rgw.log at 99.01% capacity. + description: Pool default.rgw.log at 96% capacity. # slow OSD ops - interval : 1m -- 2.47.3