From 7cc902b6b8b06992cc8ca4144c2b49f4fdc7c88a Mon Sep 17 00:00:00 2001 From: Aashish Sharma Date: Tue, 18 Oct 2022 16:42:32 +0530 Subject: [PATCH] mgr/dashboard: Fix CephPoolGrowthWarning alert Prometheus reports an error - many-to-many matching not allowed: matching labels must be unique on one side for CephPoolGrowthWarning if we have same pool ids on two different instances. Fixes: https://tracker.ceph.com/issues/58017 Signed-off-by: Aashish Sharma (cherry picked from commit 97189b66afd4623ae09bc4ba12e1af6f69821793) --- monitoring/ceph-mixin/prometheus_alerts.yml | 2 +- .../ceph-mixin/tests_alerts/test_alerts.yml | 31 ++++++++++++------- 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/monitoring/ceph-mixin/prometheus_alerts.yml b/monitoring/ceph-mixin/prometheus_alerts.yml index a544d41eb0ee0..bd773f27c5402 100644 --- a/monitoring/ceph-mixin/prometheus_alerts.yml +++ b/monitoring/ceph-mixin/prometheus_alerts.yml @@ -518,7 +518,7 @@ groups: annotations: description: "Pool '{{ $labels.name }}' will be full in less than 5 days assuming the average fill-up rate of the past 48 hours." 
summary: "Pool growth rate may soon exceed capacity" - expr: "(predict_linear(ceph_pool_percent_used[2d], 3600 * 24 * 5) * on(pool_id) group_right ceph_pool_metadata) >= 95" + expr: "(predict_linear(ceph_pool_percent_used[2d], 3600 * 24 * 5) * on(pool_id, instance) group_right() ceph_pool_metadata) >= 95" labels: oid: "1.3.6.1.4.1.50495.1.2.1.9.2" severity: "warning" diff --git a/monitoring/ceph-mixin/tests_alerts/test_alerts.yml b/monitoring/ceph-mixin/tests_alerts/test_alerts.yml index 7b7e7db7301bd..8cdb563493603 100644 --- a/monitoring/ceph-mixin/tests_alerts/test_alerts.yml +++ b/monitoring/ceph-mixin/tests_alerts/test_alerts.yml @@ -1472,35 +1472,44 @@ tests: # trigger percent full prediction on pools 1 and 2 only - interval: 12h input_series: - - series: 'ceph_pool_percent_used{pool_id="1"}' - values: '70 75 80 87 92' - - series: 'ceph_pool_percent_used{pool_id="2"}' + - series: 'ceph_pool_percent_used{pool_id="1", instance="9090"}' + values: '1 1 1 1 1' + - series: 'ceph_pool_percent_used{pool_id="1", instance="8090"}' + values: '78 89 79 98 78' + - series: 'ceph_pool_percent_used{pool_id="2", instance="9090"}' + values: '1 1 1 1 1' + - series: 'ceph_pool_percent_used{pool_id="2", instance="8090"}' values: '22 22 23 23 24' - - series: 'ceph_pool_metadata{pool_id="1",name="rbd",type="replicated"}' + - series: 'ceph_pool_metadata{pool_id="1" , instance="9090" ,name="rbd",type="replicated"}' + values: '1 1 1 1 1' + - series: 'ceph_pool_metadata{pool_id="1", instance="8090",name="default.rgw.index",type="replicated"}' + values: '1 1 1 1 1' + - series: 'ceph_pool_metadata{pool_id="2" , instance="9090" ,name="rbd",type="replicated"}' values: '1 1 1 1 1' - - series: 'ceph_pool_metadata{pool_id="2",name="default.rgw.index",type="replicated"}' + - series: 'ceph_pool_metadata{pool_id="2", instance="8090",name="default.rgw.index",type="replicated"}' values: '1 1 1 1 1' promql_expr_test: - expr: | - (predict_linear(ceph_pool_percent_used[2d], 3600 * 24 * 5) * 
on(pool_id) - group_right ceph_pool_metadata) >= 95 + (predict_linear(ceph_pool_percent_used[2d], 3600 * 24 * 5) * on(pool_id, instance) + group_right() ceph_pool_metadata) >= 95 eval_time: 36h exp_samples: - - labels: '{name="rbd",pool_id="1",type="replicated"}' - value: 1.424E+02 # 142% + - labels: '{instance="8090",name="default.rgw.index",pool_id="1",type="replicated"}' + value: 1.435E+02 # 143.5% alert_rule_test: - eval_time: 48h alertname: CephPoolGrowthWarning exp_alerts: - exp_labels: - name: rbd + instance: 8090 + name: default.rgw.index pool_id: 1 severity: warning type: ceph_default oid: 1.3.6.1.4.1.50495.1.2.1.9.2 exp_annotations: summary: Pool growth rate may soon exceed capacity - description: Pool 'rbd' will be full in less than 5 days assuming the average fill-up rate of the past 48 hours. + description: Pool 'default.rgw.index' will be full in less than 5 days assuming the average fill-up rate of the past 48 hours. - interval: 1m input_series: - series: 'ceph_health_detail{name="POOL_BACKFILLFULL"}' -- 2.39.5