rules: [
{
alert: 'CephPoolGrowthWarning',
- expr: '(predict_linear(ceph_pool_percent_used[2d], 3600 * 24 * 5) * on(%(cluster)spool_id) group_right ceph_pool_metadata) >= 95' % $.MultiClusterQuery(),
+ expr: '(predict_linear(ceph_pool_percent_used[2d], 3600 * 24 * 5) * on(%(cluster)spool_id, instance) group_right() ceph_pool_metadata) >= 95' % $.MultiClusterQuery(),
labels: { severity: 'warning', type: 'ceph_default', oid: '1.3.6.1.4.1.50495.1.2.1.9.2' },
annotations: {
summary: 'Pool growth rate may soon exceed capacity%(cluster)s' % $.MultiClusterSummary(),
type: "ceph_default"
# Fires when a bonded network interface on a node has at least one inactive
# slave (active count differs from configured slave count).
# Diff hunk resolved: the patch moved `summary` below `description` so the
# annotation keys are in alphabetical order; the `+` side is applied here.
- alert: "CephNodeNetworkBondDegraded"
  annotations:
    description: "Bond {{ $labels.master }} is degraded on Node {{ $labels.instance }}."
    summary: "Degraded Bond on Node {{ $labels.instance }}"
  expr: |
    node_bonding_slaves - node_bonding_active != 0
  labels:
    severity: "warning"
    type: "ceph_default"
# Fires after 30s when any Ceph daemon reports SLOW_OPS health metrics,
# i.e. operations exceeding the configured complaint time.
# Diff hunk resolved: the patch alphabetized the annotation keys, moved
# `expr`/`for`/`labels` after `annotations`, and normalized single quotes to
# double quotes; the `+` side is applied here and the `-` lines dropped.
- alert: "CephDaemonSlowOps"
  annotations:
    description: "{{ $labels.ceph_daemon }} operations are taking too long to process (complaint time exceeded)"
    documentation: "https://docs.ceph.com/en/latest/rados/operations/health-checks#slow-ops"
    summary: "{{ $labels.ceph_daemon }} operations are slow to complete"
  expr: "ceph_daemon_health_metrics{type=\"SLOW_OPS\"} > 0"
  for: "30s"
  labels:
    severity: "warning"
    type: "ceph_default"
- name: "cephadm"
rules:
- alert: "CephadmUpgradeFailed"