},
"id": 53,
"legend": {
+ "alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"steppedLine": false,
"targets": [
{
- "expr": "ceph_pg_total",
+ "expr": "ceph_pool_metadata * on(pool_id,instance) group_left() (ceph_pg_total)",
"format": "time_series",
"intervalFactor": 1,
- "legendFormat": "Total",
+ "legendFormat": "{{name}} Total",
"refId": "A"
},
{
- "expr": "ceph_pg_active",
+ "expr": "ceph_pool_metadata * on(pool_id,instance) group_left() (ceph_pg_active)",
"format": "time_series",
"intervalFactor": 1,
- "legendFormat": "Active",
+ "legendFormat": "{{name}} Active",
"refId": "B"
},
{
- "expr": "ceph_pg_total - ceph_pg_active",
+ "expr": "ceph_pool_metadata * on(pool_id,instance) group_left() (ceph_pg_total - ceph_pg_active)",
"format": "time_series",
"intervalFactor": 1,
- "legendFormat": "Inactive",
+ "legendFormat": "{{name}} Inactive",
"refId": "G"
},
{
- "expr": "ceph_pg_undersized",
+ "expr": "ceph_pool_metadata * on(pool_id,instance) group_left() (ceph_pg_undersized)",
"format": "time_series",
"intervalFactor": 1,
- "legendFormat": "Undersized",
+ "legendFormat": "{{name}} Undersized",
"refId": "F"
},
{
- "expr": "ceph_pg_degraded",
+ "expr": "ceph_pool_metadata * on(pool_id,instance) group_left() (ceph_pg_degraded)",
"format": "time_series",
"intervalFactor": 1,
- "legendFormat": "Degraded",
+ "legendFormat": "{{name}} Degraded",
"refId": "C"
},
{
- "expr": "ceph_pg_inconsistent",
+ "expr": "ceph_pool_metadata * on(pool_id,instance) group_left() (ceph_pg_inconsistent)",
"format": "time_series",
"intervalFactor": 1,
- "legendFormat": "Inconsistent",
+ "legendFormat": "{{name}} Inconsistent",
"refId": "D"
},
{
- "expr": "ceph_pg_down",
+ "expr": "ceph_pool_metadata * on(pool_id,instance) group_left() (ceph_pg_down)",
"format": "time_series",
"intervalFactor": 1,
- "legendFormat": "Down",
+ "legendFormat": "{{name}} Down",
"refId": "E"
}
],
- name: pgs
rules:
- alert: pgs inactive
- expr: ceph_pg_total - ceph_pg_active > 0
+ expr: ceph_pool_metadata * on(pool_id,instance) group_left() (ceph_pg_total - ceph_pg_active) > 0
for: 5m
labels:
severity: critical
oid: 1.3.6.1.4.1.50495.15.1.2.7.1
annotations:
description: >
- {{ $value }} PGs have been inactive for more than 5 minutes.
+ {{ $value }} PGs in pool {{ $labels.name }} have been inactive for more than 5 minutes.
Inactive placement groups aren't able to serve read/write
requests.
- alert: pgs unclean
- expr: ceph_pg_total - ceph_pg_clean > 0
+ expr: ceph_pool_metadata * on(pool_id,instance) group_left() (ceph_pg_total - ceph_pg_clean) > 0
for: 15m
labels:
severity: warning
oid: 1.3.6.1.4.1.50495.15.1.2.7.2
annotations:
description: >
- {{ $value }} PGs haven't been clean for more than 15 minutes.
+ {{ $value }} PGs in pool {{ $labels.name }} haven't been clean for more than 15 minutes.
Unclean PGs haven't been able to completely recover from a
previous failure.
- name: nodes