dashboardTags: ['ceph-mixin'],
clusterLabel: 'cluster',
- showMultiCluster: false,
+ showMultiCluster: true,
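+ // With multi-cluster support enabled, each dashboard gains a $cluster template (via addClusterTemplate) and scopes its queries through $.matchers().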
CephNodeNetworkPacketDropsThreshold: 0.005,
CephNodeNetworkPacketDropsPerSec: 10,
local g = import 'grafonnet/grafana.libsonnet';
-local u = import 'utils.libsonnet';
(import 'utils.libsonnet') {
- 'ceph-cluster-advanced.json': u.dashboardSchema(
+ 'ceph-cluster-advanced.json': $.dashboardSchema(
'Ceph Cluster - Advanced',
'Ceph cluster overview',
- '',
+ 'dn13KBeTv',
'now-6h',
'1m',
38,
$._config.dashboardTags,
''
).addAnnotation(
- u.addAnnotationSchema(
+ $.addAnnotationSchema(
1,
'-- Grafana --',
true, // enable
type='panel', id='heatmap', name='Heatmap', version='5.0.0'
).addRequired(
type='panel', id='singlestat', name='Singlestat', version='5.0.0'
- ).
- addTemplate(g.template.datasource('DS_PROMETHEUS', 'prometheus', 'Prometheus', label='Data Source')).
- addTemplate(
- u.addCustomTemplate(
+ ).addTemplate(
+ g.template.datasource('datasource', 'prometheus', 'default', label='Data Source')
+ ).addTemplate(
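+ // addClusterTemplate() comes from utils.libsonnet and is assumed to add the $cluster variable driven by the clusterLabel config above.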
+ $.addClusterTemplate()
+ ).addTemplate(
+ $.addCustomTemplate(
name='interval',
query='5s,10s,30s,1m,10m,30m,1h,6h,12h,1d,7d,14d,30d',
current='$__auto_interval_interval',
)
).addPanels(
[
- u.addRowSchema(collapse=false, showTitle=true, title='CLUSTER STATE') + { gridPos: { x: 0, y: 0, w: 24, h: 1 } },
- u.addStatPanel(
+ $.addRowSchema(collapse=false, showTitle=true, title='CLUSTER STATE') + { gridPos: { x: 0, y: 0, w: 24, h: 1 } },
+ $.addStatPanel(
title='Ceph health status',
unit='none',
- datasource='${DS_PROMETHEUS}',
+ datasource='$datasource',
gridPosition={ x: 0, y: 1, w: 3, h: 3 },
colorMode='value',
interval='1m',
{ color: 'rgba(237, 129, 40, 0.89)', value: 1 },
{ color: 'rgba(245, 54, 54, 0.9)', value: 2 },
])
- .addTarget(u.addTargetSchema(
- expr='ceph_health_status{}',
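+ // Every panel query is templated with %(matchers)s, which $.matchers() resolves to the cluster-scoping label matchers.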
+ .addTarget($.addTargetSchema(
+ expr='ceph_health_status{%(matchers)s}' % $.matchers(),
instant=true,
interval='$interval',
- datasource='${DS_PROMETHEUS}',
+ datasource='$datasource',
step=300,
)),
- u.addGaugePanel(
+ $.addGaugePanel(
title='Available Capacity',
gridPosition={ h: 6, w: 3, x: 3, y: 1 },
unit='percentunit',
{ color: 'rgba(237, 129, 40, 0.89)', value: 0.1 },
{ color: 'rgba(50, 172, 45, 0.97)', value: 0.3 },
])
- .addTarget(u.addTargetSchema(
- expr='(ceph_cluster_total_bytes{}-ceph_cluster_total_used_bytes{})/ceph_cluster_total_bytes{}',
+ .addTarget($.addTargetSchema(
+ expr='(ceph_cluster_total_bytes{%(matchers)s}-ceph_cluster_total_used_bytes{%(matchers)s})/ceph_cluster_total_bytes{%(matchers)s}' % $.matchers(),
instant=true,
interval='$interval',
- datasource='${DS_PROMETHEUS}',
+ datasource='$datasource',
step=300
)),
- u.addStatPanel(
+ $.addStatPanel(
title='Cluster Capacity',
unit='decbytes',
- datasource='${DS_PROMETHEUS}',
+ datasource='$datasource',
gridPosition={ x: 6, y: 1, w: 3, h: 3 },
graphMode='area',
decimals=2,
{ color: 'rgba(237, 129, 40, 0.89)', value: 0.025 },
{ color: 'rgba(245, 54, 54, 0.9)', value: 1.0 },
])
- .addTarget(u.addTargetSchema(
- expr='ceph_cluster_total_bytes{}',
+ .addTarget($.addTargetSchema(
+ expr='ceph_cluster_total_bytes{%(matchers)s}' % $.matchers(),
instant=true,
interval='$interval',
- datasource='${DS_PROMETHEUS}',
+ datasource='$datasource',
step=300
)),
- u.addStatPanel(
+ $.addStatPanel(
title='Write Throughput',
unit='Bps',
- datasource='${DS_PROMETHEUS}',
+ datasource='$datasource',
gridPosition={ x: 9, y: 1, w: 3, h: 3 },
decimals=1,
color={ mode: 'thresholds' },
]).addThresholds([
{ color: 'green' },
])
- .addTarget(u.addTargetSchema(
- expr='sum(irate(ceph_osd_op_w_in_bytes{}[5m]))',
+ .addTarget($.addTargetSchema(
+ expr='sum(irate(ceph_osd_op_w_in_bytes{%(matchers)s}[5m]))' % $.matchers(),
instant=true,
interval='$interval',
- datasource='${DS_PROMETHEUS}',
+ datasource='$datasource',
)),
- u.addStatPanel(
+ $.addStatPanel(
title='Read Throughput',
unit='Bps',
- datasource='${DS_PROMETHEUS}',
+ datasource='$datasource',
gridPosition={ x: 12, y: 1, w: 3, h: 3 },
decimals=1,
color={ mode: 'thresholds' },
{ color: 'rgba(237, 129, 40, 0.89)', value: 0 },
{ color: '#9ac48a', value: 0 },
])
- .addTarget(u.addTargetSchema(
- expr='sum(irate(ceph_osd_op_r_out_bytes{}[5m]))',
+ .addTarget($.addTargetSchema(
+ expr='sum(irate(ceph_osd_op_r_out_bytes{%(matchers)s}[5m]))' % $.matchers(),
instant=true,
interval='$interval',
- datasource='${DS_PROMETHEUS}',
+ datasource='$datasource',
)),
- u.addStatPanel(
+ $.addStatPanel(
title='OSDs',
- datasource='${DS_PROMETHEUS}',
+ datasource='$datasource',
gridPosition={ h: 3, w: 6, x: 15, y: 1 },
color={ mode: 'thresholds' },
thresholdsMode='absolute',
{ color: 'red', value: 80 },
])
.addTargets([
- u.addTargetSchema(
+ $.addTargetSchema(
aggregation='Last',
alias='All',
decimals=2,
displayValueWithAlias='When Alias Displayed',
units='none',
valueHandler='Number Threshold',
- expr='count(ceph_osd_metadata)',
+ expr='count(ceph_osd_metadata{%(matchers)s})' % $.matchers(),
legendFormat='All',
interval='$interval',
- datasource='${DS_PROMETHEUS}',
+ datasource='$datasource',
),
- u.addTargetSchema(
+ $.addTargetSchema(
aggregation='Last',
alias='In',
decimals=2,
displayValueWithAlias='When Alias Displayed',
units='none',
valueHandler='Number Threshold',
- expr='count(ceph_osd_in)',
+ expr='count(ceph_osd_in{%(matchers)s})' % $.matchers(),
legendFormat='In',
interval='$interval',
- datasource='${DS_PROMETHEUS}',
+ datasource='$datasource',
),
- u.addTargetSchema(
+ $.addTargetSchema(
aggregation='Last',
alias='Out',
decimals=2,
displayValueWithAlias='When Alias Displayed',
units='none',
valueHandler='Number Threshold',
- expr='sum(ceph_osd_in == bool 0)',
+ expr='sum(ceph_osd_in{%(matchers)s} == bool 0)' % $.matchers(),
legendFormat='Out',
interval='',
warn=1,
- datasource='${DS_PROMETHEUS}',
+ datasource='$datasource',
),
- u.addTargetSchema(
+ $.addTargetSchema(
aggregation='Last',
alias='Up',
decimals=2,
displayValueWithAlias='When Alias Displayed',
units='none',
valueHandler='Number Threshold',
- expr='sum(ceph_osd_up)',
+ expr='sum(ceph_osd_up{%(matchers)s})' % $.matchers(),
legendFormat='Up',
interval='',
- datasource='${DS_PROMETHEUS}',
+ datasource='$datasource',
),
- u.addTargetSchema(
+ $.addTargetSchema(
aggregation='Last',
alias='Down',
decimals=2,
displayValueWithAlias='When Alias Displayed',
units='none',
valueHandler='Number Threshold',
- expr='sum(ceph_osd_up == bool 0)',
+ expr='sum(ceph_osd_up{%(matchers)s} == bool 0)' % $.matchers(),
legendFormat='Down',
interval='',
warn=1,
- datasource='${DS_PROMETHEUS}',
+ datasource='$datasource',
),
]),
- u.addStatPanel(
+ $.addStatPanel(
title='MGRs',
- datasource='${DS_PROMETHEUS}',
+ datasource='$datasource',
gridPosition={ h: 6, w: 3, x: 21, y: 1 },
color={ mode: 'thresholds' },
thresholdsMode='absolute',
{ color: 'red', value: 80 },
])
.addTargets([
- u.addTargetSchema(
+ $.addTargetSchema(
aggregation='Last',
alias='Active',
decimals=2,
displayValueWithAlias='When Alias Displayed',
units='none',
valueHandler='Number Threshold',
- expr='count(ceph_mgr_status == 1) or vector(0)',
+ expr='count(ceph_mgr_status{%(matchers)s} == 1) or vector(0)' % $.matchers(),
legendFormat='Active',
- datasource='${DS_PROMETHEUS}',
+ datasource='$datasource',
instant=true,
),
- u.addTargetSchema(
+ $.addTargetSchema(
aggregation='Last',
alias='Standby',
decimals=2,
displayValueWithAlias='When Alias Displayed',
units='none',
valueHandler='Number Threshold',
- expr='count(ceph_mgr_status == 0) or vector(0)',
+ expr='count(ceph_mgr_status{%(matchers)s} == 0) or vector(0)' % $.matchers(),
legendFormat='Standby',
- datasource='${DS_PROMETHEUS}',
+ datasource='$datasource',
instant=true,
),
]),
- u.addStatPanel(
+ $.addStatPanel(
title='Firing Alerts',
- datasource='${DS_PROMETHEUS}',
+ datasource='$datasource',
gridPosition={ h: 3, w: 3, x: 0, y: 4 },
color={ mode: 'thresholds' },
thresholdsMode='absolute',
] },
])
.addTargets([
- u.addTargetSchema(
+ $.addTargetSchema(
aggregation='Last',
alias='Active',
decimals=2,
displayValueWithAlias='When Alias Displayed',
units='none',
valueHandler='Number Threshold',
- expr='count(ALERTS{alertstate="firing",alertname=~"^Ceph.+", severity="critical"}) OR vector(0)',
+ expr='count(ALERTS{alertstate="firing",alertname=~"^Ceph.+", severity="critical", %(matchers)s}) OR vector(0)' % $.matchers(),
legendFormat='Critical',
- datasource='${DS_PROMETHEUS}',
+ datasource='$datasource',
instant=true,
),
- u.addTargetSchema(
+ $.addTargetSchema(
aggregation='Last',
alias='Standby',
decimals=2,
displayValueWithAlias='When Alias Displayed',
units='none',
valueHandler='Number Threshold',
- expr='count(ALERTS{alertstate="firing",alertname=~"^Ceph.+", severity="warning"}) OR vector(0)',
+ expr='count(ALERTS{alertstate="firing",alertname=~"^Ceph.+", severity="warning", %(matchers)s}) OR vector(0)' % $.matchers(),
legendFormat='Warning',
- datasource='${DS_PROMETHEUS}',
+ datasource='$datasource',
instant=true,
),
]),
- u.addStatPanel(
+ $.addStatPanel(
title='Used Capacity',
- datasource='${DS_PROMETHEUS}',
+ datasource='$datasource',
gridPosition={ h: 3, w: 3, x: 6, y: 4 },
color={ mode: 'thresholds' },
thresholdsMode='absolute',
{ color: 'rgba(245, 54, 54, 0.9)', value: 0.1 },
])
.addTargets([
- u.addTargetSchema(
- expr='ceph_cluster_total_used_bytes{}',
+ $.addTargetSchema(
+ expr='ceph_cluster_total_used_bytes{%(matchers)s}' % $.matchers(),
legendFormat='',
- datasource='${DS_PROMETHEUS}',
+ datasource='$datasource',
instant=true,
),
]),
- u.addStatPanel(
+ $.addStatPanel(
title='Write IOPS',
- datasource='${DS_PROMETHEUS}',
+ datasource='$datasource',
gridPosition={ h: 3, w: 3, x: 9, y: 4 },
color={ mode: 'thresholds' },
thresholdsMode='absolute',
{ color: 'green', value: null },
])
.addTargets([
- u.addTargetSchema(
- expr='sum(irate(ceph_osd_op_w{}[1m]))',
+ $.addTargetSchema(
+ expr='sum(irate(ceph_osd_op_w{%(matchers)s}[1m]))' % $.matchers(),
legendFormat='',
- datasource='${DS_PROMETHEUS}',
+ datasource='$datasource',
instant=true,
),
]),
- u.addStatPanel(
+ $.addStatPanel(
title='Read IOPS',
- datasource='${DS_PROMETHEUS}',
+ datasource='$datasource',
gridPosition={ h: 3, w: 3, x: 12, y: 4 },
color={ mode: 'thresholds' },
thresholdsMode='absolute',
{ color: '#9ac48a', value: 0 },
])
.addTargets([
- u.addTargetSchema(
- expr='sum(irate(ceph_osd_op_r{}[1m]))',
+ $.addTargetSchema(
+ expr='sum(irate(ceph_osd_op_r{%(matchers)s}[1m]))' % $.matchers(),
legendFormat='',
- datasource='${DS_PROMETHEUS}',
+ datasource='$datasource',
instant=true,
),
]),
- u.addStatPanel(
+ $.addStatPanel(
title='Monitors',
- datasource='${DS_PROMETHEUS}',
+ datasource='$datasource',
gridPosition={ h: 3, w: 6, x: 15, y: 4 },
color={ mode: 'thresholds' },
thresholdsMode='absolute',
{ color: 'red', value: 80 },
])
.addTargets([
- u.addTargetSchema(
+ $.addTargetSchema(
aggregation='Last',
alias='In Quorum',
decimals=2,
displayValueWithAlias='When Alias Displayed',
units='none',
valueHandler='Text Only',
- expr='sum(ceph_mon_quorum_status)',
+ expr='sum(ceph_mon_quorum_status{%(matchers)s})' % $.matchers(),
legendFormat='In Quorum',
- datasource='${DS_PROMETHEUS}',
+ datasource='$datasource',
),
- u.addTargetSchema(
+ $.addTargetSchema(
aggregation='Last',
alias='Total',
crit=1,
displayValueWithAlias='When Alias Displayed',
units='none',
valueHandler='Text Only',
- expr='count(ceph_mon_quorum_status)',
+ expr='count(ceph_mon_quorum_status{%(matchers)s})' % $.matchers(),
legendFormat='Total',
- datasource='${DS_PROMETHEUS}',
+ datasource='$datasource',
warn=2,
),
- u.addTargetSchema(
+ $.addTargetSchema(
aggregation='Last',
alias='MONs out of Quorum',
crit=1.6,
displayValueWithAlias='Never',
units='none',
valueHandler='Number Threshold',
- expr='count(ceph_mon_quorum_status) - sum(ceph_mon_quorum_status)',
+ expr='count(ceph_mon_quorum_status{%(matchers)s}) - sum(ceph_mon_quorum_status{%(matchers)s})' % $.matchers(),
legendFormat='MONs out of Quorum',
- datasource='${DS_PROMETHEUS}',
+ datasource='$datasource',
warn=1.1,
range=true,
),
]),
- u.addRowSchema(collapse=false, showTitle=true, title='CLUSTER STATS') + { gridPos: { x: 0, y: 7, w: 24, h: 1 } },
- u.addAlertListPanel(
+ $.addRowSchema(collapse=false, showTitle=true, title='CLUSTER STATS') + { gridPos: { x: 0, y: 7, w: 24, h: 1 } },
+ $.addAlertListPanel(
title='Alerts',
datasource={
type: 'datasource',
uid: 'grafana',
},
gridPosition={ h: 8, w: 8, x: 0, y: 8 },
- alertInstanceLabelFilter='{alertname=~"^Ceph.+"}',
+ alertInstanceLabelFilter='{alertname=~"^Ceph.+", %(matchers)s}' % $.matchers(),
alertName='',
dashboardAlerts=false,
groupBy=[],
},
),
- u.timeSeriesPanel(
+ $.timeSeriesPanel(
title='Capacity',
- datasource='${DS_PROMETHEUS}',
+ datasource='$datasource',
gridPosition={ h: 8, w: 8, x: 8, y: 8 },
fillOpacity=40,
pointSize=5,
)
.addTargets(
[
- u.addTargetSchema(
- expr='ceph_cluster_total_bytes{}',
- datasource='${DS_PROMETHEUS}',
+ $.addTargetSchema(
+ expr='ceph_cluster_total_bytes{%(matchers)s}' % $.matchers(),
+ datasource='$datasource',
interval='$interval',
instant=false,
legendFormat='Total Capacity',
step=300,
range=true,
),
- u.addTargetSchema(
- expr='ceph_cluster_total_used_bytes{}',
- datasource='${DS_PROMETHEUS}',
+ $.addTargetSchema(
+ expr='ceph_cluster_total_used_bytes{%(matchers)s}' % $.matchers(),
+ datasource='$datasource',
interval='$interval',
instant=false,
legendFormat='Used',
]
),
- u.timeSeriesPanel(
+ $.timeSeriesPanel(
title='Cluster Throughput',
- datasource='${DS_PROMETHEUS}',
+ datasource='$datasource',
gridPosition={ h: 8, w: 8, x: 16, y: 8 },
fillOpacity=10,
pointSize=5,
])
.addTargets(
[
- u.addTargetSchema(
- expr='sum(irate(ceph_osd_op_w_in_bytes{}[5m]))',
- datasource='${DS_PROMETHEUS}',
+ $.addTargetSchema(
+ expr='sum(irate(ceph_osd_op_w_in_bytes{%(matchers)s}[5m]))' % $.matchers(),
+ datasource='$datasource',
interval='$interval',
legendFormat='Write',
step=300,
range=true,
),
- u.addTargetSchema(
- expr='sum(irate(ceph_osd_op_r_out_bytes{}[5m]))',
- datasource='${DS_PROMETHEUS}',
+ $.addTargetSchema(
+ expr='sum(irate(ceph_osd_op_r_out_bytes{%(matchers)s}[5m]))' % $.matchers(),
+ datasource='$datasource',
interval='$interval',
legendFormat='Read',
step=300,
]
),
- u.timeSeriesPanel(
+ $.timeSeriesPanel(
title='IOPS',
- datasource='${DS_PROMETHEUS}',
+ datasource='$datasource',
gridPosition={ h: 8, w: 8, x: 0, y: 16 },
fillOpacity=10,
pointSize=5,
])
.addTargets(
[
- u.addTargetSchema(
- expr='sum(irate(ceph_osd_op_w{}[1m]))',
- datasource='${DS_PROMETHEUS}',
+ $.addTargetSchema(
+ expr='sum(irate(ceph_osd_op_w{%(matchers)s}[1m]))' % $.matchers(),
+ datasource='$datasource',
interval='$interval',
legendFormat='Write',
step=300,
range=true,
),
- u.addTargetSchema(
- expr='sum(irate(ceph_osd_op_r{}[1m]))',
- datasource='${DS_PROMETHEUS}',
+ $.addTargetSchema(
+ expr='sum(irate(ceph_osd_op_r{%(matchers)s}[1m]))' % $.matchers(),
+ datasource='$datasource',
interval='$interval',
legendFormat='Read',
step=300,
]
),
- u.timeSeriesPanel(
+ $.timeSeriesPanel(
title='Pool Used Bytes',
- datasource='${DS_PROMETHEUS}',
+ datasource='$datasource',
gridPosition={ h: 8, w: 8, x: 8, y: 16 },
fillOpacity=10,
pointSize=5,
])
.addTargets(
[
- u.addTargetSchema(
- expr='(ceph_pool_bytes_used{}) *on (pool_id) group_left(name)(ceph_pool_metadata{})',
- datasource='${DS_PROMETHEUS}',
+ $.addTargetSchema(
+ expr='(ceph_pool_bytes_used{%(matchers)s}) *on (pool_id) group_left(name)(ceph_pool_metadata{%(matchers)s})' % $.matchers(),
+ datasource='$datasource',
interval='$interval',
legendFormat='{{name}}',
step=300,
]
),
- u.timeSeriesPanel(
+ $.timeSeriesPanel(
title='Pool Used RAW Bytes',
- datasource='${DS_PROMETHEUS}',
+ datasource='$datasource',
gridPosition={ h: 8, w: 8, x: 16, y: 16 },
fillOpacity=10,
pointSize=5,
)
.addTargets(
[
- u.addTargetSchema(
- expr='(ceph_pool_stored_raw{}) *on (pool_id) group_left(name)(ceph_pool_metadata{})',
- datasource='${DS_PROMETHEUS}',
+ $.addTargetSchema(
+ expr='(ceph_pool_stored_raw{%(matchers)s}) *on (pool_id) group_left(name)(ceph_pool_metadata{%(matchers)s})' % $.matchers(),
+ datasource='$datasource',
interval='',
legendFormat='{{name}}',
step=300,
]
),
- u.timeSeriesPanel(
+ $.timeSeriesPanel(
title='Pool Objects Quota',
- datasource='${DS_PROMETHEUS}',
+ datasource='$datasource',
gridPosition={ h: 7, w: 8, x: 0, y: 24 },
fillOpacity=10,
pointSize=5,
])
.addTargets(
[
- u.addTargetSchema(
- expr='(ceph_pool_quota_objects{}) *on (pool_id) group_left(name)(ceph_pool_metadata{})',
- datasource='${DS_PROMETHEUS}',
+ $.addTargetSchema(
+ expr='(ceph_pool_quota_objects{%(matchers)s}) *on (pool_id) group_left(name)(ceph_pool_metadata{%(matchers)s})' % $.matchers(),
+ datasource='$datasource',
interval='',
legendFormat='{{name}}',
step=300,
]
),
- u.timeSeriesPanel(
+ $.timeSeriesPanel(
title='Pool Quota Bytes',
- datasource='${DS_PROMETHEUS}',
+ datasource='$datasource',
gridPosition={ h: 7, w: 8, x: 8, y: 24 },
fillOpacity=10,
pointSize=5,
])
.addTargets(
[
- u.addTargetSchema(
- expr='(ceph_pool_quota_bytes{}) *on (pool_id) group_left(name)(ceph_pool_metadata{})',
- datasource='${DS_PROMETHEUS}',
+ $.addTargetSchema(
+ expr='(ceph_pool_quota_bytes{%(matchers)s}) *on (pool_id) group_left(name)(ceph_pool_metadata{%(matchers)s})' % $.matchers(),
+ datasource='$datasource',
interval='',
legendFormat='{{name}}',
step=300,
]
),
- u.timeSeriesPanel(
+ $.timeSeriesPanel(
title='Objects Per Pool',
- datasource='${DS_PROMETHEUS}',
+ datasource='$datasource',
gridPosition={ h: 7, w: 8, x: 16, y: 24 },
fillOpacity=10,
pointSize=5,
])
.addTargets(
[
- u.addTargetSchema(
- expr='(ceph_pool_objects{}) *on (pool_id) group_left(name)(ceph_pool_metadata{})',
- datasource='${DS_PROMETHEUS}',
+ $.addTargetSchema(
+ expr='(ceph_pool_objects{%(matchers)s}) * on (pool_id) group_left(name)(ceph_pool_metadata{%(matchers)s})' % $.matchers(),
+ datasource='$datasource',
interval='',
legendFormat='{{name}}',
),
]
),
- u.addRowSchema(collapse=false, showTitle=true, title='OBJECTS') + { gridPos: { x: 0, y: 31, w: 24, h: 1 } },
+ $.addRowSchema(collapse=false, showTitle=true, title='OBJECTS') + { gridPos: { x: 0, y: 31, w: 24, h: 1 } },
- u.timeSeriesPanel(
+ $.timeSeriesPanel(
title='OSD Type Count',
- datasource='${DS_PROMETHEUS}',
+ datasource='$datasource',
gridPosition={ h: 12, w: 6, x: 0, y: 32 },
fillOpacity=10,
pointSize=5,
)
.addTargets(
[
- u.addTargetSchema(
- expr='sum(ceph_pool_objects)',
- datasource='${DS_PROMETHEUS}',
+ $.addTargetSchema(
+ expr='sum(ceph_pool_objects{%(matchers)s})' % $.matchers(),
+ datasource='$datasource',
interval='$interval',
legendFormat='Total',
range=true,
]
),
- u.timeSeriesPanel(
+ $.timeSeriesPanel(
title='PGs State',
- datasource='${DS_PROMETHEUS}',
+ datasource='$datasource',
gridPosition={ h: 12, w: 8, x: 6, y: 32 },
fillOpacity=10,
pointSize=5,
)
.addTargets(
[
- u.addTargetSchema(
- expr='sum(ceph_pg_active{})',
- datasource='${DS_PROMETHEUS}',
+ $.addTargetSchema(
+ expr='sum(ceph_pg_active{%(matchers)s})' % $.matchers(),
+ datasource='$datasource',
interval='$interval',
legendFormat='Active',
range=true,
),
- u.addTargetSchema(
- expr='sum(ceph_pg_clean{})',
- datasource='${DS_PROMETHEUS}',
+ $.addTargetSchema(
+ expr='sum(ceph_pg_clean{%(matchers)s})' % $.matchers(),
+ datasource='$datasource',
interval='$interval',
legendFormat='Clean',
range=true,
),
- u.addTargetSchema(
- expr='sum(ceph_pg_peering{})',
- datasource='${DS_PROMETHEUS}',
+ $.addTargetSchema(
+ expr='sum(ceph_pg_peering{%(matchers)s})' % $.matchers(),
+ datasource='$datasource',
interval='$interval',
legendFormat='Peering',
range=true,
),
- u.addTargetSchema(
- expr='sum(ceph_pg_degraded{})',
- datasource='${DS_PROMETHEUS}',
+ $.addTargetSchema(
+ expr='sum(ceph_pg_degraded{%(matchers)s})' % $.matchers(),
+ datasource='$datasource',
interval='$interval',
legendFormat='Degraded',
range=true,
step=300,
),
- u.addTargetSchema(
- expr='sum(ceph_pg_stale{})',
- datasource='${DS_PROMETHEUS}',
+ $.addTargetSchema(
+ expr='sum(ceph_pg_stale{%(matchers)s})' % $.matchers(),
+ datasource='$datasource',
interval='$interval',
legendFormat='Stale',
range=true,
step=300,
),
- u.addTargetSchema(
- expr='sum(ceph_unclean_pgs{})',
- datasource='${DS_PROMETHEUS}',
+ $.addTargetSchema(
+ expr='sum(ceph_unclean_pgs{%(matchers)s})' % $.matchers(),
+ datasource='$datasource',
interval='$interval',
legendFormat='Unclean',
range=true,
step=300,
),
- u.addTargetSchema(
- expr='sum(ceph_pg_undersized{})',
- datasource='${DS_PROMETHEUS}',
+ $.addTargetSchema(
+ expr='sum(ceph_pg_undersized{%(matchers)s})' % $.matchers(),
+ datasource='$datasource',
interval='$interval',
legendFormat='Undersized',
range=true,
step=300,
),
- u.addTargetSchema(
- expr='sum(ceph_pg_incomplete{})',
- datasource='${DS_PROMETHEUS}',
+ $.addTargetSchema(
+ expr='sum(ceph_pg_incomplete{%(matchers)s})' % $.matchers(),
+ datasource='$datasource',
interval='$interval',
legendFormat='Incomplete',
range=true,
),
- u.addTargetSchema(
- expr='sum(ceph_pg_forced_backfill{})',
- datasource='${DS_PROMETHEUS}',
+ $.addTargetSchema(
+ expr='sum(ceph_pg_forced_backfill{%(matchers)s})' % $.matchers(),
+ datasource='$datasource',
interval='$interval',
legendFormat='Forced Backfill',
range=true,
),
- u.addTargetSchema(
- expr='sum(ceph_pg_forced_recovery{})',
- datasource='${DS_PROMETHEUS}',
+ $.addTargetSchema(
+ expr='sum(ceph_pg_forced_recovery{%(matchers)s})' % $.matchers(),
+ datasource='$datasource',
interval='$interval',
legendFormat='Forced Recovery',
range=true,
),
- u.addTargetSchema(
- expr='sum(ceph_pg_creating{})',
- datasource='${DS_PROMETHEUS}',
+ $.addTargetSchema(
+ expr='sum(ceph_pg_creating{%(matchers)s})' % $.matchers(),
+ datasource='$datasource',
interval='$interval',
legendFormat='Creating',
range=true,
),
- u.addTargetSchema(
- expr='sum(ceph_pg_wait_backfill{})',
- datasource='${DS_PROMETHEUS}',
+ $.addTargetSchema(
+ expr='sum(ceph_pg_wait_backfill{%(matchers)s})' % $.matchers(),
+ datasource='$datasource',
interval='$interval',
legendFormat='Wait Backfill',
range=true,
),
- u.addTargetSchema(
- expr='sum(ceph_pg_deep{})',
- datasource='${DS_PROMETHEUS}',
+ $.addTargetSchema(
+ expr='sum(ceph_pg_deep{%(matchers)s})' % $.matchers(),
+ datasource='$datasource',
interval='$interval',
legendFormat='Deep',
range=true,
),
- u.addTargetSchema(
- expr='sum(ceph_pg_scrubbing{})',
- datasource='${DS_PROMETHEUS}',
+ $.addTargetSchema(
+ expr='sum(ceph_pg_scrubbing{%(matchers)s})' % $.matchers(),
+ datasource='$datasource',
interval='$interval',
legendFormat='Scrubbing',
range=true,
),
- u.addTargetSchema(
- expr='sum(ceph_pg_recovering{})',
- datasource='${DS_PROMETHEUS}',
+ $.addTargetSchema(
+ expr='sum(ceph_pg_recovering{%(matchers)s})' % $.matchers(),
+ datasource='$datasource',
interval='$interval',
legendFormat='Recovering',
range=true,
),
- u.addTargetSchema(
- expr='sum(ceph_pg_repair{})',
- datasource='${DS_PROMETHEUS}',
+ $.addTargetSchema(
+ expr='sum(ceph_pg_repair{%(matchers)s})' % $.matchers(),
+ datasource='$datasource',
interval='$interval',
legendFormat='Repair',
range=true,
),
- u.addTargetSchema(
- expr='sum(ceph_pg_down{})',
- datasource='${DS_PROMETHEUS}',
+ $.addTargetSchema(
+ expr='sum(ceph_pg_down{%(matchers)s})' % $.matchers(),
+ datasource='$datasource',
interval='$interval',
legendFormat='Down',
range=true,
),
- u.addTargetSchema(
- expr='sum(ceph_pg_peered{})',
- datasource='${DS_PROMETHEUS}',
+ $.addTargetSchema(
+ expr='sum(ceph_pg_peered{%(matchers)s})' % $.matchers(),
+ datasource='$datasource',
interval='$interval',
legendFormat='Peered',
range=true,
),
- u.addTargetSchema(
- expr='sum(ceph_pg_backfill{})',
- datasource='${DS_PROMETHEUS}',
+ $.addTargetSchema(
+ expr='sum(ceph_pg_backfill{%(matchers)s})' % $.matchers(),
+ datasource='$datasource',
interval='$interval',
legendFormat='Backfill',
range=true,
),
- u.addTargetSchema(
- expr='sum(ceph_pg_remapped{})',
- datasource='${DS_PROMETHEUS}',
+ $.addTargetSchema(
+ expr='sum(ceph_pg_remapped{%(matchers)s})' % $.matchers(),
+ datasource='$datasource',
interval='$interval',
legendFormat='Remapped',
range=true,
),
- u.addTargetSchema(
- expr='sum(ceph_pg_backfill_toofull{})',
- datasource='${DS_PROMETHEUS}',
+ $.addTargetSchema(
+ expr='sum(ceph_pg_backfill_toofull{%(matchers)s})' % $.matchers(),
+ datasource='$datasource',
interval='$interval',
legendFormat='Backfill Toofull',
range=true,
]
),
- u.timeSeriesPanel(
+ $.timeSeriesPanel(
title='Stuck PGs',
- datasource='${DS_PROMETHEUS}',
+ datasource='$datasource',
gridPosition={ h: 6, w: 10, x: 14, y: 32 },
fillOpacity=10,
pointSize=5,
]
)
.addTargets([
- u.addTargetSchema(
- expr='sum(ceph_pg_degraded{})',
- datasource='${DS_PROMETHEUS}',
+ $.addTargetSchema(
+ expr='sum(ceph_pg_degraded{%(matchers)s})' % $.matchers(),
+ datasource='$datasource',
interval='$interval',
legendFormat='Degraded',
range=true,
step=300,
),
- u.addTargetSchema(
- expr='sum(ceph_pg_stale{})',
- datasource='${DS_PROMETHEUS}',
+ $.addTargetSchema(
+ expr='sum(ceph_pg_stale{%(matchers)s})' % $.matchers(),
+ datasource='$datasource',
interval='$interval',
legendFormat='Stale',
range=true,
step=300,
),
- u.addTargetSchema(
- expr='sum(ceph_pg_undersized{})',
- datasource='${DS_PROMETHEUS}',
+ $.addTargetSchema(
+ expr='sum(ceph_pg_undersized{%(matchers)s})' % $.matchers(),
+ datasource='$datasource',
interval='$interval',
legendFormat='Undersized',
range=true,
),
]),
- u.timeSeriesPanel(
+ $.timeSeriesPanel(
title='Recovery Operations',
- datasource='${DS_PROMETHEUS}',
+ datasource='$datasource',
gridPosition={ h: 6, w: 10, x: 14, y: 38 },
fillOpacity=10,
pointSize=5,
{ color: 'red', value: 80 },
])
.addTargets([
- u.addTargetSchema(
- expr='sum(irate(ceph_osd_recovery_ops{}[$interval]))',
- datasource='${DS_PROMETHEUS}',
+ $.addTargetSchema(
+ expr='sum(irate(ceph_osd_recovery_ops{%(matchers)s}[$interval]))' % $.matchers(),
+ datasource='$datasource',
interval='$interval',
legendFormat='OPS',
step=300,
),
]),
- u.addRowSchema(false, true, 'LATENCY', collapsed=true)
+ $.addRowSchema(false, true, 'LATENCY', collapsed=true)
.addPanels([
- u.heatMapPanel(
+ $.heatMapPanel(
title='OSD Apply Latency Distribution',
- datasource='${DS_PROMETHEUS}',
+ datasource='$datasource',
gridPosition={ h: 8, w: 12, x: 0, y: 42 },
colorMode='opacity',
legendShow=true,
yAxisMin='0',
yBucketSize=10,
pluginVersion='9.4.7',
- ).addTarget(u.addTargetSchema(
- expr='ceph_osd_apply_latency_ms{}',
- datasource='${DS_PROMETHEUS}',
+ ).addTarget($.addTargetSchema(
+ expr='ceph_osd_apply_latency_ms{%(matchers)s}' % $.matchers(),
+ datasource='$datasource',
interval='$interval',
instant=false,
)),
- u.heatMapPanel(
+ $.heatMapPanel(
title='OSD Commit Latency Distribution',
- datasource='${DS_PROMETHEUS}',
+ datasource='$datasource',
gridPosition={ h: 8, w: 12, x: 12, y: 42 },
colorMode='opacity',
legendShow=true,
yAxisMin='0',
yBucketSize=10,
pluginVersion='9.4.7',
- ).addTarget(u.addTargetSchema(
- expr='ceph_osd_commit_latency_ms{}',
- datasource='${DS_PROMETHEUS}',
+ ).addTarget($.addTargetSchema(
+ expr='ceph_osd_commit_latency_ms{%(matchers)s}' % $.matchers(),
+ datasource='$datasource',
interval='$interval',
instant=false,
)),
- u.heatMapPanel(
+ $.heatMapPanel(
title='OSD Read Op Latency Distribution',
- datasource='${DS_PROMETHEUS}',
+ datasource='$datasource',
gridPosition={ h: 8, w: 12, x: 0, y: 50 },
colorMode='opacity',
legendShow=true,
yAxisMin='0',
yBucketSize=null,
pluginVersion='9.4.7',
- ).addTarget(u.addTargetSchema(
- expr='rate(ceph_osd_op_r_latency_sum{}[5m]) / rate(ceph_osd_op_r_latency_count{}[5m]) >= 0',
- datasource='${DS_PROMETHEUS}',
+ ).addTarget($.addTargetSchema(
+ expr='rate(ceph_osd_op_r_latency_sum{%(matchers)s}[5m]) / rate(ceph_osd_op_r_latency_count{%(matchers)s}[5m]) >= 0' % $.matchers(),
+ datasource='$datasource',
interval='$interval',
instant=false,
)),
- u.heatMapPanel(
+ $.heatMapPanel(
title='OSD Write Op Latency Distribution',
- datasource='${DS_PROMETHEUS}',
+ datasource='$datasource',
gridPosition={ h: 8, w: 12, x: 12, y: 50 },
colorMode='opacity',
legendShow=true,
yAxisMin='0',
yBucketSize=null,
pluginVersion='9.4.7',
- ).addTarget(u.addTargetSchema(
- expr='rate(ceph_osd_op_w_latency_sum{}[5m]) / rate(ceph_osd_op_w_latency_count{}[5m]) >= 0',
- datasource='${DS_PROMETHEUS}',
+ ).addTarget($.addTargetSchema(
+ expr='rate(ceph_osd_op_w_latency_sum{%(matchers)s}[5m]) / rate(ceph_osd_op_w_latency_count{%(matchers)s}[5m]) >= 0' % $.matchers(),
+ datasource='$datasource',
interval='$interval',
legendFormat='',
instant=false,
)),
- u.timeSeriesPanel(
+ $.timeSeriesPanel(
title='Recovery Operations',
- datasource='${DS_PROMETHEUS}',
+ datasource='$datasource',
gridPosition={ h: 7, w: 12, x: 0, y: 58 },
fillOpacity=10,
pointSize=5,
{ color: 'red', value: 80 },
])
.addTargets([
- u.addTargetSchema(
- expr='avg(rate(ceph_osd_op_r_latency_sum{}[5m]) / rate(ceph_osd_op_r_latency_count{}[5m]) >= 0)',
- datasource='${DS_PROMETHEUS}',
+ $.addTargetSchema(
+ expr='avg(rate(ceph_osd_op_r_latency_sum{%(matchers)s}[5m]) / rate(ceph_osd_op_r_latency_count{%(matchers)s}[5m]) >= 0)' % $.matchers(),
+ datasource='$datasource',
legendFormat='Read',
),
- u.addTargetSchema(
- expr='avg(rate(ceph_osd_op_w_latency_sum{}[5m]) / rate(ceph_osd_op_w_latency_count{}[5m]) >= 0)',
- datasource='${DS_PROMETHEUS}',
+ $.addTargetSchema(
+ expr='avg(rate(ceph_osd_op_w_latency_sum{%(matchers)s}[5m]) / rate(ceph_osd_op_w_latency_count{%(matchers)s}[5m]) >= 0)' % $.matchers(),
+ datasource='$datasource',
legendFormat='Write',
),
]),
- u.timeSeriesPanel(
+ $.timeSeriesPanel(
title='AVG OSD Apply + Commit Latency',
- datasource='${DS_PROMETHEUS}',
+ datasource='$datasource',
gridPosition={ h: 7, w: 12, x: 12, y: 58 },
fillOpacity=10,
pointSize=5,
{ color: 'red', value: 80 },
])
.addTargets([
- u.addTargetSchema(
- expr='avg(ceph_osd_apply_latency_ms{})',
- datasource='${DS_PROMETHEUS}',
+ $.addTargetSchema(
+ expr='avg(ceph_osd_apply_latency_ms{%(matchers)s})' % $.matchers(),
+ datasource='$datasource',
legendFormat='apply',
interval='$interval',
metric='ceph_osd_perf_apply_latency_seconds',
step=4,
),
- u.addTargetSchema(
- expr='avg(ceph_osd_commit_latency_ms{})',
- datasource='${DS_PROMETHEUS}',
+ $.addTargetSchema(
+ expr='avg(ceph_osd_commit_latency_ms{%(matchers)s})' % $.matchers(),
+ datasource='$datasource',
legendFormat='commit',
interval='$interval',
metric='ceph_osd_perf_commit_latency_seconds',
]),
])
+ { gridPos: { x: 0, y: 44, w: 24, h: 1 } },
- u.addRowSchema(collapse=true, showTitle=true, title='', collapsed=false) + { gridPos: { x: 0, y: 45, w: 24, h: 1 } },
+ $.addRowSchema(collapse=true, showTitle=true, title='', collapsed=false) + { gridPos: { x: 0, y: 45, w: 24, h: 1 } },
- u.addTableExtended(
- datasource='${DS_PROMETHEUS}',
+ $.addTableExtended(
+ datasource='$datasource',
title='Ceph Versions',
gridPosition={ h: 6, w: 24, x: 0, y: 46 },
options={
},
},
]).addTargets([
- u.addTargetSchema(
- expr='count by (ceph_version)(ceph_osd_metadata{})',
- datasource='${DS_PROMETHEUS}',
+ $.addTargetSchema(
+ expr='count by (ceph_version)(ceph_osd_metadata{%(matchers)s})' % $.matchers(),
+ datasource='$datasource',
format='table',
hide=false,
exemplar=false,
legendFormat='OSD Services',
range=false,
),
- u.addTargetSchema(
- expr='count by (ceph_version)(ceph_mon_metadata{})',
- datasource='${DS_PROMETHEUS}',
+ $.addTargetSchema(
+ expr='count by (ceph_version)(ceph_mon_metadata{%(matchers)s})' % $.matchers(),
+ datasource='$datasource',
format='table',
hide=false,
exemplar=false,
legendFormat='Mon Services',
range=false,
),
- u.addTargetSchema(
- expr='count by (ceph_version)(ceph_mds_metadata{})',
- datasource='${DS_PROMETHEUS}',
+ $.addTargetSchema(
+ expr='count by (ceph_version)(ceph_mds_metadata{%(matchers)s})' % $.matchers(),
+ datasource='$datasource',
format='table',
hide=false,
exemplar=false,
legendFormat='MDS Services',
range=false,
),
- u.addTargetSchema(
- expr='count by (ceph_version)(ceph_rgw_metadata{})',
- datasource='${DS_PROMETHEUS}',
+ $.addTargetSchema(
+ expr='count by (ceph_version)(ceph_rgw_metadata{%(matchers)s})' % $.matchers(),
+ datasource='$datasource',
format='table',
hide=false,
exemplar=false,
legendFormat='RGW Services',
range=false,
),
- u.addTargetSchema(
- expr='count by (ceph_version)(ceph_mgr_metadata{})',
- datasource='${DS_PROMETHEUS}',
+ $.addTargetSchema(
+ expr='count by (ceph_version)(ceph_mgr_metadata{%(matchers)s})' % $.matchers(),
+ datasource='$datasource',
format='table',
hide=false,
exemplar=false,
.addTemplate(
$.addClusterTemplate()
)
- .addTemplate(
- $.addJobTemplate()
- )
.addTemplate(
$.addTemplateSchema('mds_servers',
'$datasource',
'none',
'Reads(-) / Writes (+)',
0,
- 'sum(rate(ceph_objecter_op_r{%(matchers)s, ceph_daemon=~"($mds_servers).*"}[$__rate_interval]))' % $.matchers(),
+ 'sum(rate(ceph_objecter_op_r{ceph_daemon=~"($mds_servers).*", %(matchers)s}[$__rate_interval]))' % $.matchers(),
'Read Ops',
0,
1,
9
)
.addTarget($.addTargetSchema(
- 'sum(rate(ceph_objecter_op_w{%(matchers)s, ceph_daemon=~"($mds_servers).*"}[$__rate_interval]))' % $.matchers(),
+ 'sum(rate(ceph_objecter_op_w{ceph_daemon=~"($mds_servers).*", %(matchers)s}[$__rate_interval]))' % $.matchers(),
'Write Ops'
))
.addSeriesOverride(
'none',
'Client Requests',
0,
- 'ceph_mds_server_handle_client_request{%(matchers)s, ceph_daemon=~"($mds_servers).*"}' % $.matchers(),
+ 'ceph_mds_server_handle_client_request{ceph_daemon=~"($mds_servers).*", %(matchers)s}' % $.matchers(),
'{{ceph_daemon}}',
12,
1,
.addTemplate(
$.addClusterTemplate()
)
- .addTemplate(
- $.addJobTemplate()
- )
.addTemplate(
$.addTemplateSchema('osd_hosts',
'$datasource',
- 'label_values(ceph_disk_occupation{%(matchers)s}, exported_instance)' % $.matchers(),
+ 'label_values(ceph_osd_metadata{%(matchers)s}, hostname)' % $.matchers(),
1,
true,
1,
.addTemplate(
$.addTemplateSchema('mon_hosts',
'$datasource',
- 'label_values(ceph_mon_metadata{%(matchers)s}, ceph_daemon)' % $.matchers(),
+ 'label_values(ceph_mon_metadata{%(matchers)s}, hostname)' % $.matchers(),
1,
true,
1,
.addTemplate(
$.addTemplateSchema('mds_hosts',
'$datasource',
- 'label_values(ceph_mds_inodes{%(matchers)s}, ceph_daemon)' % $.matchers(),
+ 'label_values(ceph_mds_inodes{%(matchers)s}, hostname)' % $.matchers(),
1,
true,
1,
.addTemplate(
$.addTemplateSchema('rgw_hosts',
'$datasource',
- 'label_values(ceph_rgw_metadata{%(matchers)s}, ceph_daemon)' % $.matchers(),
+ 'label_values(ceph_rgw_metadata{%(matchers)s}, hostname)' % $.matchers(),
1,
true,
1,
"instance", "$1", "instance", "([^.:]*).*"
) * on(instance, device) group_left(ceph_daemon) label_replace(
label_replace(
- ceph_disk_occupation_human{%(matchers)s, instance=~"($osd_hosts).*"},
+ ceph_disk_occupation_human{instance=~"($osd_hosts).*", %(matchers)s},
"device", "$1", "device", "/dev/(.*)"
), "instance", "$1", "instance", "([^.:]*).*"
)
|||
sum (
(
- rate(node_network_receive_bytes{instance=~"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*",device!="lo"}[$__rate_interval]) or
- rate(node_network_receive_bytes_total{instance=~"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*",device!="lo"}[$__rate_interval])
+ rate(node_network_receive_bytes{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*",device!="lo"}[$__rate_interval]) or
+ rate(node_network_receive_bytes_total{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*",device!="lo"}[$__rate_interval])
) unless on (device, instance)
- label_replace((bonding_slaves > 0), "device", "$1", "master", "(.+)")
+ label_replace((node_bonding_slaves > 0), "device", "$1", "master", "(.+)")
) +
sum (
(
- rate(node_network_transmit_bytes{instance=~"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*",device!="lo"}[$__rate_interval]) or
- rate(node_network_transmit_bytes_total{instance=~"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*",device!="lo"}[$__rate_interval])
+ rate(node_network_transmit_bytes{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*",device!="lo"}[$__rate_interval]) or
+ rate(node_network_transmit_bytes_total{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*",device!="lo"}[$__rate_interval])
) unless on (device, instance)
- label_replace((bonding_slaves > 0), "device", "$1", "master", "(.+)")
+ label_replace((node_bonding_slaves > 0), "device", "$1", "master", "(.+)")
)
|||,
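+ // node_exporter's bonding collector exposes node_bonding_slaves; the unprefixed bonding_slaves name appears to be stale.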
true,
rate(node_network_transmit_bytes{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*",device!="lo"}[$__rate_interval]) or
rate(node_network_transmit_bytes_total{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*",device!="lo"}[$__rate_interval])
) unless on (device, instance)
- label_replace((bonding_slaves > 0), "device", "$1", "master", "(.+)"))
+ label_replace((node_bonding_slaves > 0), "device", "$1", "master", "(.+)"))
))
|||,
'{{instance}}',
.addTemplate(
$.addClusterTemplate()
)
- .addTemplate(
- $.addJobTemplate()
- )
.addTemplate(
$.addTemplateSchema('ceph_hosts',
'$datasource',
- if $._config.showMultiCluster then ('label_values({%(clusterMatcher)s}, instance)' % $.matchers()) else 'label_values(instance)',
+ 'label_values({__name__=~"ceph_.+_metadata", %(matchers)s}, hostname)' % $.matchers(),
1,
- false,
- 3,
- 'Hostname',
- '([^.:]*).*')
+ true,
+ 1,
+ null,
+ '([^.]*).*')
)
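+ // Host names are now taken from the ceph_*_metadata series (scoped by the cluster matchers) instead of the raw instance label.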
.addPanels([
$.addRowSchema(false, true, '$ceph_hosts System Overview') + { gridPos: { x: 0, y: 0, w: 24, h: 1 } },
'OSDs',
'',
'current',
- "count(sum by (ceph_daemon) (ceph_osd_metadata{%(matchers)s, hostname='$ceph_hosts'}))" % $.matchers(),
+ 'count(sum by (ceph_daemon) (ceph_osd_metadata{hostname=~"$ceph_hosts", %(matchers)s}))' % $.matchers(),
null,
'time_series',
0,
|||
sum(
ceph_osd_stat_bytes{%(matchers)s} and
- on (ceph_daemon) ceph_disk_occupation{%(matchers)s, instance=~"($ceph_hosts)([\\\\.:].*)?"}
+ on (ceph_daemon) ceph_disk_occupation{instance=~"($ceph_hosts)([\\\\.:].*)?", %(matchers)s}
)
||| % $.matchers(),
null,
rate(node_disk_io_time_seconds_total{instance=~"($ceph_hosts)([\\\\.:].*)?"}[$__rate_interval]) * 100
), "instance", "$1", "instance", "([^:.]*).*"
) * on(instance, device) group_left(ceph_daemon) label_replace(
- label_replace(ceph_disk_occupation_human{%(matchers)s, instance=~"($ceph_hosts)([\\\\.:].*)?"},
+ label_replace(ceph_disk_occupation_human{instance=~"($ceph_hosts)([\\\\.:].*)?", %(matchers)s},
"device", "$1", "device", "/dev/(.*)"), "instance", "$1", "instance", "([^:.]*).*"
)
||| % $.matchers(),
$.addTargetSchema(
|||
topk(10,
- (sum by (instance)(ceph_daemon_health_metrics{type="SLOW_OPS", ceph_daemon=~"osd.*"}))
+ (sum by (instance)(ceph_daemon_health_metrics{type="SLOW_OPS", ceph_daemon=~"osd.*", %(matchers)s}))
)
||| % $.matchers(),
'',
.addTemplate(
$.addClusterTemplate()
)
- .addTemplate(
- $.addJobTemplate()
- )
.addPanels([
$.simpleGraphPanel(
{ '@95%ile': '#e0752d' },
'count(ceph_bluefs_wal_total_bytes{%(matchers)s})' % $.matchers(), 'bluestore', 'time_series', 2
))
.addTarget($.addTargetSchema(
- 'absent(ceph_bluefs_wal_total_bytes{job=~"$job"}) * count(ceph_osd_metadata{job=~"$job"})' % $.matchers(), 'filestore', 'time_series', 2
+ 'absent(ceph_bluefs_wal_total_bytes{%(matchers)s}) * count(ceph_osd_metadata{%(matchers)s})' % $.matchers(), 'filestore', 'time_series', 2
)),
$.pieChartPanel('OSD Size Summary', 'The pie chart shows the various OSD sizes used within the cluster', '$datasource', { x: 8, y: 8, w: 4, h: 8 }, 'table', 'bottom', true, ['percent'], { mode: 'single', sort: 'none' }, 'pie', ['percent', 'value'], 'palette-classic')
.addTarget($.addTargetSchema(
.addTemplate(
$.addClusterTemplate()
)
- .addTemplate(
- $.addJobTemplate()
- )
.addTemplate(
$.addTemplateSchema('osd',
'$datasource',
's',
'Read (-) / Write (+)',
|||
- rate(ceph_osd_op_r_latency_sum{%(matchers)s, ceph_daemon=~"$osd"}[$__rate_interval]) /
+ rate(ceph_osd_op_r_latency_sum{ceph_daemon=~"$osd", %(matchers)s}[$__rate_interval]) /
on (ceph_daemon) rate(ceph_osd_op_r_latency_count{%(matchers)s}[$__rate_interval])
||| % $.matchers(),
|||
- rate(ceph_osd_op_w_latency_sum{%(matchers)s, ceph_daemon=~"$osd"}[$__rate_interval]) /
+ rate(ceph_osd_op_w_latency_sum{ceph_daemon=~"$osd", %(matchers)s}[$__rate_interval]) /
on (ceph_daemon) rate(ceph_osd_op_w_latency_count{%(matchers)s}[$__rate_interval])
||| % $.matchers(),
'read',
'',
'short',
'Read (-) / Write (+)',
- 'rate(ceph_osd_op_r{%(matchers)s, ceph_daemon=~"$osd"}[$__rate_interval])' % $.matchers(),
- 'rate(ceph_osd_op_w{%(matchers)s, ceph_daemon=~"$osd"}[$__rate_interval])' % $.matchers(),
+ 'rate(ceph_osd_op_r{ceph_daemon=~"$osd", %(matchers)s}[$__rate_interval])' % $.matchers(),
+ 'rate(ceph_osd_op_w{ceph_daemon=~"$osd", %(matchers)s}[$__rate_interval])' % $.matchers(),
'Reads',
'Writes',
6,
'',
'bytes',
'Read (-) / Write (+)',
- 'rate(ceph_osd_op_r_out_bytes{%(matchers)s, ceph_daemon=~"$osd"}[$__rate_interval])' % $.matchers(),
- 'rate(ceph_osd_op_w_in_bytes{%(matchers)s, ceph_daemon=~"$osd"}[$__rate_interval])' % $.matchers(),
+ 'rate(ceph_osd_op_r_out_bytes{ceph_daemon=~"$osd", %(matchers)s}[$__rate_interval])' % $.matchers(),
+ 'rate(ceph_osd_op_w_in_bytes{ceph_daemon=~"$osd", %(matchers)s}[$__rate_interval])' % $.matchers(),
'Read Bytes',
'Write Bytes',
12,
|||
(
label_replace(
- rate(node_disk_read_time_seconds_total{%(clusterMatcher)s}[$__rate_interval]) /
- rate(node_disk_reads_completed_total{%(clusterMatcher)s}[$__rate_interval]),
+ rate(node_disk_read_time_seconds_total[$__rate_interval]) /
+ rate(node_disk_reads_completed_total[$__rate_interval]),
"instance", "$1", "instance", "([^:.]*).*"
) and on (instance, device) label_replace(
label_replace(
- ceph_disk_occupation_human{%(matchers)s, ceph_daemon=~"$osd"},
+ ceph_disk_occupation_human{ceph_daemon=~"$osd", %(matchers)s},
"device", "$1", "device", "/dev/(.*)"
), "instance", "$1", "instance", "([^:.]*).*"
)
|||
(
label_replace(
- rate(node_disk_write_time_seconds_total{%(clusterMatcher)s}[$__rate_interval]) /
- rate(node_disk_writes_completed_total{%(clusterMatcher)s}[$__rate_interval]),
+ rate(node_disk_write_time_seconds_total[$__rate_interval]) /
+ rate(node_disk_writes_completed_total[$__rate_interval]),
"instance", "$1", "instance", "([^:.]*).*") and on (instance, device)
label_replace(
label_replace(
- ceph_disk_occupation_human{%(matchers)s, ceph_daemon=~"$osd"}, "device", "$1", "device", "/dev/(.*)"
+ ceph_disk_occupation_human{ceph_daemon=~"$osd", %(matchers)s}, "device", "$1", "device", "/dev/(.*)"
), "instance", "$1", "instance", "([^:.]*).*"
)
)
'Read (-) / Write (+)',
|||
label_replace(
- rate(node_disk_writes_completed_total{%(clusterMatcher)s}[$__rate_interval]),
+ rate(node_disk_writes_completed_total[$__rate_interval]),
"instance", "$1", "instance", "([^:.]*).*"
) and on (instance, device) label_replace(
label_replace(
- ceph_disk_occupation_human{%(matchers)s, ceph_daemon=~"$osd"},
+ ceph_disk_occupation_human{ceph_daemon=~"$osd", %(matchers)s},
"device", "$1", "device", "/dev/(.*)"
), "instance", "$1", "instance", "([^:.]*).*"
)
||| % $.matchers(),
|||
label_replace(
- rate(node_disk_reads_completed_total{%(clusterMatcher)s}[$__rate_interval]),
+ rate(node_disk_reads_completed_total[$__rate_interval]),
"instance", "$1", "instance", "([^:.]*).*"
) and on (instance, device) label_replace(
label_replace(
- ceph_disk_occupation_human{%(matchers)s, ceph_daemon=~"$osd"},
+ ceph_disk_occupation_human{ceph_daemon=~"$osd", %(matchers)s},
"device", "$1", "device", "/dev/(.*)"
), "instance", "$1", "instance", "([^:.]*).*"
)
'Read (-) / Write (+)',
|||
label_replace(
- rate(node_disk_read_bytes_total{%(clusterMatcher)s}[$__rate_interval]), "instance", "$1", "instance", "([^:.]*).*"
+ rate(node_disk_read_bytes_total[$__rate_interval]), "instance", "$1", "instance", "([^:.]*).*"
) and on (instance, device) label_replace(
label_replace(
- ceph_disk_occupation_human{%(matchers)s, ceph_daemon=~"$osd"},
+ ceph_disk_occupation_human{ceph_daemon=~"$osd", %(matchers)s},
"device", "$1", "device", "/dev/(.*)"
), "instance", "$1", "instance", "([^:.]*).*"
)
||| % $.matchers(),
|||
label_replace(
- rate(node_disk_written_bytes_total{%(clusterMatcher)s}[$__rate_interval]), "instance", "$1", "instance", "([^:.]*).*"
+ rate(node_disk_written_bytes_total[$__rate_interval]), "instance", "$1", "instance", "([^:.]*).*"
) and on (instance, device) label_replace(
label_replace(
- ceph_disk_occupation_human{%(matchers)s, ceph_daemon=~"$osd"},
+ ceph_disk_occupation_human{ceph_daemon=~"$osd", %(matchers)s},
"device", "$1", "device", "/dev/(.*)"
), "instance", "$1", "instance", "([^:.]*).*"
)
.addTarget($.addTargetSchema(
|||
label_replace(
- rate(node_disk_io_time_seconds_total{%(clusterMatcher)s}[$__rate_interval]),
+ rate(node_disk_io_time_seconds_total[$__rate_interval]),
"instance", "$1", "instance", "([^:.]*).*"
) and on (instance, device) label_replace(
label_replace(
- ceph_disk_occupation_human{%(matchers)s, ceph_daemon=~"$osd"}, "device", "$1", "device", "/dev/(.*)"
+ ceph_disk_occupation_human{ceph_daemon=~"$osd", %(matchers)s}, "device", "$1", "device", "/dev/(.*)"
), "instance", "$1", "instance", "([^:.]*).*"
)
||| % $.matchers(),
.addTemplate(
$.addClusterTemplate()
)
- .addTemplate(
- $.addJobTemplate()
- )
.addTemplate(
g.template.custom(label='TopK',
name='topk',
'Pools with Compression',
'Count of the pools that have compression enabled',
'current',
- 'count(ceph_pool_metadata{%(matchers)s, compression_mode!="none"})' % $.matchers(),
+ 'count(ceph_pool_metadata{compression_mode!="none", %(matchers)s})' % $.matchers(),
null,
'',
3,
true
),
$.addTargetSchema(
- 'ceph_pool_metadata{%(matchers)s, compression_mode!="none"}' % $.matchers(), 'K', 'table', 1, true
+ 'ceph_pool_metadata{compression_mode!="none", %(matchers)s}' % $.matchers(), 'K', 'table', 1, true
),
$.addTargetSchema('', 'L', '', '', null),
]
.addTemplate(
$.addClusterTemplate()
)
- .addTemplate(
- $.addJobTemplate()
- )
.addTemplate(
$.addTemplateSchema('pool_name',
'$datasource',
'.7,.8',
|||
(ceph_pool_stored{%(matchers)s} / (ceph_pool_stored{%(matchers)s} + ceph_pool_max_avail{%(matchers)s})) *
- on(pool_id) group_left(instance, name) ceph_pool_metadata{%(matchers)s, name=~"$pool_name"}
+ on(pool_id) group_left(instance, name) ceph_pool_metadata{name=~"$pool_name", %(matchers)s}
||| % $.matchers(),
'time_series',
0,
'current',
|||
(ceph_pool_max_avail{%(matchers)s} / deriv(ceph_pool_stored{%(matchers)s}[6h])) *
- on(pool_id) group_left(instance, name) ceph_pool_metadata{%(matchers)s, name=~"$pool_name"} > 0
+ on(pool_id) group_left(instance, name) ceph_pool_metadata{name=~"$pool_name", %(matchers)s} > 0
||| % $.matchers(),
'time_series',
7,
null,
|||
deriv(ceph_pool_objects{%(matchers)s}[1m]) *
- on(pool_id) group_left(instance, name) ceph_pool_metadata{%(matchers)s, name=~"$pool_name"}
+ on(pool_id) group_left(instance, name) ceph_pool_metadata{name=~"$pool_name", %(matchers)s}
||| % $.matchers(),
'Objects per second',
12,
null,
|||
rate(ceph_pool_rd{%(matchers)s}[$__rate_interval]) *
- on(pool_id) group_left(instance,name) ceph_pool_metadata{%(matchers)s, name=~"$pool_name"}
+ on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~"$pool_name", %(matchers)s}
||| % $.matchers(),
'reads',
0,
$.addTargetSchema(
|||
rate(ceph_pool_wr{%(matchers)s}[$__rate_interval]) *
- on(pool_id) group_left(instance, name) ceph_pool_metadata{%(matchers)s, name=~"$pool_name"}
+ on(pool_id) group_left(instance, name) ceph_pool_metadata{name=~"$pool_name", %(matchers)s}
||| % $.matchers(),
'writes'
)
null,
|||
rate(ceph_pool_rd_bytes{%(matchers)s}[$__rate_interval]) +
- on(pool_id) group_left(instance, name) ceph_pool_metadata{%(matchers)s, name=~"$pool_name"}
+ on(pool_id) group_left(instance, name) ceph_pool_metadata{name=~"$pool_name", %(matchers)s}
||| % $.matchers(),
'reads',
12,
$.addTargetSchema(
|||
rate(ceph_pool_wr_bytes{%(matchers)s}[$__rate_interval]) +
- on(pool_id) group_left(instance,name) ceph_pool_metadata{%(matchers)s, name=~"$pool_name"}
+ on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~"$pool_name", %(matchers)s}
||| % $.matchers(),
'writes'
)
null,
|||
ceph_pool_objects{%(matchers)s} *
- on(pool_id) group_left(instance,name) ceph_pool_metadata{%(matchers)s, name=~"$pool_name"}
+ on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~"$pool_name", %(matchers)s}
||| % $.matchers(),
'Number of Objects',
0,
local g = import 'grafonnet/grafana.libsonnet';
-local u = import 'utils.libsonnet';
local info_rbd_stats = std.join(
'',
.addTemplate(
$.addClusterTemplate()
)
- .addTemplate(
- $.addJobTemplate()
- )
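+ // Pool and image template options are now derived from ceph_rbd_read_ops so they respect the cluster scope instead of listing every label value.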
.addTemplate(
$.addTemplateSchema('pool',
'$datasource',
- 'label_values(pool)',
+ 'label_values(ceph_rbd_read_ops{%(matchers)s}, pool)' % $.matchers(),
1,
false,
0,
'',
'')
)
+
.addTemplate(
$.addTemplateSchema('image',
'$datasource',
- 'label_values(image)',
+ 'label_values(ceph_rbd_read_ops{%(matchers)s, pool="$pool"}, image)' % $.matchers(),
1,
false,
0,
'IOPS',
info_rbd_stats,
'iops',
- 'rate(ceph_rbd_write_ops{%(matchers)s, pool="$pool", image="$image"}[$__rate_interval])' % $.matchers()
+ 'rate(ceph_rbd_write_ops{pool="$pool", image="$image", %(matchers)s}[$__rate_interval])' % $.matchers()
,
- 'rate(ceph_rbd_read_ops{%(matchers)s, pool="$pool", image="$image"}[$__rate_interval])' % $.matchers(),
+ 'rate(ceph_rbd_read_ops{pool="$pool", image="$image", %(matchers)s}[$__rate_interval])' % $.matchers(),
0,
0,
8,
'Throughput',
info_rbd_stats,
'Bps',
- 'rate(ceph_rbd_write_bytes{%(matchers)s, pool="$pool", image="$image"}[$__rate_interval])' % $.matchers(),
- 'rate(ceph_rbd_read_bytes{%(matchers)s, pool="$pool", image="$image"}[$__rate_interval])' % $.matchers(),
+ 'rate(ceph_rbd_write_bytes{pool="$pool", image="$image", %(matchers)s}[$__rate_interval])' % $.matchers(),
+ 'rate(ceph_rbd_read_bytes{pool="$pool", image="$image", %(matchers)s}[$__rate_interval])' % $.matchers(),
8,
0,
8,
info_rbd_stats,
'ns',
|||
- rate(ceph_rbd_write_latency_sum{%(matchers)s, pool="$pool", image="$image"}[$__rate_interval]) /
- rate(ceph_rbd_write_latency_count{%(matchers)s, pool="$pool", image="$image"}[$__rate_interval])
+ rate(ceph_rbd_write_latency_sum{pool="$pool", image="$image", %(matchers)s}[$__rate_interval]) /
+ rate(ceph_rbd_write_latency_count{pool="$pool", image="$image", %(matchers)s}[$__rate_interval])
||| % $.matchers(),
|||
- rate(ceph_rbd_read_latency_sum{%(matchers)s, pool="$pool", image="$image"}[$__rate_interval]) /
- rate(ceph_rbd_read_latency_count{%(matchers)s, pool="$pool", image="$image"}[$__rate_interval])
+ rate(ceph_rbd_read_latency_sum{pool="$pool", image="$image", %(matchers)s}[$__rate_interval]) /
+ rate(ceph_rbd_read_latency_count{pool="$pool", image="$image", %(matchers)s}[$__rate_interval])
||| % $.matchers(),
16,
0,
.addTemplate(
$.addClusterTemplate()
)
- .addTemplate(
- $.addJobTemplate()
- )
.addPanels([
RbdOverviewPanel(
'IOPS',
g.template.datasource('datasource', 'prometheus', 'default', label='Data Source')
)
+ .addTemplate(
+ $.addClusterTemplate()
+ )
+
.addTemplate(
$.addTemplateSchema('rgw_servers',
'$datasource',
- 'label_values(ceph_rgw_metadata{}, ceph_daemon)',
+ 'label_values(ceph_rgw_metadata{%(matchers)s}, ceph_daemon)' % $.matchers(),
2,
true,
0,
])
.addTargets([
$.addTargetSchema(
- expr='sum(ceph_rgw_op_put_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
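+ // Cluster scoping is applied on the ceph_rgw_metadata join side of each RGW query.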
+ expr='sum(ceph_rgw_op_put_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
datasource='${datasource}',
legendFormat='__auto',
range=true
])
.addTargets([
$.addTargetSchema(
- expr='sum\n(ceph_rgw_op_get_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
+ expr='sum\n(ceph_rgw_op_get_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
datasource='${datasource}',
legendFormat='__auto',
range=true
])
.addTargets([
$.addTargetSchema(
- expr='sum(ceph_rgw_op_put_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
+ expr='sum(ceph_rgw_op_put_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
datasource='${datasource}',
legendFormat='__auto',
range=true
])
.addTargets([
$.addTargetSchema(
- expr='sum\n((sum by(instance_id)(ceph_rgw_op_put_obj_bytes) > 0) / (sum by(instance_id)(ceph_rgw_op_put_obj_ops) > 0) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
+ expr='sum\n((sum by(instance_id)(ceph_rgw_op_put_obj_bytes) > 0) / (sum by(instance_id)(ceph_rgw_op_put_obj_ops) > 0) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
datasource='${datasource}',
legendFormat='__auto',
range=true
)
.addTargets([
$.addTargetSchema(
- expr='sum(ceph_rgw_op_list_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
+ expr='sum(ceph_rgw_op_list_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
datasource='${datasource}',
legendFormat='List Objects',
range=false,
instant=true
),
$.addTargetSchema(
- expr='sum(ceph_rgw_op_list_buckets_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
+ expr='sum(ceph_rgw_op_list_buckets_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
datasource='${datasource}',
legendFormat='List Buckets',
range=true
),
$.addTargetSchema(
- expr='sum(ceph_rgw_op_put_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
+ expr='sum(ceph_rgw_op_put_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
datasource='${datasource}',
legendFormat='Put Objects',
range=false,
instant=true
),
$.addTargetSchema(
- expr='sum(ceph_rgw_op_per_bucket_get_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
+ expr='sum(ceph_rgw_op_per_bucket_get_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
datasource='${datasource}',
legendFormat='Get Objects',
range=false,
instant=true
),
$.addTargetSchema(
- expr='sum(ceph_rgw_op_del_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
+ expr='sum(ceph_rgw_op_del_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
datasource='${datasource}',
legendFormat='Delete Objects',
range=false,
instant=true
),
$.addTargetSchema(
- expr='sum(ceph_rgw_op_del_bucket_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
+ expr='sum(ceph_rgw_op_del_bucket_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
datasource='${datasource}',
legendFormat='Delete Buckets',
range=false,
instant=true
),
$.addTargetSchema(
- expr='sum(ceph_rgw_op_copy_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
+ expr='sum(ceph_rgw_op_copy_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
datasource='${datasource}',
legendFormat='Copy Objects',
range=true
)
.addTargets([
$.addTargetSchema(
- expr='sum(ceph_rgw_op_put_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
+ expr='sum(ceph_rgw_op_put_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
datasource='${datasource}',
legendFormat='Put Objects',
range=false,
instant=true
),
$.addTargetSchema(
- expr='sum(ceph_rgw_op_per_bucket_get_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
+ expr='sum(ceph_rgw_op_per_bucket_get_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
datasource='${datasource}',
legendFormat='Get Objects',
range=false,
instant=true
),
$.addTargetSchema(
- expr='sum(ceph_rgw_op_del_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
+ expr='sum(ceph_rgw_op_del_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
datasource='${datasource}',
legendFormat='Delete Objects',
range=false,
instant=true
),
$.addTargetSchema(
- expr='sum(ceph_rgw_op_copy_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
+ expr='sum(ceph_rgw_op_copy_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
datasource='${datasource}',
legendFormat='Copy Objects',
range=true
)
.addTargets([
$.addTargetSchema(
- expr='sum(ceph_rgw_op_list_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
+ expr='sum(ceph_rgw_op_list_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
datasource='${datasource}',
legendFormat='List Object',
range=false,
instant=true
),
$.addTargetSchema(
- expr='sum(ceph_rgw_op_list_buckets_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
+ expr='sum(ceph_rgw_op_list_buckets_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
datasource='${datasource}',
legendFormat='List Bucket',
range=true
),
$.addTargetSchema(
- expr='sum(ceph_rgw_op_put_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
+ expr='sum(ceph_rgw_op_put_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
datasource='${datasource}',
legendFormat='Put Object',
range=false,
instant=true
),
$.addTargetSchema(
- expr='sum(ceph_rgw_op_get_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
+ expr='sum(ceph_rgw_op_get_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
datasource='${datasource}',
legendFormat='Get Object',
range=false,
instant=true
),
$.addTargetSchema(
- expr='sum(ceph_rgw_op_del_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
+ expr='sum(ceph_rgw_op_del_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
datasource='${datasource}',
legendFormat='Delete Object',
range=false,
instant=true
),
$.addTargetSchema(
- expr='sum(ceph_rgw_op_del_bucket_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
+ expr='sum(ceph_rgw_op_del_bucket_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
datasource='${datasource}',
legendFormat='Delete Bucket',
range=false,
instant=true
),
$.addTargetSchema(
- expr='sum(ceph_rgw_op_copy_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
+ expr='sum(ceph_rgw_op_copy_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
datasource='${datasource}',
legendFormat='Copy Object',
range=true
},
]).addTargets([
$.addTargetSchema(
- expr='sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_put_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
+ expr='sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_put_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
datasource={ type: 'prometheus', uid: '${datasource}' },
format='table',
hide=false,
range=false,
),
$.addTargetSchema(
- expr='sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_get_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
+ expr='sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_get_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
datasource={ type: 'prometheus', uid: '${datasource}' },
format='table',
hide=false,
range=false,
),
$.addTargetSchema(
- expr='sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_del_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
+ expr='sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_del_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
datasource={ type: 'prometheus', uid: '${datasource}' },
format='table',
hide=false,
range=false,
),
$.addTargetSchema(
- expr='sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_copy_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
+ expr='sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_copy_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
datasource={ type: 'prometheus', uid: '${datasource}' },
format='table',
hide=false,
},
]).addTargets([
$.addTargetSchema(
- expr='ceph_rgw_op_per_bucket_list_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}',
+ expr='ceph_rgw_op_per_bucket_list_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s}' % $.matchers(),
datasource={ type: 'prometheus', uid: '${datasource}' },
format='table',
hide=false,
range=false,
),
$.addTargetSchema(
- expr='ceph_rgw_op_per_bucket_put_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}',
+ expr='ceph_rgw_op_per_bucket_put_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s}' % $.matchers(),
datasource={ type: 'prometheus', uid: '${datasource}' },
format='table',
hide=false,
range=false,
),
$.addTargetSchema(
- expr='ceph_rgw_op_per_bucket_get_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}',
+ expr='ceph_rgw_op_per_bucket_get_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s}' % $.matchers(),
datasource={ type: 'prometheus', uid: '${datasource}' },
format='table',
hide=false,
range=false,
),
$.addTargetSchema(
- expr='ceph_rgw_op_per_bucket_del_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}',
+ expr='ceph_rgw_op_per_bucket_del_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s}' % $.matchers(),
datasource={ type: 'prometheus', uid: '${datasource}' },
format='table',
hide=false,
range=false,
),
$.addTargetSchema(
- expr='ceph_rgw_op_per_bucket_copy_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}',
+ expr='ceph_rgw_op_per_bucket_copy_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s}' % $.matchers(),
datasource={ type: 'prometheus', uid: '${datasource}' },
format='table',
hide=false,
},
]).addTargets([
$.addTargetSchema(
- expr='ceph_rgw_op_per_user_put_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}',
+ expr='ceph_rgw_op_per_user_put_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s}' % $.matchers(),
datasource={ type: 'prometheus', uid: '${datasource}' },
format='table',
hide=false,
range=false,
),
$.addTargetSchema(
- expr='ceph_rgw_op_per_user_get_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}',
+ expr='ceph_rgw_op_per_user_get_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s}' % $.matchers(),
datasource={ type: 'prometheus', uid: '${datasource}' },
format='table',
hide=false,
range=false,
),
$.addTargetSchema(
- expr='ceph_rgw_op_per_user_del_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}',
+ expr='ceph_rgw_op_per_user_del_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s}' % $.matchers(),
datasource={ type: 'prometheus', uid: '${datasource}' },
format='table',
hide=false,
range=false,
),
$.addTargetSchema(
- expr='ceph_rgw_op_per_user_copy_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}',
+ expr='ceph_rgw_op_per_user_copy_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s}' % $.matchers(),
datasource={ type: 'prometheus', uid: '${datasource}' },
format='table',
hide=false,
},
]).addTargets([
$.addTargetSchema(
- expr='ceph_rgw_op_per_user_list_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}',
+ expr='ceph_rgw_op_per_user_list_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s}' % $.matchers(),
datasource={ type: 'prometheus', uid: '${datasource}' },
format='table',
hide=false,
range=false,
),
$.addTargetSchema(
- expr='ceph_rgw_op_per_user_put_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}',
+ expr='ceph_rgw_op_per_user_put_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s}' % $.matchers(),
datasource={ type: 'prometheus', uid: '${datasource}' },
format='table',
hide=false,
range=false,
),
$.addTargetSchema(
- expr='ceph_rgw_op_per_user_get_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}',
+ expr='ceph_rgw_op_per_user_get_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s}' % $.matchers(),
datasource={ type: 'prometheus', uid: '${datasource}' },
format='table',
hide=false,
range=false,
),
$.addTargetSchema(
- expr='ceph_rgw_op_per_user_del_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}',
+ expr='ceph_rgw_op_per_user_del_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s}' % $.matchers(),
datasource={ type: 'prometheus', uid: '${datasource}' },
format='table',
hide=false,
range=false,
),
$.addTargetSchema(
- expr='ceph_rgw_op_per_user_copy_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}',
+ expr='ceph_rgw_op_per_user_copy_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s}' % $.matchers(),
datasource={ type: 'prometheus', uid: '${datasource}' },
format='table',
hide=false,
)
.addTargets([
$.addTargetSchema(
- expr='topk(5, \n sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_put_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})\n)',
+ expr='topk(5, \n sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_put_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})\n)' % $.matchers(),
datasource='${datasource}',
legendFormat='{{ceph_daemon}} - {{bucket}}',
range=false,
)
.addTargets([
$.addTargetSchema(
- expr='topk(5, \n sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_get_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})\n)',
+ expr='topk(5, \n sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_get_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})\n)' % $.matchers(),
datasource='${datasource}',
legendFormat='{{ceph_daemon}} - {{bucket}}',
range=false,
)
.addTargets([
$.addTargetSchema(
- expr='topk(5,\n sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_put_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})\n)',
+ expr='topk(5,\n sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_put_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})\n)' % $.matchers(),
datasource='${datasource}',
legendFormat='{{ceph_daemon}} - {{bucket}}',
range=false,
)
.addTargets([
$.addTargetSchema(
- expr='topk(5,\n sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_get_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})\n)',
+ expr='topk(5,\n sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_get_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})\n)' % $.matchers(),
datasource='${datasource}',
legendFormat='{{ceph_daemon}} - {{bucket}}',
range=false,
.addTargets(
[
$.addTargetSchema(
- expr='sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_put_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
+ expr='sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_put_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
datasource='${datasource}',
format='time_series',
instant=false,
.addTargets(
[
$.addTargetSchema(
- expr='sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_get_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
+ expr='sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_get_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
datasource='${datasource}',
format='time_series',
instant=false,
.addTargets(
[
$.addTargetSchema(
- expr='sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_copy_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
+ expr='sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_copy_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
datasource='${datasource}',
format='time_series',
instant=false,
.addTargets(
[
$.addTargetSchema(
- expr='sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_del_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
+ expr='sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_del_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
datasource='${datasource}',
format='time_series',
instant=false,
.addTargets(
[
$.addTargetSchema(
- expr='sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_get_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
+ expr='sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_get_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
datasource='${datasource}',
format='time_series',
instant=false,
.addTargets(
[
$.addTargetSchema(
- expr='sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_put_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
+ expr='sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_put_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
datasource='${datasource}',
format='time_series',
instant=false,
.addTargets(
[
$.addTargetSchema(
- expr='sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_list_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
+ expr='sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_list_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
datasource='${datasource}',
format='time_series',
instant=false,
.addTargets(
[
$.addTargetSchema(
- expr='sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_del_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
+ expr='sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_del_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
datasource='${datasource}',
format='time_series',
instant=false,
.addTargets(
[
$.addTargetSchema(
- expr='sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_copy_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
+ expr='sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_copy_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
datasource='${datasource}',
format='time_series',
instant=false,
},
]).addTargets([
$.addTargetSchema(
- expr='sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_put_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
+ expr='sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_put_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
datasource={ type: 'prometheus', uid: '${datasource}' },
format='table',
hide=false,
range=false,
),
$.addTargetSchema(
- expr='sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_get_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
+ expr='sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_get_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
datasource={ type: 'prometheus', uid: '${datasource}' },
format='table',
hide=false,
range=false,
),
$.addTargetSchema(
- expr='sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_del_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
+ expr='sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_del_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
datasource={ type: 'prometheus', uid: '${datasource}' },
format='table',
hide=false,
range=false,
),
$.addTargetSchema(
- expr='sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_copy_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
+ expr='sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_copy_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
datasource={ type: 'prometheus', uid: '${datasource}' },
format='table',
hide=false,
range=false,
),
$.addTargetSchema(
- expr='sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_list_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
+ expr='sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_list_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
datasource={ type: 'prometheus', uid: '${datasource}' },
format='table',
hide=false,
)
.addTargets([
$.addTargetSchema(
- expr='topk(5, \n sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_put_obj_ops ) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})\n)\n',
+ expr='topk(5, \n sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_put_obj_ops ) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})\n)\n' % $.matchers(),
datasource='${datasource}',
legendFormat='{{ceph_daemon}} - {{user}}',
range=false,
)
.addTargets([
$.addTargetSchema(
- expr='topk(5, \n sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_get_obj_ops ) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})\n)\n',
+ expr='topk(5, \n sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_get_obj_ops ) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})\n)\n' % $.matchers(),
datasource='${datasource}',
legendFormat='{{ceph_daemon}} - {{user}}',
range=false,
)
.addTargets([
$.addTargetSchema(
- expr='topk(5, \n sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_put_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})\n)',
+ expr='topk(5, \n sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_put_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})\n)' % $.matchers(),
datasource='${datasource}',
legendFormat='{{ceph_daemon}} - {{user}}',
range=false,
)
.addTargets([
$.addTargetSchema(
- expr='topk(5, \n sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_get_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})\n)',
+ expr='topk(5, \n sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_get_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})\n)' % $.matchers(),
datasource='${datasource}',
legendFormat='{{ceph_daemon}} - {{user}}',
range=false,
.addTargets(
[
$.addTargetSchema(
- expr='sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_put_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
+ expr='sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_put_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
datasource='${datasource}',
format='time_series',
instant=false,
.addTargets(
[
$.addTargetSchema(
- expr='sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_get_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
+ expr='sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_get_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
datasource='${datasource}',
format='time_series',
instant=false,
.addTargets(
[
$.addTargetSchema(
- expr='sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_del_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
+ expr='sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_del_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
datasource='${datasource}',
format='time_series',
instant=false,
.addTargets(
[
$.addTargetSchema(
- expr='sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_copy_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
+ expr='sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_copy_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
datasource='${datasource}',
format='time_series',
instant=false,
.addTargets(
[
$.addTargetSchema(
- expr='sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_get_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
+ expr='sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_get_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
datasource='${datasource}',
format='time_series',
instant=false,
.addTargets(
[
$.addTargetSchema(
- expr='sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_put_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
+ expr='sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_put_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
datasource='${datasource}',
format='time_series',
instant=false,
.addTargets(
[
$.addTargetSchema(
- expr='sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_list_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
+ expr='sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_list_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
datasource='${datasource}',
format='time_series',
instant=false,
.addTargets(
[
$.addTargetSchema(
- expr='sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_del_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
+ expr='sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_del_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
datasource='${datasource}',
format='time_series',
instant=false,
.addTargets(
[
$.addTargetSchema(
- expr='sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_copy_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
+ expr='sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_copy_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
datasource='${datasource}',
format='time_series',
instant=false,
},
]).addTargets([
$.addTargetSchema(
- expr='sum by (user, ceph_daemon) (ceph_rgw_op_per_user_put_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
+ expr='sum by (user, ceph_daemon) (ceph_rgw_op_per_user_put_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
datasource={ type: 'prometheus', uid: '${datasource}' },
format='table',
hide=false,
range=false,
),
$.addTargetSchema(
- expr='sum by (user, ceph_daemon) (ceph_rgw_op_per_user_get_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
+ expr='sum by (user, ceph_daemon) (ceph_rgw_op_per_user_get_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
datasource={ type: 'prometheus', uid: '${datasource}' },
format='table',
hide=false,
range=false,
),
$.addTargetSchema(
- expr='sum by (user, ceph_daemon) (ceph_rgw_op_per_user_del_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
+ expr='sum by (user, ceph_daemon) (ceph_rgw_op_per_user_del_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
datasource={ type: 'prometheus', uid: '${datasource}' },
format='table',
hide=false,
range=false,
),
$.addTargetSchema(
- expr='sum by (user, ceph_daemon) (ceph_rgw_op_per_user_copy_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
+ expr='sum by (user, ceph_daemon) (ceph_rgw_op_per_user_copy_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
datasource={ type: 'prometheus', uid: '${datasource}' },
format='table',
hide=false,
range=false,
),
$.addTargetSchema(
- expr='sum by (user, ceph_daemon) (ceph_rgw_op_per_user_list_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"})',
+ expr='sum by (user, ceph_daemon) (ceph_rgw_op_per_user_list_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s})' % $.matchers(),
datasource={ type: 'prometheus', uid: '${datasource}' },
format='table',
hide=false,
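All of the RGW op expressions above follow the same rewrite: the fixed ceph_daemon=~"$rgw_servers" selector gains a trailing %(matchers)s placeholder, which $.matchers() (redefined in utils.libsonnet further down) expands to cluster=~"$cluster", when showMultiCluster is enabled and to an empty string otherwise. Keeping the placeholder last leaves the selector valid PromQL either way, since a trailing comma inside the braces is allowed. A minimal stand-alone sketch of the two renderings; the file name and the inlined matcher object are illustrative only, the dashboards use $.matchers():

// selector_demo.jsonnet -- hypothetical sketch; the real helper is $.matchers() in utils.libsonnet.
local render(showMultiCluster) =
  local m = { matchers: if showMultiCluster then 'cluster=~"$cluster", ' else '' };
  'ceph_rgw_op_put_obj_ops * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s}' % m;

{
  multi: render(true),    // ...ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", cluster=~"$cluster", }
  single: render(false),  // ...ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", }
}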
local g = import 'grafonnet/grafana.libsonnet';
-local u = import 'utils.libsonnet';
(import 'utils.libsonnet') {
'radosgw-sync-overview.json':
.addTemplate(
$.addClusterTemplate()
)
- .addTemplate(
- $.addJobTemplate()
- )
+
.addTemplate(
$.addTemplateSchema(
'rgw_servers',
1,
true,
1,
- '',
- 'RGW Server'
+ null,
+ 'rgw.(.*)'
)
)
.addPanels([
.addTemplate(
$.addClusterTemplate()
)
- .addTemplate(
- $.addJobTemplate()
- )
.addTemplate(
$.addTemplateSchema(
'rgw_servers',
true,
1,
'',
- 'RGW Server'
+ '.*'
)
)
.addTemplate(
.addTemplate(
$.addClusterTemplate()
)
- .addTemplate(
- $.addJobTemplate()
- )
.addTemplate(
$.addTemplateSchema('rgw_servers',
'$datasource',
sum by (instance_id) (
rate(ceph_rgw_op_get_obj_lat_sum{%(matchers)s}[$__rate_interval]) /
rate(ceph_rgw_op_get_obj_lat_count{%(matchers)s}[$__rate_interval])
- ) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s, ceph_daemon=~"$rgw_servers"}
+ ) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s}
||| % $.matchers(),
|||
sum by (instance_id) (
rate(ceph_rgw_op_put_obj_lat_sum{%(matchers)s}[$__rate_interval]) /
rate(ceph_rgw_op_put_obj_lat_count{%(matchers)s}[$__rate_interval])
- ) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s, ceph_daemon=~"$rgw_servers"}
+ ) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s}
||| % $.matchers(),
'GET {{ceph_daemon}}',
'PUT {{ceph_daemon}}',
'short',
|||
rate(ceph_rgw_op_get_obj_bytes{%(matchers)s}[$__rate_interval]) *
- on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s, ceph_daemon=~"$rgw_servers"}
+ on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s}
||| % $.matchers(),
|||
rate(ceph_rgw_op_put_obj_bytes{%(matchers)s}[$__rate_interval]) *
on (instance_id) group_left (ceph_daemon)
- ceph_rgw_metadata{%(matchers)s, ceph_daemon=~"$rgw_servers"}
+ ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s}
||| % $.matchers(),
'GETs {{ceph_daemon}}',
'PUTs {{ceph_daemon}}',
'short',
|||
rate(ceph_rgw_failed_req{%(matchers)s}[$__rate_interval]) *
- on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s,ceph_daemon=~"$rgw_servers"}
+ on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s}
||| % $.matchers(),
|||
rate(ceph_rgw_get{%(matchers)s}[$__rate_interval]) *
- on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s, ceph_daemon=~"$rgw_servers"}
+ on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s}
||| % $.matchers(),
'Requests Failed {{ceph_daemon}}',
'GETs {{ceph_daemon}}',
$.addTargetSchema(
|||
rate(ceph_rgw_put{%(matchers)s}[$__rate_interval]) *
- on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s, ceph_daemon=~"$rgw_servers"}
+ on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s}
||| % $.matchers(),
'PUTs {{ceph_daemon}}'
),
rate(ceph_rgw_get{%(matchers)s}[$__rate_interval]) +
rate(ceph_rgw_put{%(matchers)s}[$__rate_interval])
)
- ) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s, ceph_daemon=~"$rgw_servers"}
+ ) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s}
||| % $.matchers(),
'Other {{ceph_daemon}}'
),
.addTarget($.addTargetSchema(
|||
rate(ceph_rgw_failed_req{%(matchers)s}[$__rate_interval]) *
- on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s, ceph_daemon=~"$rgw_servers"}
+ on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s}
||| % $.matchers(),
'Failures {{ceph_daemon}}'
))
.addTarget($.addTargetSchema(
|||
rate(ceph_rgw_get{%(matchers)s}[$__rate_interval]) *
- on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s, ceph_daemon=~"$rgw_servers"}
+ on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s}
||| % $.matchers(),
'GETs {{ceph_daemon}}'
))
.addTarget($.addTargetSchema(
|||
rate(ceph_rgw_put{%(matchers)s}[$__rate_interval]) *
- on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s, ceph_daemon=~"$rgw_servers"}
+ on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s}
||| % $.matchers(),
'PUTs {{ceph_daemon}}'
))
rate(ceph_rgw_put{%(matchers)s}[$__rate_interval])
)
) * on (instance_id) group_left (ceph_daemon)
- ceph_rgw_metadata{%(matchers)s, ceph_daemon=~"$rgw_servers"}
+ ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s}
||| % $.matchers(),
'Other (DELETE,LIST) {{ceph_daemon}}'
)),
unit: unit,
valueMaps: valueMaps,
},
+
matchers()::
- local jobMatcher = 'job=~"$job"';
local clusterMatcher = '%s=~"$cluster"' % $._config.clusterLabel;
{
// Common labels
- jobMatcher: jobMatcher,
- clusterMatcher: (if $._config.showMultiCluster then clusterMatcher else ''),
- matchers: jobMatcher +
- (if $._config.showMultiCluster then ', ' + clusterMatcher else ''),
+ matchers: (if $._config.showMultiCluster then clusterMatcher + ', ' else ''),
},
+
addClusterTemplate()::
$.addTemplateSchema(
'cluster',
'$datasource',
- 'label_values(ceph_osd_metadata, %s)' % $._config.clusterLabel,
+ 'label_values(ceph_health_status, %s)' % $._config.clusterLabel,
1,
- true,
+ false,
1,
'cluster',
'(.*)',
if !$._config.showMultiCluster then 'variable' else '',
- multi=true,
- allValues='.+',
- ),
-
- addJobTemplate()::
- $.addTemplateSchema(
- 'job',
- '$datasource',
- 'label_values(ceph_osd_metadata{%(clusterMatcher)s}, job)' % $.matchers(),
- 1,
- true,
- 1,
- 'job',
- '(.*)',
- multi=true,
- allValues='.+',
+ multi=false,
+ allValues=null,
),
overviewStyle(alias,
addGaugePanel(title='',
description='',
transparent=false,
- datasource='${DS_PROMETHEUS}',
+ datasource='$datasource',
gridPosition={},
pluginVersion='9.1.3',
unit='percentunit',
+++ /dev/null
-exclusions:
- template-instance-rule:
- reason: "Instance template not needed because of ceph-mgr leader election."
- target-instance-rule:
- reason: "Instance matcher not needed because of ceph-mgr leader election."
},
{
"colors": null,
- "datasource": "${DS_PROMETHEUS}",
+ "datasource": "$datasource",
"description": "",
"fieldConfig": {
"defaults": {
"pluginVersion": "9.4.7",
"targets": [
{
- "datasource": "${DS_PROMETHEUS}",
- "expr": "ceph_health_status{}",
+ "datasource": "$datasource",
+ "expr": "ceph_health_status{cluster=~\"$cluster\", }",
"format": "time_series",
"instant": true,
"interval": "$interval",
"type": "stat"
},
{
- "datasource": "${DS_PROMETHEUS}",
+ "datasource": "$datasource",
"description": "",
"fieldConfig": {
"defaults": {
"pluginVersion": "9.4.7",
"targets": [
{
- "datasource": "${DS_PROMETHEUS}",
- "expr": "(ceph_cluster_total_bytes{}-ceph_cluster_total_used_bytes{})/ceph_cluster_total_bytes{}",
+ "datasource": "$datasource",
+ "expr": "(ceph_cluster_total_bytes{cluster=~\"$cluster\", }-ceph_cluster_total_used_bytes{cluster=~\"$cluster\", })/ceph_cluster_total_bytes{cluster=~\"$cluster\", }",
"format": "time_series",
"instant": true,
"interval": "$interval",
},
{
"colors": null,
- "datasource": "${DS_PROMETHEUS}",
+ "datasource": "$datasource",
"description": "",
"fieldConfig": {
"defaults": {
"pluginVersion": "9.4.7",
"targets": [
{
- "datasource": "${DS_PROMETHEUS}",
- "expr": "ceph_cluster_total_bytes{}",
+ "datasource": "$datasource",
+ "expr": "ceph_cluster_total_bytes{cluster=~\"$cluster\", }",
"format": "time_series",
"instant": true,
"interval": "$interval",
},
{
"colors": null,
- "datasource": "${DS_PROMETHEUS}",
+ "datasource": "$datasource",
"description": "",
"fieldConfig": {
"defaults": {
"pluginVersion": "9.4.7",
"targets": [
{
- "datasource": "${DS_PROMETHEUS}",
- "expr": "sum(irate(ceph_osd_op_w_in_bytes{}[5m]))",
+ "datasource": "$datasource",
+ "expr": "sum(irate(ceph_osd_op_w_in_bytes{cluster=~\"$cluster\", }[5m]))",
"format": "time_series",
"instant": true,
"interval": "$interval",
},
{
"colors": null,
- "datasource": "${DS_PROMETHEUS}",
+ "datasource": "$datasource",
"description": "",
"fieldConfig": {
"defaults": {
"pluginVersion": "9.4.7",
"targets": [
{
- "datasource": "${DS_PROMETHEUS}",
- "expr": "sum(irate(ceph_osd_op_r_out_bytes{}[5m]))",
+ "datasource": "$datasource",
+ "expr": "sum(irate(ceph_osd_op_r_out_bytes{cluster=~\"$cluster\", }[5m]))",
"format": "time_series",
"instant": true,
"interval": "$interval",
"warn": "rgba(237, 129, 40, 0.9)"
},
"cornerRadius": 0,
- "datasource": "${DS_PROMETHEUS}",
+ "datasource": "$datasource",
"description": "",
"displayName": "",
"fieldConfig": {
{
"aggregation": "Last",
"alias": "All",
- "datasource": "${DS_PROMETHEUS}",
+ "datasource": "$datasource",
"decimals": 2,
"displayAliasType": "Always",
"displayType": "Regular",
"displayValueWithAlias": "When Alias Displayed",
- "expr": "count(ceph_osd_metadata)",
+ "expr": "count(ceph_osd_metadata{cluster=~\"$cluster\", })",
"format": "time_series",
"interval": "$interval",
"intervalFactor": 1,
{
"aggregation": "Last",
"alias": "In",
- "datasource": "${DS_PROMETHEUS}",
+ "datasource": "$datasource",
"decimals": 2,
"displayAliasType": "Always",
"displayType": "Regular",
"displayValueWithAlias": "When Alias Displayed",
- "expr": "count(ceph_osd_in)",
+ "expr": "count(ceph_osd_in{cluster=~\"$cluster\", })",
"format": "time_series",
"interval": "$interval",
"intervalFactor": 1,
{
"aggregation": "Last",
"alias": "Out",
- "datasource": "${DS_PROMETHEUS}",
+ "datasource": "$datasource",
"decimals": 2,
"displayAliasType": "Warning / Critical",
"displayType": "Regular",
"displayValueWithAlias": "When Alias Displayed",
- "expr": "sum(ceph_osd_in == bool 0)",
+ "expr": "sum(ceph_osd_in{cluster=~\"$cluster\", } == bool 0)",
"format": "time_series",
"interval": "",
"intervalFactor": 1,
{
"aggregation": "Last",
"alias": "Up",
- "datasource": "${DS_PROMETHEUS}",
+ "datasource": "$datasource",
"decimals": 2,
"displayAliasType": "Always",
"displayType": "Regular",
"displayValueWithAlias": "When Alias Displayed",
- "expr": "sum(ceph_osd_up)",
+ "expr": "sum(ceph_osd_up{cluster=~\"$cluster\", })",
"format": "time_series",
"interval": "",
"intervalFactor": 1,
{
"aggregation": "Last",
"alias": "Down",
- "datasource": "${DS_PROMETHEUS}",
+ "datasource": "$datasource",
"decimals": 2,
"displayAliasType": "Warning / Critical",
"displayType": "Regular",
"displayValueWithAlias": "When Alias Displayed",
- "expr": "sum(ceph_osd_up == bool 0)",
+ "expr": "sum(ceph_osd_up{cluster=~\"$cluster\", } == bool 0)",
"format": "time_series",
"interval": "",
"intervalFactor": 1,
"warn": "rgba(237, 129, 40, 0.9)"
},
"cornerRadius": 1,
- "datasource": "${DS_PROMETHEUS}",
+ "datasource": "$datasource",
"description": "",
"displayName": "",
"fieldConfig": {
{
"aggregation": "Last",
"alias": "Active",
- "datasource": "${DS_PROMETHEUS}",
+ "datasource": "$datasource",
"decimals": 2,
"displayAliasType": "Always",
"displayType": "Regular",
"displayValueWithAlias": "When Alias Displayed",
- "expr": "count(ceph_mgr_status == 1) or vector(0)",
+ "expr": "count(ceph_mgr_status{cluster=~\"$cluster\", } == 1) or vector(0)",
"format": "time_series",
"instant": true,
"intervalFactor": 1,
{
"aggregation": "Last",
"alias": "Standby",
- "datasource": "${DS_PROMETHEUS}",
+ "datasource": "$datasource",
"decimals": 2,
"displayAliasType": "Always",
"displayType": "Regular",
"displayValueWithAlias": "When Alias Displayed",
- "expr": "count(ceph_mgr_status == 0) or vector(0)",
+ "expr": "count(ceph_mgr_status{cluster=~\"$cluster\", } == 0) or vector(0)",
"format": "time_series",
"instant": true,
"intervalFactor": 1,
"warn": "rgba(237, 129, 40, 0.9)"
},
"cornerRadius": 1,
- "datasource": "${DS_PROMETHEUS}",
+ "datasource": "$datasource",
"description": "",
"displayName": "",
"fieldConfig": {
{
"aggregation": "Last",
"alias": "Active",
- "datasource": "${DS_PROMETHEUS}",
+ "datasource": "$datasource",
"decimals": 2,
"displayAliasType": "Always",
"displayType": "Regular",
"displayValueWithAlias": "When Alias Displayed",
- "expr": "count(ALERTS{alertstate=\"firing\",alertname=~\"^Ceph.+\", severity=\"critical\"}) OR vector(0)",
+ "expr": "count(ALERTS{alertstate=\"firing\",alertname=~\"^Ceph.+\", severity=\"critical\", cluster=~\"$cluster\", }) OR vector(0)",
"format": "time_series",
"instant": true,
"intervalFactor": 1,
{
"aggregation": "Last",
"alias": "Standby",
- "datasource": "${DS_PROMETHEUS}",
+ "datasource": "$datasource",
"decimals": 2,
"displayAliasType": "Always",
"displayType": "Regular",
"displayValueWithAlias": "When Alias Displayed",
- "expr": "count(ALERTS{alertstate=\"firing\",alertname=~\"^Ceph.+\", severity=\"warning\"}) OR vector(0)",
+ "expr": "count(ALERTS{alertstate=\"firing\",alertname=~\"^Ceph.+\", severity=\"warning\", cluster=~\"$cluster\", }) OR vector(0)",
"format": "time_series",
"instant": true,
"intervalFactor": 1,
},
{
"colors": null,
- "datasource": "${DS_PROMETHEUS}",
+ "datasource": "$datasource",
"description": "",
"displayName": "",
"fieldConfig": {
"pluginVersion": "9.4.7",
"targets": [
{
- "datasource": "${DS_PROMETHEUS}",
- "expr": "ceph_cluster_total_used_bytes{}",
+ "datasource": "$datasource",
+ "expr": "ceph_cluster_total_used_bytes{cluster=~\"$cluster\", }",
"format": "time_series",
"instant": true,
"intervalFactor": 1,
},
{
"colors": null,
- "datasource": "${DS_PROMETHEUS}",
+ "datasource": "$datasource",
"description": "",
"displayName": "",
"fieldConfig": {
"pluginVersion": "9.4.7",
"targets": [
{
- "datasource": "${DS_PROMETHEUS}",
- "expr": "sum(irate(ceph_osd_op_w{}[1m]))",
+ "datasource": "$datasource",
+ "expr": "sum(irate(ceph_osd_op_w{cluster=~\"$cluster\", }[1m]))",
"format": "time_series",
"instant": true,
"intervalFactor": 1,
},
{
"colors": null,
- "datasource": "${DS_PROMETHEUS}",
+ "datasource": "$datasource",
"description": "",
"displayName": "",
"fieldConfig": {
"pluginVersion": "9.4.7",
"targets": [
{
- "datasource": "${DS_PROMETHEUS}",
- "expr": "sum(irate(ceph_osd_op_r{}[1m]))",
+ "datasource": "$datasource",
+ "expr": "sum(irate(ceph_osd_op_r{cluster=~\"$cluster\", }[1m]))",
"format": "time_series",
"instant": true,
"intervalFactor": 1,
"warn": "rgba(237, 129, 40, 0.9)"
},
"cornerRadius": 1,
- "datasource": "${DS_PROMETHEUS}",
+ "datasource": "$datasource",
"description": "",
"displayName": "",
"fieldConfig": {
{
"aggregation": "Last",
"alias": "In Quorum",
- "datasource": "${DS_PROMETHEUS}",
+ "datasource": "$datasource",
"decimals": 2,
"displayAliasType": "Always",
"displayType": "Regular",
"displayValueWithAlias": "When Alias Displayed",
- "expr": "sum(ceph_mon_quorum_status)",
+ "expr": "sum(ceph_mon_quorum_status{cluster=~\"$cluster\", })",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "In Quorum",
"aggregation": "Last",
"alias": "Total",
"crit": 1,
- "datasource": "${DS_PROMETHEUS}",
+ "datasource": "$datasource",
"decimals": 2,
"displayAliasType": "Always",
"displayType": "Regular",
"displayValueWithAlias": "When Alias Displayed",
- "expr": "count(ceph_mon_quorum_status)",
+ "expr": "count(ceph_mon_quorum_status{cluster=~\"$cluster\", })",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Total",
"aggregation": "Last",
"alias": "MONs out of Quorum",
"crit": 1.6000000000000001,
- "datasource": "${DS_PROMETHEUS}",
+ "datasource": "$datasource",
"decimals": 2,
"displayAliasType": "Warning / Critical",
"displayType": "Annotation",
"displayValueWithAlias": "Never",
- "expr": "count(ceph_mon_quorum_status) - sum(ceph_mon_quorum_status)",
+ "expr": "count(ceph_mon_quorum_status{cluster=~\"$cluster\", }) - sum(ceph_mon_quorum_status{cluster=~\"$cluster\", })",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "MONs out of Quorum",
"limit": 10,
"onlyAlertsOnDashboard": true,
"options": {
- "alertInstanceLabelFilter": "{alertname=~\"^Ceph.+\"}",
+ "alertInstanceLabelFilter": "{alertname=~\"^Ceph.+\", cluster=~\"$cluster\", }",
"alertName": "",
"dashboardAlerts": false,
"groupBy": [ ],
"type": "alertlist"
},
{
- "datasource": "${DS_PROMETHEUS}",
+ "datasource": "$datasource",
"fieldConfig": {
"defaults": {
"color": {
"pluginVersion": "9.1.3",
"targets": [
{
- "datasource": "${DS_PROMETHEUS}",
- "expr": "ceph_cluster_total_bytes{}",
+ "datasource": "$datasource",
+ "expr": "ceph_cluster_total_bytes{cluster=~\"$cluster\", }",
"format": "time_series",
"instant": false,
"interval": "$interval",
"step": 300
},
{
- "datasource": "${DS_PROMETHEUS}",
- "expr": "ceph_cluster_total_used_bytes{}",
+ "datasource": "$datasource",
+ "expr": "ceph_cluster_total_used_bytes{cluster=~\"$cluster\", }",
"format": "time_series",
"instant": false,
"interval": "$interval",
"type": "timeseries"
},
{
- "datasource": "${DS_PROMETHEUS}",
+ "datasource": "$datasource",
"fieldConfig": {
"defaults": {
"color": {
"pluginVersion": "9.1.3",
"targets": [
{
- "datasource": "${DS_PROMETHEUS}",
- "expr": "sum(irate(ceph_osd_op_w_in_bytes{}[5m]))",
+ "datasource": "$datasource",
+ "expr": "sum(irate(ceph_osd_op_w_in_bytes{cluster=~\"$cluster\", }[5m]))",
"format": "time_series",
"interval": "$interval",
"intervalFactor": 1,
"step": 300
},
{
- "datasource": "${DS_PROMETHEUS}",
- "expr": "sum(irate(ceph_osd_op_r_out_bytes{}[5m]))",
+ "datasource": "$datasource",
+ "expr": "sum(irate(ceph_osd_op_r_out_bytes{cluster=~\"$cluster\", }[5m]))",
"format": "time_series",
"interval": "$interval",
"intervalFactor": 1,
"type": "timeseries"
},
{
- "datasource": "${DS_PROMETHEUS}",
+ "datasource": "$datasource",
"fieldConfig": {
"defaults": {
"color": {
"pluginVersion": "9.1.3",
"targets": [
{
- "datasource": "${DS_PROMETHEUS}",
- "expr": "sum(irate(ceph_osd_op_w{}[1m]))",
+ "datasource": "$datasource",
+ "expr": "sum(irate(ceph_osd_op_w{cluster=~\"$cluster\", }[1m]))",
"format": "time_series",
"interval": "$interval",
"intervalFactor": 1,
"step": 300
},
{
- "datasource": "${DS_PROMETHEUS}",
- "expr": "sum(irate(ceph_osd_op_r{}[1m]))",
+ "datasource": "$datasource",
+ "expr": "sum(irate(ceph_osd_op_r{cluster=~\"$cluster\", }[1m]))",
"format": "time_series",
"interval": "$interval",
"intervalFactor": 1,
"type": "timeseries"
},
{
- "datasource": "${DS_PROMETHEUS}",
+ "datasource": "$datasource",
"fieldConfig": {
"defaults": {
"color": {
"pluginVersion": "9.1.3",
"targets": [
{
- "datasource": "${DS_PROMETHEUS}",
- "expr": "(ceph_pool_bytes_used{}) *on (pool_id) group_left(name)(ceph_pool_metadata{})",
+ "datasource": "$datasource",
+ "expr": "(ceph_pool_bytes_used{cluster=~\"$cluster\", }) *on (pool_id) group_left(name)(ceph_pool_metadata{cluster=~\"$cluster\", })",
"format": "time_series",
"interval": "$interval",
"intervalFactor": 1,
"type": "timeseries"
},
{
- "datasource": "${DS_PROMETHEUS}",
+ "datasource": "$datasource",
"fieldConfig": {
"defaults": {
"color": {
"pluginVersion": "9.1.3",
"targets": [
{
- "datasource": "${DS_PROMETHEUS}",
- "expr": "(ceph_pool_stored_raw{}) *on (pool_id) group_left(name)(ceph_pool_metadata{})",
+ "datasource": "$datasource",
+ "expr": "(ceph_pool_stored_raw{cluster=~\"$cluster\", }) *on (pool_id) group_left(name)(ceph_pool_metadata{cluster=~\"$cluster\", })",
"format": "time_series",
"hide": false,
"interval": "",
"type": "timeseries"
},
{
- "datasource": "${DS_PROMETHEUS}",
+ "datasource": "$datasource",
"fieldConfig": {
"defaults": {
"color": {
"pluginVersion": "9.1.3",
"targets": [
{
- "datasource": "${DS_PROMETHEUS}",
- "expr": "(ceph_pool_quota_objects{}) *on (pool_id) group_left(name)(ceph_pool_metadata{})",
+ "datasource": "$datasource",
+ "expr": "(ceph_pool_quota_objects{cluster=~\"$cluster\", }) *on (pool_id) group_left(name)(ceph_pool_metadata{cluster=~\"$cluster\", })",
"format": "time_series",
"interval": "",
"intervalFactor": 1,
"type": "timeseries"
},
{
- "datasource": "${DS_PROMETHEUS}",
+ "datasource": "$datasource",
"fieldConfig": {
"defaults": {
"color": {
"pluginVersion": "9.1.3",
"targets": [
{
- "datasource": "${DS_PROMETHEUS}",
- "expr": "(ceph_pool_quota_bytes{}) *on (pool_id) group_left(name)(ceph_pool_metadata{})",
+ "datasource": "$datasource",
+ "expr": "(ceph_pool_quota_bytes{cluster=~\"$cluster\", }) *on (pool_id) group_left(name)(ceph_pool_metadata{cluster=~\"$cluster\", })",
"format": "time_series",
"interval": "",
"intervalFactor": 1,
"type": "timeseries"
},
{
- "datasource": "${DS_PROMETHEUS}",
+ "datasource": "$datasource",
"fieldConfig": {
"defaults": {
"color": {
"pluginVersion": "9.1.3",
"targets": [
{
- "datasource": "${DS_PROMETHEUS}",
- "expr": "(ceph_pool_objects{}) *on (pool_id) group_left(name)(ceph_pool_metadata{})",
+ "datasource": "$datasource",
+ "expr": "(ceph_pool_objects{cluster=~\"$cluster\", }) * on (pool_id) group_left(name)(ceph_pool_metadata{cluster=~\"$cluster\", })",
"format": "time_series",
"interval": "",
"intervalFactor": 1,
"type": "row"
},
{
- "datasource": "${DS_PROMETHEUS}",
+ "datasource": "$datasource",
"fieldConfig": {
"defaults": {
"color": {
"pluginVersion": "9.1.3",
"targets": [
{
- "datasource": "${DS_PROMETHEUS}",
- "expr": "sum(ceph_pool_objects)",
+ "datasource": "$datasource",
+ "expr": "sum(ceph_pool_objects{cluster=~\"$cluster\", })",
"format": "time_series",
"interval": "$interval",
"intervalFactor": 1,
"type": "timeseries"
},
{
- "datasource": "${DS_PROMETHEUS}",
+ "datasource": "$datasource",
"fieldConfig": {
"defaults": {
"color": {
"pluginVersion": "9.1.3",
"targets": [
{
- "datasource": "${DS_PROMETHEUS}",
- "expr": "sum(ceph_pg_active{})",
+ "datasource": "$datasource",
+ "expr": "sum(ceph_pg_active{cluster=~\"$cluster\", })",
"format": "time_series",
"interval": "$interval",
"intervalFactor": 1,
"refId": "A"
},
{
- "datasource": "${DS_PROMETHEUS}",
- "expr": "sum(ceph_pg_clean{})",
+ "datasource": "$datasource",
+ "expr": "sum(ceph_pg_clean{cluster=~\"$cluster\", })",
"format": "time_series",
"interval": "$interval",
"intervalFactor": 1,
"refId": "B"
},
{
- "datasource": "${DS_PROMETHEUS}",
- "expr": "sum(ceph_pg_peering{})",
+ "datasource": "$datasource",
+ "expr": "sum(ceph_pg_peering{cluster=~\"$cluster\", })",
"format": "time_series",
"interval": "$interval",
"intervalFactor": 1,
"refId": "C"
},
{
- "datasource": "${DS_PROMETHEUS}",
- "expr": "sum(ceph_pg_degraded{})",
+ "datasource": "$datasource",
+ "expr": "sum(ceph_pg_degraded{cluster=~\"$cluster\", })",
"format": "time_series",
"interval": "$interval",
"intervalFactor": 1,
"step": 300
},
{
- "datasource": "${DS_PROMETHEUS}",
- "expr": "sum(ceph_pg_stale{})",
+ "datasource": "$datasource",
+ "expr": "sum(ceph_pg_stale{cluster=~\"$cluster\", })",
"format": "time_series",
"interval": "$interval",
"intervalFactor": 1,
"step": 300
},
{
- "datasource": "${DS_PROMETHEUS}",
- "expr": "sum(ceph_unclean_pgs{})",
+ "datasource": "$datasource",
+ "expr": "sum(ceph_unclean_pgs{cluster=~\"$cluster\", })",
"format": "time_series",
"interval": "$interval",
"intervalFactor": 1,
"step": 300
},
{
- "datasource": "${DS_PROMETHEUS}",
- "expr": "sum(ceph_pg_undersized{})",
+ "datasource": "$datasource",
+ "expr": "sum(ceph_pg_undersized{cluster=~\"$cluster\", })",
"format": "time_series",
"interval": "$interval",
"intervalFactor": 1,
"step": 300
},
{
- "datasource": "${DS_PROMETHEUS}",
- "expr": "sum(ceph_pg_incomplete{})",
+ "datasource": "$datasource",
+ "expr": "sum(ceph_pg_incomplete{cluster=~\"$cluster\", })",
"format": "time_series",
"interval": "$interval",
"intervalFactor": 1,
"refId": "H"
},
{
- "datasource": "${DS_PROMETHEUS}",
- "expr": "sum(ceph_pg_forced_backfill{})",
+ "datasource": "$datasource",
+ "expr": "sum(ceph_pg_forced_backfill{cluster=~\"$cluster\", })",
"format": "time_series",
"interval": "$interval",
"intervalFactor": 1,
"refId": "I"
},
{
- "datasource": "${DS_PROMETHEUS}",
- "expr": "sum(ceph_pg_forced_recovery{})",
+ "datasource": "$datasource",
+ "expr": "sum(ceph_pg_forced_recovery{cluster=~\"$cluster\", })",
"format": "time_series",
"interval": "$interval",
"intervalFactor": 1,
"refId": "J"
},
{
- "datasource": "${DS_PROMETHEUS}",
- "expr": "sum(ceph_pg_creating{})",
+ "datasource": "$datasource",
+ "expr": "sum(ceph_pg_creating{cluster=~\"$cluster\", })",
"format": "time_series",
"interval": "$interval",
"intervalFactor": 1,
"refId": "K"
},
{
- "datasource": "${DS_PROMETHEUS}",
- "expr": "sum(ceph_pg_wait_backfill{})",
+ "datasource": "$datasource",
+ "expr": "sum(ceph_pg_wait_backfill{cluster=~\"$cluster\", })",
"format": "time_series",
"interval": "$interval",
"intervalFactor": 1,
"refId": "L"
},
{
- "datasource": "${DS_PROMETHEUS}",
- "expr": "sum(ceph_pg_deep{})",
+ "datasource": "$datasource",
+ "expr": "sum(ceph_pg_deep{cluster=~\"$cluster\", })",
"format": "time_series",
"interval": "$interval",
"intervalFactor": 1,
"refId": "M"
},
{
- "datasource": "${DS_PROMETHEUS}",
- "expr": "sum(ceph_pg_scrubbing{})",
+ "datasource": "$datasource",
+ "expr": "sum(ceph_pg_scrubbing{cluster=~\"$cluster\", })",
"format": "time_series",
"interval": "$interval",
"intervalFactor": 1,
"refId": "N"
},
{
- "datasource": "${DS_PROMETHEUS}",
- "expr": "sum(ceph_pg_recovering{})",
+ "datasource": "$datasource",
+ "expr": "sum(ceph_pg_recovering{cluster=~\"$cluster\", })",
"format": "time_series",
"interval": "$interval",
"intervalFactor": 1,
"refId": "O"
},
{
- "datasource": "${DS_PROMETHEUS}",
- "expr": "sum(ceph_pg_repair{})",
+ "datasource": "$datasource",
+ "expr": "sum(ceph_pg_repair{cluster=~\"$cluster\", })",
"format": "time_series",
"interval": "$interval",
"intervalFactor": 1,
"refId": "P"
},
{
- "datasource": "${DS_PROMETHEUS}",
- "expr": "sum(ceph_pg_down{})",
+ "datasource": "$datasource",
+ "expr": "sum(ceph_pg_down{cluster=~\"$cluster\", })",
"format": "time_series",
"interval": "$interval",
"intervalFactor": 1,
"refId": "Q"
},
{
- "datasource": "${DS_PROMETHEUS}",
- "expr": "sum(ceph_pg_peered{})",
+ "datasource": "$datasource",
+ "expr": "sum(ceph_pg_peered{cluster=~\"$cluster\", })",
"format": "time_series",
"interval": "$interval",
"intervalFactor": 1,
"refId": "R"
},
{
- "datasource": "${DS_PROMETHEUS}",
- "expr": "sum(ceph_pg_backfill{})",
+ "datasource": "$datasource",
+ "expr": "sum(ceph_pg_backfill{cluster=~\"$cluster\", })",
"format": "time_series",
"interval": "$interval",
"intervalFactor": 1,
"refId": "S"
},
{
- "datasource": "${DS_PROMETHEUS}",
- "expr": "sum(ceph_pg_remapped{})",
+ "datasource": "$datasource",
+ "expr": "sum(ceph_pg_remapped{cluster=~\"$cluster\", })",
"format": "time_series",
"interval": "$interval",
"intervalFactor": 1,
"refId": "T"
},
{
- "datasource": "${DS_PROMETHEUS}",
- "expr": "sum(ceph_pg_backfill_toofull{})",
+ "datasource": "$datasource",
+ "expr": "sum(ceph_pg_backfill_toofull{cluster=~\"$cluster\", })",
"format": "time_series",
"interval": "$interval",
"intervalFactor": 1,
"type": "timeseries"
},
{
- "datasource": "${DS_PROMETHEUS}",
+ "datasource": "$datasource",
"fieldConfig": {
"defaults": {
"color": {
"pluginVersion": "9.1.3",
"targets": [
{
- "datasource": "${DS_PROMETHEUS}",
- "expr": "sum(ceph_pg_degraded{})",
+ "datasource": "$datasource",
+ "expr": "sum(ceph_pg_degraded{cluster=~\"$cluster\", })",
"format": "time_series",
"interval": "$interval",
"intervalFactor": 1,
"step": 300
},
{
- "datasource": "${DS_PROMETHEUS}",
- "expr": "sum(ceph_pg_stale{})",
+ "datasource": "$datasource",
+ "expr": "sum(ceph_pg_stale{cluster=~\"$cluster\", })",
"format": "time_series",
"interval": "$interval",
"intervalFactor": 1,
"step": 300
},
{
- "datasource": "${DS_PROMETHEUS}",
- "expr": "sum(ceph_pg_undersized{})",
+ "datasource": "$datasource",
+ "expr": "sum(ceph_pg_undersized{cluster=~\"$cluster\", })",
"format": "time_series",
"interval": "$interval",
"intervalFactor": 1,
"type": "timeseries"
},
{
- "datasource": "${DS_PROMETHEUS}",
+ "datasource": "$datasource",
"fieldConfig": {
"defaults": {
"color": {
"pluginVersion": "9.1.3",
"targets": [
{
- "datasource": "${DS_PROMETHEUS}",
- "expr": "sum(irate(ceph_osd_recovery_ops{}[$interval]))",
+ "datasource": "$datasource",
+ "expr": "sum(irate(ceph_osd_recovery_ops{cluster=~\"$cluster\", }[$interval]))",
"format": "time_series",
"interval": "$interval",
"intervalFactor": 1,
"mode": "opacity"
},
"dataFormat": "timeseries",
- "datasource": "${DS_PROMETHEUS}",
+ "datasource": "$datasource",
"fieldConfig": {
"defaults": {
"custom": {
"pluginVersion": "9.4.7",
"targets": [
{
- "datasource": "${DS_PROMETHEUS}",
- "expr": "ceph_osd_apply_latency_ms{}",
+ "datasource": "$datasource",
+ "expr": "ceph_osd_apply_latency_ms{cluster=~\"$cluster\", }",
"format": "time_series",
"instant": false,
"interval": "$interval",
"mode": "opacity"
},
"dataFormat": "timeseries",
- "datasource": "${DS_PROMETHEUS}",
+ "datasource": "$datasource",
"fieldConfig": {
"defaults": {
"custom": {
"pluginVersion": "9.4.7",
"targets": [
{
- "datasource": "${DS_PROMETHEUS}",
- "expr": "ceph_osd_commit_latency_ms{}",
+ "datasource": "$datasource",
+ "expr": "ceph_osd_commit_latency_ms{cluster=~\"$cluster\", }",
"format": "time_series",
"instant": false,
"interval": "$interval",
"mode": "opacity"
},
"dataFormat": "timeseries",
- "datasource": "${DS_PROMETHEUS}",
+ "datasource": "$datasource",
"fieldConfig": {
"defaults": {
"custom": {
"pluginVersion": "9.4.7",
"targets": [
{
- "datasource": "${DS_PROMETHEUS}",
- "expr": "rate(ceph_osd_op_r_latency_sum{}[5m]) / rate(ceph_osd_op_r_latency_count{}[5m]) >= 0",
+ "datasource": "$datasource",
+ "expr": "rate(ceph_osd_op_r_latency_sum{cluster=~\"$cluster\", }[5m]) / rate(ceph_osd_op_r_latency_count{cluster=~\"$cluster\", }[5m]) >= 0",
"format": "time_series",
"instant": false,
"interval": "$interval",
"mode": "opacity"
},
"dataFormat": "timeseries",
- "datasource": "${DS_PROMETHEUS}",
+ "datasource": "$datasource",
"fieldConfig": {
"defaults": {
"custom": {
"pluginVersion": "9.4.7",
"targets": [
{
- "datasource": "${DS_PROMETHEUS}",
- "expr": "rate(ceph_osd_op_w_latency_sum{}[5m]) / rate(ceph_osd_op_w_latency_count{}[5m]) >= 0",
+ "datasource": "$datasource",
+ "expr": "rate(ceph_osd_op_w_latency_sum{cluster=~\"$cluster\", }[5m]) / rate(ceph_osd_op_w_latency_count{cluster=~\"$cluster\", }[5m]) >= 0",
"format": "time_series",
"instant": false,
"interval": "$interval",
"yBucketSize": null
},
{
- "datasource": "${DS_PROMETHEUS}",
+ "datasource": "$datasource",
"fieldConfig": {
"defaults": {
"color": {
"pluginVersion": "9.1.3",
"targets": [
{
- "datasource": "${DS_PROMETHEUS}",
- "expr": "avg(rate(ceph_osd_op_r_latency_sum{}[5m]) / rate(ceph_osd_op_r_latency_count{}[5m]) >= 0)",
+ "datasource": "$datasource",
+ "expr": "avg(rate(ceph_osd_op_r_latency_sum{cluster=~\"$cluster\", }[5m]) / rate(ceph_osd_op_r_latency_count{cluster=~\"$cluster\", }[5m]) >= 0)",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Read",
"refId": "A"
},
{
- "datasource": "${DS_PROMETHEUS}",
- "expr": "avg(rate(ceph_osd_op_w_latency_sum{}[5m]) / rate(ceph_osd_op_w_latency_count{}[5m]) >= 0)",
+ "datasource": "$datasource",
+ "expr": "avg(rate(ceph_osd_op_w_latency_sum{cluster=~\"$cluster\", }[5m]) / rate(ceph_osd_op_w_latency_count{cluster=~\"$cluster\", }[5m]) >= 0)",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Write",
"type": "timeseries"
},
{
- "datasource": "${DS_PROMETHEUS}",
+ "datasource": "$datasource",
"fieldConfig": {
"defaults": {
"color": {
"pluginVersion": "9.1.3",
"targets": [
{
- "datasource": "${DS_PROMETHEUS}",
- "expr": "avg(ceph_osd_apply_latency_ms{})",
+ "datasource": "$datasource",
+ "expr": "avg(ceph_osd_apply_latency_ms{cluster=~\"$cluster\", })",
"format": "time_series",
"interval": "$interval",
"intervalFactor": 1,
"step": 4
},
{
- "datasource": "${DS_PROMETHEUS}",
- "expr": "avg(ceph_osd_commit_latency_ms{})",
+ "datasource": "$datasource",
+ "expr": "avg(ceph_osd_commit_latency_ms{cluster=~\"$cluster\", })",
"format": "time_series",
"interval": "$interval",
"intervalFactor": 1,
},
{
"columns": [ ],
- "datasource": "${DS_PROMETHEUS}",
+ "datasource": "$datasource",
"fieldConfig": {
"defaults": {
"custom": {
"styles": "",
"targets": [
{
- "datasource": "${DS_PROMETHEUS}",
+ "datasource": "$datasource",
"exemplar": false,
- "expr": "count by (ceph_version)(ceph_osd_metadata{})",
+ "expr": "count by (ceph_version)(ceph_osd_metadata{cluster=~\"$cluster\", })",
"format": "table",
"hide": false,
"instant": true,
"refId": "A"
},
{
- "datasource": "${DS_PROMETHEUS}",
+ "datasource": "$datasource",
"exemplar": false,
- "expr": "count by (ceph_version)(ceph_mon_metadata{})",
+ "expr": "count by (ceph_version)(ceph_mon_metadata{cluster=~\"$cluster\", })",
"format": "table",
"hide": false,
"instant": true,
"refId": "B"
},
{
- "datasource": "${DS_PROMETHEUS}",
+ "datasource": "$datasource",
"exemplar": false,
- "expr": "count by (ceph_version)(ceph_mds_metadata{})",
+ "expr": "count by (ceph_version)(ceph_mds_metadata{cluster=~\"$cluster\", })",
"format": "table",
"hide": false,
"instant": true,
"refId": "C"
},
{
- "datasource": "${DS_PROMETHEUS}",
+ "datasource": "$datasource",
"exemplar": false,
- "expr": "count by (ceph_version)(ceph_rgw_metadata{})",
+ "expr": "count by (ceph_version)(ceph_rgw_metadata{cluster=~\"$cluster\", })",
"format": "table",
"hide": false,
"instant": true,
"refId": "D"
},
{
- "datasource": "${DS_PROMETHEUS}",
+ "datasource": "$datasource",
"exemplar": false,
- "expr": "count by (ceph_version)(ceph_mgr_metadata{})",
+ "expr": "count by (ceph_version)(ceph_mgr_metadata{cluster=~\"$cluster\", })",
"format": "table",
"hide": false,
"instant": true,
"list": [
{
"current": {
- "text": "Prometheus",
- "value": "Prometheus"
+ "text": "default",
+ "value": "default"
},
"hide": 0,
"label": "Data Source",
- "name": "DS_PROMETHEUS",
+ "name": "datasource",
"options": [ ],
"query": "prometheus",
"refresh": 1,
"regex": "",
"type": "datasource"
},
+ {
+ "allValue": null,
+ "current": { },
+ "datasource": "$datasource",
+ "hide": 0,
+ "includeAll": false,
+ "label": "cluster",
+ "multi": false,
+ "name": "cluster",
+ "options": [ ],
+ "query": "label_values(ceph_health_status, cluster)",
+ "refresh": 1,
+ "regex": "(.*)",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
{
"auto": true,
"auto_count": 10,
},
"timezone": "",
"title": "Ceph Cluster - Advanced",
+ "uid": "dn13KBeTv",
"version": 0
}
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(ceph_objecter_op_r{job=~\"$job\", ceph_daemon=~\"($mds_servers).*\"}[$__rate_interval]))",
+ "expr": "sum(rate(ceph_objecter_op_r{ceph_daemon=~\"($mds_servers).*\", cluster=~\"$cluster\", }[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Read Ops",
"refId": "A"
},
{
- "expr": "sum(rate(ceph_objecter_op_w{job=~\"$job\", ceph_daemon=~\"($mds_servers).*\"}[$__rate_interval]))",
+ "expr": "sum(rate(ceph_objecter_op_w{ceph_daemon=~\"($mds_servers).*\", cluster=~\"$cluster\", }[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Write Ops",
"steppedLine": false,
"targets": [
{
- "expr": "ceph_mds_server_handle_client_request{job=~\"$job\", ceph_daemon=~\"($mds_servers).*\"}",
+ "expr": "ceph_mds_server_handle_client_request{ceph_daemon=~\"($mds_servers).*\", cluster=~\"$cluster\", }",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{ceph_daemon}}",
"type": "datasource"
},
{
- "allValue": ".+",
+ "allValue": null,
"current": { },
"datasource": "$datasource",
- "hide": 2,
- "includeAll": true,
+ "hide": 0,
+ "includeAll": false,
"label": "cluster",
- "multi": true,
+ "multi": false,
"name": "cluster",
"options": [ ],
- "query": "label_values(ceph_osd_metadata, cluster)",
- "refresh": 1,
- "regex": "(.*)",
- "sort": 1,
- "tagValuesQuery": "",
- "tags": [ ],
- "tagsQuery": "",
- "type": "query",
- "useTags": false
- },
- {
- "allValue": ".+",
- "current": { },
- "datasource": "$datasource",
- "hide": 0,
- "includeAll": true,
- "label": "job",
- "multi": true,
- "name": "job",
- "options": [ ],
- "query": "label_values(ceph_osd_metadata{}, job)",
+ "query": "label_values(ceph_health_status, cluster)",
"refresh": 1,
"regex": "(.*)",
"sort": 1,
"multi": false,
"name": "mds_servers",
"options": [ ],
- "query": "label_values(ceph_mds_inodes{job=~\"$job\"}, ceph_daemon)",
+ "query": "label_values(ceph_mds_inodes{cluster=~\"$cluster\", }, ceph_daemon)",
"refresh": 1,
"regex": "",
"sort": 1,
"tableColumn": "",
"targets": [
{
- "expr": "count(sum by (ceph_daemon) (ceph_osd_metadata{job=~\"$job\", hostname='$ceph_hosts'}))",
+ "expr": "count(sum by (ceph_daemon) (ceph_osd_metadata{cluster=~\"$cluster\", }))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "",
"tableColumn": "",
"targets": [
{
- "expr": "sum(\n ceph_osd_stat_bytes{job=~\"$job\"} and\n on (ceph_daemon) ceph_disk_occupation{job=~\"$job\", instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}\n)\n",
+ "expr": "sum(\n ceph_osd_stat_bytes{cluster=~\"$cluster\", } and\n on (ceph_daemon) ceph_disk_occupation{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\", cluster=~\"$cluster\", }\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "",
"steppedLine": false,
"targets": [
{
- "expr": "label_replace(\n (\n rate(node_disk_writes_completed{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) or\n rate(node_disk_writes_completed_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval])\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) * on(instance, device) group_left(ceph_daemon) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n",
+ "expr": "label_replace(\n (\n rate(node_disk_writes_completed{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) or\n rate(node_disk_writes_completed_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval])\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) * on(instance, device) group_left(ceph_daemon) label_replace(\n label_replace(\n ceph_disk_occupation_human{cluster=~\"$cluster\", }, \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{device}}({{ceph_daemon}}) writes",
"refId": "A"
},
{
- "expr": "label_replace(\n (\n rate(node_disk_reads_completed{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) or\n rate(node_disk_reads_completed_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval])\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) * on(instance, device) group_left(ceph_daemon) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\"},\"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n",
+ "expr": "label_replace(\n (\n rate(node_disk_reads_completed{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) or\n rate(node_disk_reads_completed_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval])\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) * on(instance, device) group_left(ceph_daemon) label_replace(\n label_replace(\n ceph_disk_occupation_human{cluster=~\"$cluster\", },\"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{device}}({{ceph_daemon}}) reads",
"steppedLine": false,
"targets": [
{
- "expr": "label_replace(\n (\n rate(node_disk_bytes_written{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) or\n rate(node_disk_written_bytes_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval])\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") * on(instance, device)\n group_left(ceph_daemon) label_replace(\n label_replace(ceph_disk_occupation_human{job=~\"$job\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n )\n",
+ "expr": "label_replace(\n (\n rate(node_disk_bytes_written{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) or\n rate(node_disk_written_bytes_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval])\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") * on(instance, device)\n group_left(ceph_daemon) label_replace(\n label_replace(ceph_disk_occupation_human{cluster=~\"$cluster\", }, \"device\", \"$1\", \"device\", \"/dev/(.*)\"),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n )\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{device}}({{ceph_daemon}}) write",
"refId": "A"
},
{
- "expr": "label_replace(\n (\n rate(node_disk_bytes_read{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) or\n rate(node_disk_read_bytes_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval])\n ),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") * on(instance, device)\n group_left(ceph_daemon) label_replace(\n label_replace(ceph_disk_occupation_human{job=~\"$job\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n )\n",
+ "expr": "label_replace(\n (\n rate(node_disk_bytes_read{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) or\n rate(node_disk_read_bytes_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval])\n ),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") * on(instance, device)\n group_left(ceph_daemon) label_replace(\n label_replace(ceph_disk_occupation_human{cluster=~\"$cluster\", }, \"device\", \"$1\", \"device\", \"/dev/(.*)\"),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n )\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{device}}({{ceph_daemon}}) read",
"steppedLine": false,
"targets": [
{
- "expr": "label_replace(\n (\n (rate(node_disk_io_time_ms{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) / 10) or\n rate(node_disk_io_time_seconds_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) * 100\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) * on(instance, device) group_left(ceph_daemon) label_replace(\n label_replace(ceph_disk_occupation_human{job=~\"$job\", instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n",
+ "expr": "label_replace(\n (\n (rate(node_disk_io_time_ms{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) / 10) or\n rate(node_disk_io_time_seconds_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) * 100\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) * on(instance, device) group_left(ceph_daemon) label_replace(\n label_replace(ceph_disk_occupation_human{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\", cluster=~\"$cluster\", },\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{device}}({{ceph_daemon}})",
"styles": "",
"targets": [
{
- "expr": "topk(10,\n (sum by (instance)(ceph_daemon_health_metrics{type=\"SLOW_OPS\", ceph_daemon=~\"osd.*\"}))\n)\n",
+ "expr": "topk(10,\n (sum by (instance)(ceph_daemon_health_metrics{type=\"SLOW_OPS\", ceph_daemon=~\"osd.*\", cluster=~\"$cluster\", }))\n)\n",
"format": "table",
"instant": true,
"intervalFactor": 1,
"type": "datasource"
},
{
- "allValue": ".+",
+ "allValue": null,
"current": { },
"datasource": "$datasource",
- "hide": 2,
- "includeAll": true,
+ "hide": 0,
+ "includeAll": false,
"label": "cluster",
- "multi": true,
+ "multi": false,
"name": "cluster",
"options": [ ],
- "query": "label_values(ceph_osd_metadata, cluster)",
- "refresh": 1,
- "regex": "(.*)",
- "sort": 1,
- "tagValuesQuery": "",
- "tags": [ ],
- "tagsQuery": "",
- "type": "query",
- "useTags": false
- },
- {
- "allValue": ".+",
- "current": { },
- "datasource": "$datasource",
- "hide": 0,
- "includeAll": true,
- "label": "job",
- "multi": true,
- "name": "job",
- "options": [ ],
- "query": "label_values(ceph_osd_metadata{}, job)",
+ "query": "label_values(ceph_health_status, cluster)",
"refresh": 1,
"regex": "(.*)",
"sort": 1,
"current": { },
"datasource": "$datasource",
"hide": 0,
- "includeAll": false,
- "label": "Hostname",
+ "includeAll": true,
+ "label": null,
"multi": false,
"name": "ceph_hosts",
"options": [ ],
- "query": "label_values(instance)",
+ "query": "label_values({__name__=~\"ceph_.+_metadata\", cluster=~\"$cluster\", }, hostname)",
"refresh": 1,
- "regex": "([^.:]*).*",
- "sort": 3,
+ "regex": "([^.]*).*",
+ "sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"tagsQuery": "",
"tableColumn": "",
"targets": [
{
- "expr": "count(sum by (hostname) (ceph_osd_metadata{job=~\"$job\"}))",
+ "expr": "count(sum by (hostname) (ceph_osd_metadata{cluster=~\"$cluster\", }))",
"format": "time_series",
"instant": true,
"intervalFactor": 1,
"tableColumn": "",
"targets": [
{
- "expr": "avg (\n label_replace(\n (rate(node_disk_io_time_ms[$__rate_interval]) / 10 ) or\n (rate(node_disk_io_time_seconds_total[$__rate_interval]) * 100),\n \"instance\", \"$1\", \"instance\", \"([^.:]*).*\"\n ) * on(instance, device) group_left(ceph_daemon) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", instance=~\"($osd_hosts).*\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^.:]*).*\"\n )\n)\n",
+ "expr": "avg (\n label_replace(\n (rate(node_disk_io_time_ms[$__rate_interval]) / 10 ) or\n (rate(node_disk_io_time_seconds_total[$__rate_interval]) * 100),\n \"instance\", \"$1\", \"instance\", \"([^.:]*).*\"\n ) * on(instance, device) group_left(ceph_daemon) label_replace(\n label_replace(\n ceph_disk_occupation_human{instance=~\"($osd_hosts).*\", cluster=~\"$cluster\", },\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^.:]*).*\"\n )\n)\n",
"format": "time_series",
"instant": true,
"intervalFactor": 1,
"tableColumn": "",
"targets": [
{
- "expr": "sum (\n (\n rate(node_network_receive_bytes{instance=~\"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*\",device!=\"lo\"}[$__rate_interval]) or\n rate(node_network_receive_bytes_total{instance=~\"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*\",device!=\"lo\"}[$__rate_interval])\n ) unless on (device, instance)\n label_replace((bonding_slaves > 0), \"device\", \"$1\", \"master\", \"(.+)\")\n) +\nsum (\n (\n rate(node_network_transmit_bytes{instance=~\"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*\",device!=\"lo\"}[$__rate_interval]) or\n rate(node_network_transmit_bytes_total{instance=~\"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*\",device!=\"lo\"}[$__rate_interval])\n ) unless on (device, instance)\n label_replace((bonding_slaves > 0), \"device\", \"$1\", \"master\", \"(.+)\")\n)\n",
+ "expr": "sum (\n (\n rate(node_network_receive_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval]) or\n rate(node_network_receive_bytes_total{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval])\n ) unless on (device, instance)\n label_replace((node_bonding_slaves > 0), \"device\", \"$1\", \"master\", \"(.+)\")\n) +\nsum (\n (\n rate(node_network_transmit_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval]) or\n rate(node_network_transmit_bytes_total{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval])\n ) unless on (device, instance)\n label_replace((node_bonding_slaves > 0), \"device\", \"$1\", \"master\", \"(.+)\")\n)\n",
"format": "time_series",
"instant": true,
"intervalFactor": 1,
"steppedLine": false,
"targets": [
{
- "expr": "topk(10, (sum by(instance) (\n(\n rate(node_network_receive_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval]) or\n rate(node_network_receive_bytes_total{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval])\n) +\n(\n rate(node_network_transmit_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval]) or\n rate(node_network_transmit_bytes_total{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval])\n) unless on (device, instance)\n label_replace((bonding_slaves > 0), \"device\", \"$1\", \"master\", \"(.+)\"))\n))\n",
+ "expr": "topk(10, (sum by(instance) (\n(\n rate(node_network_receive_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval]) or\n rate(node_network_receive_bytes_total{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval])\n) +\n(\n rate(node_network_transmit_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval]) or\n rate(node_network_transmit_bytes_total{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval])\n) unless on (device, instance)\n label_replace((node_bonding_slaves > 0), \"device\", \"$1\", \"master\", \"(.+)\"))\n))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{instance}}",
"type": "datasource"
},
{
- "allValue": ".+",
+ "allValue": null,
"current": { },
"datasource": "$datasource",
- "hide": 2,
- "includeAll": true,
+ "hide": 0,
+ "includeAll": false,
"label": "cluster",
- "multi": true,
+ "multi": false,
"name": "cluster",
"options": [ ],
- "query": "label_values(ceph_osd_metadata, cluster)",
- "refresh": 1,
- "regex": "(.*)",
- "sort": 1,
- "tagValuesQuery": "",
- "tags": [ ],
- "tagsQuery": "",
- "type": "query",
- "useTags": false
- },
- {
- "allValue": ".+",
- "current": { },
- "datasource": "$datasource",
- "hide": 0,
- "includeAll": true,
- "label": "job",
- "multi": true,
- "name": "job",
- "options": [ ],
- "query": "label_values(ceph_osd_metadata{}, job)",
+ "query": "label_values(ceph_health_status, cluster)",
"refresh": 1,
"regex": "(.*)",
"sort": 1,
"multi": false,
"name": "osd_hosts",
"options": [ ],
- "query": "label_values(ceph_disk_occupation{job=~\"$job\"}, exported_instance)",
+ "query": "label_values(ceph_osd_metadata{cluster=~\"$cluster\", }, hostname)",
"refresh": 1,
"regex": "([^.]*).*",
"sort": 1,
"multi": false,
"name": "mon_hosts",
"options": [ ],
- "query": "label_values(ceph_mon_metadata{job=~\"$job\"}, ceph_daemon)",
+ "query": "label_values(ceph_mon_metadata{cluster=~\"$cluster\", }, hostname)",
"refresh": 1,
"regex": "mon.(.*)",
"sort": 1,
"multi": false,
"name": "mds_hosts",
"options": [ ],
- "query": "label_values(ceph_mds_inodes{job=~\"$job\"}, ceph_daemon)",
+ "query": "label_values(ceph_mds_inodes{hostname, cluster=~\"$cluster\", })",
"refresh": 1,
"regex": "mds.(.*)",
"sort": 1,
"multi": false,
"name": "rgw_hosts",
"options": [ ],
- "query": "label_values(ceph_rgw_metadata{job=~\"$job\"}, ceph_daemon)",
+ "query": "label_values(ceph_rgw_metadata{hostname, cluster=~\"$cluster\", })",
"refresh": 1,
"regex": "rgw.(.*)",
"sort": 1,
"steppedLine": false,
"targets": [
{
- "expr": "rate(ceph_osd_op_r_latency_sum{job=~\"$job\", ceph_daemon=~\"$osd\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_r_latency_count{job=~\"$job\"}[$__rate_interval])\n",
+ "expr": "rate(ceph_osd_op_r_latency_sum{ceph_daemon=~\"$osd\", cluster=~\"$cluster\", }[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_r_latency_count{cluster=~\"$cluster\", }[$__rate_interval])\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "read",
"refId": "A"
},
{
- "expr": "rate(ceph_osd_op_w_latency_sum{job=~\"$job\", ceph_daemon=~\"$osd\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_w_latency_count{job=~\"$job\"}[$__rate_interval])\n",
+ "expr": "rate(ceph_osd_op_w_latency_sum{ceph_daemon=~\"$osd\", cluster=~\"$cluster\", }[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_w_latency_count{cluster=~\"$cluster\", }[$__rate_interval])\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "write",
"steppedLine": false,
"targets": [
{
- "expr": "rate(ceph_osd_op_r{job=~\"$job\", ceph_daemon=~\"$osd\"}[$__rate_interval])",
+ "expr": "rate(ceph_osd_op_r{ceph_daemon=~\"$osd\", cluster=~\"$cluster\", }[$__rate_interval])",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Reads",
"refId": "A"
},
{
- "expr": "rate(ceph_osd_op_w{job=~\"$job\", ceph_daemon=~\"$osd\"}[$__rate_interval])",
+ "expr": "rate(ceph_osd_op_w{ceph_daemon=~\"$osd\", cluster=~\"$cluster\", }[$__rate_interval])",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Writes",
"steppedLine": false,
"targets": [
{
- "expr": "rate(ceph_osd_op_r_out_bytes{job=~\"$job\", ceph_daemon=~\"$osd\"}[$__rate_interval])",
+ "expr": "rate(ceph_osd_op_r_out_bytes{ceph_daemon=~\"$osd\", cluster=~\"$cluster\", }[$__rate_interval])",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Read Bytes",
"refId": "A"
},
{
- "expr": "rate(ceph_osd_op_w_in_bytes{job=~\"$job\", ceph_daemon=~\"$osd\"}[$__rate_interval])",
+ "expr": "rate(ceph_osd_op_w_in_bytes{ceph_daemon=~\"$osd\", cluster=~\"$cluster\", }[$__rate_interval])",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Write Bytes",
"steppedLine": false,
"targets": [
{
- "expr": "(\n label_replace(\n rate(node_disk_read_time_seconds_total{}[$__rate_interval]) /\n rate(node_disk_reads_completed_total{}[$__rate_interval]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n ) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", ceph_daemon=~\"$osd\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n )\n)\n",
+ "expr": "(\n label_replace(\n rate(node_disk_read_time_seconds_total[$__rate_interval]) /\n rate(node_disk_reads_completed_total[$__rate_interval]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n ) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{ceph_daemon=~\"$osd\", cluster=~\"$cluster\", },\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n )\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{instance}}/{{device}} Reads",
"refId": "A"
},
{
- "expr": "(\n label_replace(\n rate(node_disk_write_time_seconds_total{}[$__rate_interval]) /\n rate(node_disk_writes_completed_total{}[$__rate_interval]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") and on (instance, device)\n label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", ceph_daemon=~\"$osd\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n )\n )\n",
+ "expr": "(\n label_replace(\n rate(node_disk_write_time_seconds_total[$__rate_interval]) /\n rate(node_disk_writes_completed_total[$__rate_interval]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") and on (instance, device)\n label_replace(\n label_replace(\n ceph_disk_occupation_human{ceph_daemon=~\"$osd\", cluster=~\"$cluster\", }, \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n )\n )\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{instance}}/{{device}} Writes",
"steppedLine": false,
"targets": [
{
- "expr": "label_replace(\n rate(node_disk_writes_completed_total{}[$__rate_interval]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", ceph_daemon=~\"$osd\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n",
+ "expr": "label_replace(\n rate(node_disk_writes_completed_total[$__rate_interval]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{ceph_daemon=~\"$osd\", cluster=~\"$cluster\", },\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{device}} on {{instance}} Writes",
"refId": "A"
},
{
- "expr": "label_replace(\n rate(node_disk_reads_completed_total{}[$__rate_interval]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", ceph_daemon=~\"$osd\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n",
+ "expr": "label_replace(\n rate(node_disk_reads_completed_total[$__rate_interval]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{ceph_daemon=~\"$osd\", cluster=~\"$cluster\", },\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{device}} on {{instance}} Reads",
"steppedLine": false,
"targets": [
{
- "expr": "label_replace(\n rate(node_disk_read_bytes_total{}[$__rate_interval]), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", ceph_daemon=~\"$osd\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n",
+ "expr": "label_replace(\n rate(node_disk_read_bytes_total[$__rate_interval]), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{ceph_daemon=~\"$osd\", cluster=~\"$cluster\", },\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{instance}} {{device}} Reads",
"refId": "A"
},
{
- "expr": "label_replace(\n rate(node_disk_written_bytes_total{}[$__rate_interval]), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", ceph_daemon=~\"$osd\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n",
+ "expr": "label_replace(\n rate(node_disk_written_bytes_total[$__rate_interval]), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{ceph_daemon=~\"$osd\", cluster=~\"$cluster\", },\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{instance}} {{device}} Writes",
"steppedLine": false,
"targets": [
{
- "expr": "label_replace(\n rate(node_disk_io_time_seconds_total{}[$__rate_interval]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", ceph_daemon=~\"$osd\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n",
+ "expr": "label_replace(\n rate(node_disk_io_time_seconds_total[$__rate_interval]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{ceph_daemon=~\"$osd\", cluster=~\"$cluster\", }, \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{device}} on {{instance}}",
"type": "datasource"
},
{
- "allValue": ".+",
+ "allValue": null,
"current": { },
"datasource": "$datasource",
- "hide": 2,
- "includeAll": true,
+ "hide": 0,
+ "includeAll": false,
"label": "cluster",
- "multi": true,
+ "multi": false,
"name": "cluster",
"options": [ ],
- "query": "label_values(ceph_osd_metadata, cluster)",
- "refresh": 1,
- "regex": "(.*)",
- "sort": 1,
- "tagValuesQuery": "",
- "tags": [ ],
- "tagsQuery": "",
- "type": "query",
- "useTags": false
- },
- {
- "allValue": ".+",
- "current": { },
- "datasource": "$datasource",
- "hide": 0,
- "includeAll": true,
- "label": "job",
- "multi": true,
- "name": "job",
- "options": [ ],
- "query": "label_values(ceph_osd_metadata{}, job)",
+ "query": "label_values(ceph_health_status, cluster)",
"refresh": 1,
"regex": "(.*)",
"sort": 1,
"multi": false,
"name": "osd",
"options": [ ],
- "query": "label_values(ceph_osd_metadata{job=~\"$job\"}, ceph_daemon)",
+ "query": "label_values(ceph_osd_metadata{cluster=~\"$cluster\", }, ceph_daemon)",
"refresh": 1,
"regex": "(.*)",
"sort": 1,
"steppedLine": false,
"targets": [
{
- "expr": "avg (\n rate(ceph_osd_op_r_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_r_latency_count{job=~\"$job\"}[$__rate_interval]) * 1000\n)\n",
+ "expr": "avg (\n rate(ceph_osd_op_r_latency_sum{cluster=~\"$cluster\", }[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_r_latency_count{cluster=~\"$cluster\", }[$__rate_interval]) * 1000\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "AVG read",
"refId": "A"
},
{
- "expr": "max(\n rate(ceph_osd_op_r_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_r_latency_count{job=~\"$job\"}[$__rate_interval]) * 1000\n)\n",
+ "expr": "max(\n rate(ceph_osd_op_r_latency_sum{cluster=~\"$cluster\", }[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_r_latency_count{cluster=~\"$cluster\", }[$__rate_interval]) * 1000\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "MAX read",
"refId": "B"
},
{
- "expr": "quantile(0.95,\n (\n rate(ceph_osd_op_r_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_r_latency_count{job=~\"$job\"}[$__rate_interval])\n * 1000\n )\n)\n",
+ "expr": "quantile(0.95,\n (\n rate(ceph_osd_op_r_latency_sum{cluster=~\"$cluster\", }[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_r_latency_count{cluster=~\"$cluster\", }[$__rate_interval])\n * 1000\n )\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "@95%ile",
"styles": "",
"targets": [
{
- "expr": "topk(10,\n (sort(\n (\n rate(ceph_osd_op_r_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_r_latency_count{job=~\"$job\"}[$__rate_interval]) *\n 1000\n )\n ))\n)\n",
+ "expr": "topk(10,\n (sort(\n (\n rate(ceph_osd_op_r_latency_sum{cluster=~\"$cluster\", }[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_r_latency_count{cluster=~\"$cluster\", }[$__rate_interval]) *\n 1000\n )\n ))\n)\n",
"format": "table",
"instant": true,
"intervalFactor": 1,
"steppedLine": false,
"targets": [
{
- "expr": "avg(\n rate(ceph_osd_op_w_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_w_latency_count{job=~\"$job\"}[$__rate_interval])\n * 1000\n)\n",
+ "expr": "avg(\n rate(ceph_osd_op_w_latency_sum{cluster=~\"$cluster\", }[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_w_latency_count{cluster=~\"$cluster\", }[$__rate_interval])\n * 1000\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "AVG write",
"refId": "A"
},
{
- "expr": "max(\n rate(ceph_osd_op_w_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_w_latency_count{job=~\"$job\"}[$__rate_interval]) *\n 1000\n)\n",
+ "expr": "max(\n rate(ceph_osd_op_w_latency_sum{cluster=~\"$cluster\", }[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_w_latency_count{cluster=~\"$cluster\", }[$__rate_interval]) *\n 1000\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "MAX write",
"refId": "B"
},
{
- "expr": "quantile(0.95, (\n rate(ceph_osd_op_w_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_w_latency_count{job=~\"$job\"}[$__rate_interval]) *\n 1000\n))\n",
+ "expr": "quantile(0.95, (\n rate(ceph_osd_op_w_latency_sum{cluster=~\"$cluster\", }[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_w_latency_count{cluster=~\"$cluster\", }[$__rate_interval]) *\n 1000\n))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "@95%ile write",
"styles": "",
"targets": [
{
- "expr": "topk(10,\n (sort(\n (rate(ceph_osd_op_w_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_w_latency_count{job=~\"$job\"}[$__rate_interval]) *\n 1000)\n ))\n)\n",
+ "expr": "topk(10,\n (sort(\n (rate(ceph_osd_op_w_latency_sum{cluster=~\"$cluster\", }[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_w_latency_count{cluster=~\"$cluster\", }[$__rate_interval]) *\n 1000)\n ))\n)\n",
"format": "table",
"instant": true,
"intervalFactor": 1,
},
"targets": [
{
- "expr": "count by (device_class) (ceph_osd_metadata{job=~\"$job\"})",
+ "expr": "count by (device_class) (ceph_osd_metadata{cluster=~\"$cluster\", })",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{device_class}}",
},
"targets": [
{
- "expr": "count(ceph_bluefs_wal_total_bytes{job=~\"$job\"})",
+ "expr": "count(ceph_bluefs_wal_total_bytes{cluster=~\"$cluster\", })",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "bluestore",
"refId": "A"
},
{
- "expr": "absent(ceph_bluefs_wal_total_bytes{job=~\"$job\"}) * count(ceph_osd_metadata{job=~\"$job\"})",
+ "expr": "absent(ceph_bluefs_wal_total_bytes{cluster=~\"$cluster\", }) * count(ceph_osd_metadata{cluster=~\"$cluster\", })",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "filestore",
},
"targets": [
{
- "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} < 1099511627776)",
+ "expr": "count(ceph_osd_stat_bytes{cluster=~\"$cluster\", } < 1099511627776)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "<1TB",
"refId": "A"
},
{
- "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 1099511627776 < 2199023255552)",
+ "expr": "count(ceph_osd_stat_bytes{cluster=~\"$cluster\", } >= 1099511627776 < 2199023255552)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "<2TB",
"refId": "B"
},
{
- "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 2199023255552 < 3298534883328)",
+ "expr": "count(ceph_osd_stat_bytes{cluster=~\"$cluster\", } >= 2199023255552 < 3298534883328)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "<3TB",
"refId": "C"
},
{
- "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 3298534883328 < 4398046511104)",
+ "expr": "count(ceph_osd_stat_bytes{cluster=~\"$cluster\", } >= 3298534883328 < 4398046511104)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "<4TB",
"refId": "D"
},
{
- "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 4398046511104 < 6597069766656)",
+ "expr": "count(ceph_osd_stat_bytes{cluster=~\"$cluster\", } >= 4398046511104 < 6597069766656)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "<6TB",
"refId": "E"
},
{
- "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 6597069766656 < 8796093022208)",
+ "expr": "count(ceph_osd_stat_bytes{cluster=~\"$cluster\", } >= 6597069766656 < 8796093022208)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "<8TB",
"refId": "F"
},
{
- "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 8796093022208 < 10995116277760)",
+ "expr": "count(ceph_osd_stat_bytes{cluster=~\"$cluster\", } >= 8796093022208 < 10995116277760)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "<10TB",
"refId": "G"
},
{
- "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 10995116277760 < 13194139533312)",
+ "expr": "count(ceph_osd_stat_bytes{cluster=~\"$cluster\", } >= 10995116277760 < 13194139533312)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "<12TB",
"refId": "H"
},
{
- "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 13194139533312)",
+ "expr": "count(ceph_osd_stat_bytes{cluster=~\"$cluster\", } >= 13194139533312)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "<12TB+",
"steppedLine": false,
"targets": [
{
- "expr": "ceph_osd_numpg{job=~\"$job\"}",
+ "expr": "ceph_osd_numpg{cluster=~\"$cluster\", }",
"format": "time_series",
"instant": true,
"intervalFactor": 1,
"tableColumn": "",
"targets": [
{
- "expr": "sum(ceph_bluestore_onode_hits{job=~\"$job\"}) / (\n sum(ceph_bluestore_onode_hits{job=~\"$job\"}) +\n sum(ceph_bluestore_onode_misses{job=~\"$job\"})\n)\n",
+ "expr": "sum(ceph_bluestore_onode_hits{cluster=~\"$cluster\", }) / (\n sum(ceph_bluestore_onode_hits{cluster=~\"$cluster\", }) +\n sum(ceph_bluestore_onode_misses{cluster=~\"$cluster\", })\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "",
"steppedLine": false,
"targets": [
{
- "expr": "round(sum(rate(ceph_pool_rd{job=~\"$job\"}[$__rate_interval])))",
+ "expr": "round(sum(rate(ceph_pool_rd{cluster=~\"$cluster\", }[$__rate_interval])))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Reads",
"refId": "A"
},
{
- "expr": "round(sum(rate(ceph_pool_wr{job=~\"$job\"}[$__rate_interval])))",
+ "expr": "round(sum(rate(ceph_pool_wr{cluster=~\"$cluster\", }[$__rate_interval])))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Writes",
"type": "datasource"
},
{
- "allValue": ".+",
+ "allValue": null,
"current": { },
"datasource": "$datasource",
- "hide": 2,
- "includeAll": true,
+ "hide": 0,
+ "includeAll": false,
"label": "cluster",
- "multi": true,
+ "multi": false,
"name": "cluster",
"options": [ ],
- "query": "label_values(ceph_osd_metadata, cluster)",
- "refresh": 1,
- "regex": "(.*)",
- "sort": 1,
- "tagValuesQuery": "",
- "tags": [ ],
- "tagsQuery": "",
- "type": "query",
- "useTags": false
- },
- {
- "allValue": ".+",
- "current": { },
- "datasource": "$datasource",
- "hide": 0,
- "includeAll": true,
- "label": "job",
- "multi": true,
- "name": "job",
- "options": [ ],
- "query": "label_values(ceph_osd_metadata{}, job)",
+ "query": "label_values(ceph_health_status, cluster)",
"refresh": 1,
"regex": "(.*)",
"sort": 1,
"tableColumn": "",
"targets": [
{
- "expr": "(ceph_pool_stored{job=~\"$job\"} / (ceph_pool_stored{job=~\"$job\"} + ceph_pool_max_avail{job=~\"$job\"})) *\n on(pool_id) group_left(instance, name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"}\n",
+ "expr": "(ceph_pool_stored{cluster=~\"$cluster\", } / (ceph_pool_stored{cluster=~\"$cluster\", } + ceph_pool_max_avail{cluster=~\"$cluster\", })) *\n on(pool_id) group_left(instance, name) ceph_pool_metadata{name=~\"$pool_name\", cluster=~\"$cluster\", }\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "",
"tableColumn": "",
"targets": [
{
- "expr": "(ceph_pool_max_avail{job=~\"$job\"} / deriv(ceph_pool_stored{job=~\"$job\"}[6h])) *\n on(pool_id) group_left(instance, name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"} > 0\n",
+ "expr": "(ceph_pool_max_avail{cluster=~\"$cluster\", } / deriv(ceph_pool_stored{cluster=~\"$cluster\", }[6h])) *\n on(pool_id) group_left(instance, name) ceph_pool_metadata{name=~\"$pool_name\", cluster=~\"$cluster\", } > 0\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "",
"steppedLine": false,
"targets": [
{
- "expr": "deriv(ceph_pool_objects{job=~\"$job\"}[1m]) *\n on(pool_id) group_left(instance, name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"}\n",
+ "expr": "deriv(ceph_pool_objects{cluster=~\"$cluster\", }[1m]) *\n on(pool_id) group_left(instance, name) ceph_pool_metadata{name=~\"$pool_name\", cluster=~\"$cluster\", }\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Objects per second",
"steppedLine": false,
"targets": [
{
- "expr": "rate(ceph_pool_rd{job=~\"$job\"}[$__rate_interval]) *\n on(pool_id) group_left(instance,name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"}\n",
+ "expr": "rate(ceph_pool_rd{cluster=~\"$cluster\", }[$__rate_interval]) *\n on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~\"$pool_name\", cluster=~\"$cluster\", }\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "reads",
"refId": "A"
},
{
- "expr": "rate(ceph_pool_wr{job=~\"$job\"}[$__rate_interval]) *\n on(pool_id) group_left(instance, name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"}\n",
+ "expr": "rate(ceph_pool_wr{cluster=~\"$cluster\", }[$__rate_interval]) *\n on(pool_id) group_left(instance, name) ceph_pool_metadata{name=~\"$pool_name\", cluster=~\"$cluster\", }\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "writes",
"steppedLine": false,
"targets": [
{
- "expr": "rate(ceph_pool_rd_bytes{job=~\"$job\"}[$__rate_interval]) +\n on(pool_id) group_left(instance, name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"}\n",
+ "expr": "rate(ceph_pool_rd_bytes{cluster=~\"$cluster\", }[$__rate_interval]) +\n on(pool_id) group_left(instance, name) ceph_pool_metadata{name=~\"$pool_name\", cluster=~\"$cluster\", }\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "reads",
"refId": "A"
},
{
- "expr": "rate(ceph_pool_wr_bytes{job=~\"$job\"}[$__rate_interval]) +\n on(pool_id) group_left(instance,name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"}\n",
+ "expr": "rate(ceph_pool_wr_bytes{cluster=~\"$cluster\", }[$__rate_interval]) +\n on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~\"$pool_name\", cluster=~\"$cluster\", }\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "writes",
"steppedLine": false,
"targets": [
{
- "expr": "ceph_pool_objects{job=~\"$job\"} *\n on(pool_id) group_left(instance,name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"}\n",
+ "expr": "ceph_pool_objects{cluster=~\"$cluster\", } *\n on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~\"$pool_name\", cluster=~\"$cluster\", }\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Number of Objects",
"type": "datasource"
},
{
- "allValue": ".+",
+ "allValue": null,
"current": { },
"datasource": "$datasource",
- "hide": 2,
- "includeAll": true,
+ "hide": 0,
+ "includeAll": false,
"label": "cluster",
- "multi": true,
+ "multi": false,
"name": "cluster",
"options": [ ],
- "query": "label_values(ceph_osd_metadata, cluster)",
- "refresh": 1,
- "regex": "(.*)",
- "sort": 1,
- "tagValuesQuery": "",
- "tags": [ ],
- "tagsQuery": "",
- "type": "query",
- "useTags": false
- },
- {
- "allValue": ".+",
- "current": { },
- "datasource": "$datasource",
- "hide": 0,
- "includeAll": true,
- "label": "job",
- "multi": true,
- "name": "job",
- "options": [ ],
- "query": "label_values(ceph_osd_metadata{}, job)",
+ "query": "label_values(ceph_health_status, cluster)",
"refresh": 1,
"regex": "(.*)",
"sort": 1,
"multi": false,
"name": "pool_name",
"options": [ ],
- "query": "label_values(ceph_pool_metadata{job=~\"$job\"}, name)",
+ "query": "label_values(ceph_pool_metadata{cluster=~\"$cluster\", }, name)",
"refresh": 1,
"regex": "",
"sort": 1,
"tableColumn": "",
"targets": [
{
- "expr": "count(ceph_pool_metadata{job=~\"$job\"})",
+ "expr": "count(ceph_pool_metadata{cluster=~\"$cluster\", })",
"format": "table",
"instant": true,
"intervalFactor": 1,
"tableColumn": "",
"targets": [
{
- "expr": "count(ceph_pool_metadata{job=~\"$job\", compression_mode!=\"none\"})",
+ "expr": "count(ceph_pool_metadata{compression_mode!=\"none\", cluster=~\"$cluster\", })",
"format": "",
"intervalFactor": 1,
"legendFormat": "",
"tableColumn": "",
"targets": [
{
- "expr": "sum(ceph_osd_stat_bytes{job=~\"$job\"})",
+ "expr": "sum(ceph_osd_stat_bytes{cluster=~\"$cluster\", })",
"format": "",
"intervalFactor": 1,
"legendFormat": "",
"tableColumn": "",
"targets": [
{
- "expr": "sum(ceph_pool_bytes_used{job=~\"$job\"})",
+ "expr": "sum(ceph_pool_bytes_used{cluster=~\"$cluster\", })",
"format": "",
"instant": true,
"intervalFactor": 1,
"tableColumn": "",
"targets": [
{
- "expr": "sum(ceph_pool_stored{job=~\"$job\"})",
+ "expr": "sum(ceph_pool_stored{cluster=~\"$cluster\", })",
"format": "",
"instant": true,
"intervalFactor": 1,
"tableColumn": "",
"targets": [
{
- "expr": "sum(\n ceph_pool_compress_under_bytes{job=~\"$job\"} -\n ceph_pool_compress_bytes_used{job=~\"$job\"}\n)\n",
+ "expr": "sum(\n ceph_pool_compress_under_bytes{cluster=~\"$cluster\", } -\n ceph_pool_compress_bytes_used{cluster=~\"$cluster\", }\n)\n",
"format": "",
"intervalFactor": 1,
"legendFormat": "",
"tableColumn": "",
"targets": [
{
- "expr": "(\n sum(ceph_pool_compress_under_bytes{job=~\"$job\"} > 0) /\n sum(ceph_pool_stored_raw{job=~\"$job\"} and ceph_pool_compress_under_bytes{job=~\"$job\"} > 0)\n) * 100\n",
+ "expr": "(\n sum(ceph_pool_compress_under_bytes{cluster=~\"$cluster\", } > 0) /\n sum(ceph_pool_stored_raw{cluster=~\"$cluster\", } and ceph_pool_compress_under_bytes{cluster=~\"$cluster\", } > 0)\n) * 100\n",
"format": "table",
"intervalFactor": 1,
"legendFormat": "",
"tableColumn": "",
"targets": [
{
- "expr": "sum(\n ceph_pool_compress_under_bytes{job=~\"$job\"} > 0)\n / sum(ceph_pool_compress_bytes_used{job=~\"$job\"} > 0\n)\n",
+ "expr": "sum(\n ceph_pool_compress_under_bytes{cluster=~\"$cluster\", } > 0)\n / sum(ceph_pool_compress_bytes_used{cluster=~\"$cluster\", } > 0\n)\n",
"format": "",
"intervalFactor": 1,
"legendFormat": "",
"styles": "",
"targets": [
{
- "expr": "(\n ceph_pool_compress_under_bytes{job=~\"$job\"} /\n ceph_pool_compress_bytes_used{job=~\"$job\"} > 0\n) and on(pool_id) (\n (\n (ceph_pool_compress_under_bytes{job=~\"$job\"} > 0) /\n ceph_pool_stored_raw{job=~\"$job\"}\n ) * 100 > 0.5\n)\n",
+ "expr": "(\n ceph_pool_compress_under_bytes{cluster=~\"$cluster\", } /\n ceph_pool_compress_bytes_used{cluster=~\"$cluster\", } > 0\n) and on(pool_id) (\n (\n (ceph_pool_compress_under_bytes{cluster=~\"$cluster\", } > 0) /\n ceph_pool_stored_raw{cluster=~\"$cluster\", }\n ) * 100 > 0.5\n)\n",
"format": "table",
"instant": true,
"intervalFactor": 1,
"refId": "A"
},
{
- "expr": "ceph_pool_max_avail{job=~\"$job\"} *\n on(pool_id) group_left(name) ceph_pool_metadata{job=~\"$job\"}\n",
+ "expr": "ceph_pool_max_avail{cluster=~\"$cluster\", } *\n on(pool_id) group_left(name) ceph_pool_metadata{cluster=~\"$cluster\", }\n",
"format": "table",
"instant": true,
"intervalFactor": 1,
"refId": "B"
},
{
- "expr": "(\n (ceph_pool_compress_under_bytes{job=~\"$job\"} > 0) /\n ceph_pool_stored_raw{job=~\"$job\"}\n) * 100\n",
+ "expr": "(\n (ceph_pool_compress_under_bytes{cluster=~\"$cluster\", } > 0) /\n ceph_pool_stored_raw{cluster=~\"$cluster\", }\n) * 100\n",
"format": "table",
"instant": true,
"intervalFactor": 1,
"refId": "C"
},
{
- "expr": "ceph_pool_percent_used{job=~\"$job\"} *\n on(pool_id) group_left(name) ceph_pool_metadata{job=~\"$job\"}\n",
+ "expr": "ceph_pool_percent_used{cluster=~\"$cluster\", } *\n on(pool_id) group_left(name) ceph_pool_metadata{cluster=~\"$cluster\", }\n",
"format": "table",
"instant": true,
"intervalFactor": 1,
"refId": "D"
},
{
- "expr": "ceph_pool_compress_under_bytes{job=~\"$job\"} -\n ceph_pool_compress_bytes_used{job=~\"$job\"} > 0\n",
+ "expr": "ceph_pool_compress_under_bytes{cluster=~\"$cluster\", } -\n ceph_pool_compress_bytes_used{cluster=~\"$cluster\", } > 0\n",
"format": "table",
"instant": true,
"intervalFactor": 1,
"refId": "E"
},
{
- "expr": "delta(ceph_pool_stored{job=~\"$job\"}[5d])",
+ "expr": "delta(ceph_pool_stored{cluster=~\"$cluster\", }[5d])",
"format": "table",
"instant": true,
"intervalFactor": 1,
"refId": "F"
},
{
- "expr": "rate(ceph_pool_rd{job=~\"$job\"}[$__rate_interval])\n + rate(ceph_pool_wr{job=~\"$job\"}[$__rate_interval])\n",
+ "expr": "rate(ceph_pool_rd{cluster=~\"$cluster\", }[$__rate_interval])\n + rate(ceph_pool_wr{cluster=~\"$cluster\", }[$__rate_interval])\n",
"format": "table",
"instant": true,
"intervalFactor": 1,
"refId": "G"
},
{
- "expr": "rate(ceph_pool_rd_bytes{job=~\"$job\"}[$__rate_interval]) +\n rate(ceph_pool_wr_bytes{job=~\"$job\"}[$__rate_interval])\n",
+ "expr": "rate(ceph_pool_rd_bytes{cluster=~\"$cluster\", }[$__rate_interval]) +\n rate(ceph_pool_wr_bytes{cluster=~\"$cluster\", }[$__rate_interval])\n",
"format": "table",
"instant": true,
"intervalFactor": 1,
"refId": "H"
},
{
- "expr": "ceph_pool_metadata{job=~\"$job\"}",
+ "expr": "ceph_pool_metadata{cluster=~\"$cluster\", }",
"format": "table",
"instant": true,
"intervalFactor": 1,
"refId": "I"
},
{
- "expr": "ceph_pool_stored{job=~\"$job\"} * on(pool_id) group_left ceph_pool_metadata{job=~\"$job\"}",
+ "expr": "ceph_pool_stored{cluster=~\"$cluster\", } * on(pool_id) group_left ceph_pool_metadata{cluster=~\"$cluster\", }",
"format": "table",
"instant": true,
"intervalFactor": 1,
"refId": "J"
},
{
- "expr": "ceph_pool_metadata{job=~\"$job\", compression_mode!=\"none\"}",
+ "expr": "ceph_pool_metadata{compression_mode!=\"none\", cluster=~\"$cluster\", }",
"format": "table",
"instant": true,
"intervalFactor": 1,
"steppedLine": false,
"targets": [
{
- "expr": "topk($topk,\n round(\n (\n rate(ceph_pool_rd{job=~\"$job\"}[$__rate_interval]) +\n rate(ceph_pool_wr{job=~\"$job\"}[$__rate_interval])\n ), 1\n ) * on(pool_id) group_left(instance,name) ceph_pool_metadata{job=~\"$job\"})\n",
+ "expr": "topk($topk,\n round(\n (\n rate(ceph_pool_rd{cluster=~\"$cluster\", }[$__rate_interval]) +\n rate(ceph_pool_wr{cluster=~\"$cluster\", }[$__rate_interval])\n ), 1\n ) * on(pool_id) group_left(instance,name) ceph_pool_metadata{cluster=~\"$cluster\", })\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{name}} ",
"refId": "A"
},
{
- "expr": "topk($topk,\n rate(ceph_pool_wr{job=~\"$job\"}[$__rate_interval]) +\n on(pool_id) group_left(instance,name) ceph_pool_metadata{job=~\"$job\"}\n)\n",
+ "expr": "topk($topk,\n rate(ceph_pool_wr{cluster=~\"$cluster\", }[$__rate_interval]) +\n on(pool_id) group_left(instance,name) ceph_pool_metadata{cluster=~\"$cluster\", }\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{name}} - write",
"steppedLine": false,
"targets": [
{
- "expr": "topk($topk,\n (\n rate(ceph_pool_rd_bytes{job=~\"$job\"}[$__rate_interval]) +\n rate(ceph_pool_wr_bytes{job=~\"$job\"}[$__rate_interval])\n ) * on(pool_id) group_left(instance, name) ceph_pool_metadata{job=~\"$job\"}\n)\n",
+ "expr": "topk($topk,\n (\n rate(ceph_pool_rd_bytes{cluster=~\"$cluster\", }[$__rate_interval]) +\n rate(ceph_pool_wr_bytes{cluster=~\"$cluster\", }[$__rate_interval])\n ) * on(pool_id) group_left(instance, name) ceph_pool_metadata{cluster=~\"$cluster\", }\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{name}}",
"steppedLine": false,
"targets": [
{
- "expr": "ceph_pool_bytes_used{job=~\"$job\"} * on(pool_id) group_right ceph_pool_metadata{job=~\"$job\"}",
+ "expr": "ceph_pool_bytes_used{cluster=~\"$cluster\", } * on(pool_id) group_right ceph_pool_metadata{cluster=~\"$cluster\", }",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{name}}",
"type": "datasource"
},
{
- "allValue": ".+",
+ "allValue": null,
"current": { },
"datasource": "$datasource",
- "hide": 2,
- "includeAll": true,
+ "hide": 0,
+ "includeAll": false,
"label": "cluster",
- "multi": true,
+ "multi": false,
"name": "cluster",
"options": [ ],
- "query": "label_values(ceph_osd_metadata, cluster)",
- "refresh": 1,
- "regex": "(.*)",
- "sort": 1,
- "tagValuesQuery": "",
- "tags": [ ],
- "tagsQuery": "",
- "type": "query",
- "useTags": false
- },
- {
- "allValue": ".+",
- "current": { },
- "datasource": "$datasource",
- "hide": 0,
- "includeAll": true,
- "label": "job",
- "multi": true,
- "name": "job",
- "options": [ ],
- "query": "label_values(ceph_osd_metadata{}, job)",
+ "query": "label_values(ceph_health_status, cluster)",
"refresh": 1,
"regex": "(.*)",
"sort": 1,
"steppedLine": false,
"targets": [
{
- "expr": "sum by (instance_id) (\n rate(ceph_rgw_op_get_obj_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n rate(ceph_rgw_op_get_obj_lat_count{job=~\"$job\"}[$__rate_interval])\n) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n",
+ "expr": "sum by (instance_id) (\n rate(ceph_rgw_op_get_obj_lat_sum{cluster=~\"$cluster\", }[$__rate_interval]) /\n rate(ceph_rgw_op_get_obj_lat_count{cluster=~\"$cluster\", }[$__rate_interval])\n) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "GET {{ceph_daemon}}",
"refId": "A"
},
{
- "expr": "sum by (instance_id) (\n rate(ceph_rgw_op_put_obj_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n rate(ceph_rgw_op_put_obj_lat_count{job=~\"$job\"}[$__rate_interval])\n) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n",
+ "expr": "sum by (instance_id) (\n rate(ceph_rgw_op_put_obj_lat_sum{cluster=~\"$cluster\", }[$__rate_interval]) /\n rate(ceph_rgw_op_put_obj_lat_count{cluster=~\"$cluster\", }[$__rate_interval])\n) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "PUT {{ceph_daemon}}",
"steppedLine": false,
"targets": [
{
- "expr": "rate(ceph_rgw_op_get_obj_bytes{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n",
+ "expr": "rate(ceph_rgw_op_get_obj_bytes{cluster=~\"$cluster\", }[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "GETs {{ceph_daemon}}",
"refId": "A"
},
{
- "expr": "rate(ceph_rgw_op_put_obj_bytes{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon)\n ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n",
+ "expr": "rate(ceph_rgw_op_put_obj_bytes{cluster=~\"$cluster\", }[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon)\n ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "PUTs {{ceph_daemon}}",
"steppedLine": false,
"targets": [
{
- "expr": "rate(ceph_rgw_failed_req{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\",ceph_daemon=~\"$rgw_servers\"}\n",
+ "expr": "rate(ceph_rgw_failed_req{cluster=~\"$cluster\", }[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Requests Failed {{ceph_daemon}}",
"refId": "A"
},
{
- "expr": "rate(ceph_rgw_get{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n",
+ "expr": "rate(ceph_rgw_get{cluster=~\"$cluster\", }[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "GETs {{ceph_daemon}}",
"refId": "B"
},
{
- "expr": "rate(ceph_rgw_put{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n",
+ "expr": "rate(ceph_rgw_put{cluster=~\"$cluster\", }[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "PUTs {{ceph_daemon}}",
"refId": "C"
},
{
- "expr": "(\n rate(ceph_rgw_req{job=~\"$job\"}[$__rate_interval]) -\n (\n rate(ceph_rgw_get{job=~\"$job\"}[$__rate_interval]) +\n rate(ceph_rgw_put{job=~\"$job\"}[$__rate_interval])\n )\n) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n",
+ "expr": "(\n rate(ceph_rgw_req{cluster=~\"$cluster\", }[$__rate_interval]) -\n (\n rate(ceph_rgw_get{cluster=~\"$cluster\", }[$__rate_interval]) +\n rate(ceph_rgw_put{cluster=~\"$cluster\", }[$__rate_interval])\n )\n) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Other {{ceph_daemon}}",
},
"targets": [
{
- "expr": "rate(ceph_rgw_failed_req{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n",
+ "expr": "rate(ceph_rgw_failed_req{cluster=~\"$cluster\", }[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Failures {{ceph_daemon}}",
"refId": "A"
},
{
- "expr": "rate(ceph_rgw_get{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n",
+ "expr": "rate(ceph_rgw_get{cluster=~\"$cluster\", }[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "GETs {{ceph_daemon}}",
"refId": "B"
},
{
- "expr": "rate(ceph_rgw_put{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n",
+ "expr": "rate(ceph_rgw_put{cluster=~\"$cluster\", }[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "PUTs {{ceph_daemon}}",
"refId": "C"
},
{
- "expr": "(\n rate(ceph_rgw_req{job=~\"$job\"}[$__rate_interval]) -\n (\n rate(ceph_rgw_get{job=~\"$job\"}[$__rate_interval]) +\n rate(ceph_rgw_put{job=~\"$job\"}[$__rate_interval])\n )\n) * on (instance_id) group_left (ceph_daemon)\n ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n",
+ "expr": "(\n rate(ceph_rgw_req{cluster=~\"$cluster\", }[$__rate_interval]) -\n (\n rate(ceph_rgw_get{cluster=~\"$cluster\", }[$__rate_interval]) +\n rate(ceph_rgw_put{cluster=~\"$cluster\", }[$__rate_interval])\n )\n) * on (instance_id) group_left (ceph_daemon)\n ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Other (DELETE,LIST) {{ceph_daemon}}",
"type": "datasource"
},
{
- "allValue": ".+",
+ "allValue": null,
"current": { },
"datasource": "$datasource",
- "hide": 2,
- "includeAll": true,
+ "hide": 0,
+ "includeAll": false,
"label": "cluster",
- "multi": true,
+ "multi": false,
"name": "cluster",
"options": [ ],
- "query": "label_values(ceph_osd_metadata, cluster)",
- "refresh": 1,
- "regex": "(.*)",
- "sort": 1,
- "tagValuesQuery": "",
- "tags": [ ],
- "tagsQuery": "",
- "type": "query",
- "useTags": false
- },
- {
- "allValue": ".+",
- "current": { },
- "datasource": "$datasource",
- "hide": 0,
- "includeAll": true,
- "label": "job",
- "multi": true,
- "name": "job",
- "options": [ ],
- "query": "label_values(ceph_osd_metadata{}, job)",
+ "query": "label_values(ceph_health_status, cluster)",
"refresh": 1,
"regex": "(.*)",
"sort": 1,
"multi": false,
"name": "rgw_servers",
"options": [ ],
- "query": "label_values(ceph_rgw_metadata{job=~\"$job\"}, ceph_daemon)",
+ "query": "label_values(ceph_rgw_metadata{cluster=~\"$cluster\", }, ceph_daemon)",
"refresh": 1,
"regex": "",
"sort": 1,
"steppedLine": false,
"targets": [
{
- "expr": "label_replace(\n rate(ceph_rgw_op_get_obj_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n rate(ceph_rgw_op_get_obj_lat_count{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
+ "expr": "label_replace(\n rate(ceph_rgw_op_get_obj_lat_sum{cluster=~\"$cluster\", }[$__rate_interval]) /\n rate(ceph_rgw_op_get_obj_lat_count{cluster=~\"$cluster\", }[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{cluster=~\"$cluster\", },\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "GET {{rgw_host}}",
"refId": "A"
},
{
- "expr": "label_replace(\n rate(ceph_rgw_op_put_obj_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n rate(ceph_rgw_op_put_obj_lat_count{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
+ "expr": "label_replace(\n rate(ceph_rgw_op_put_obj_lat_sum{cluster=~\"$cluster\", }[$__rate_interval]) /\n rate(ceph_rgw_op_put_obj_lat_count{cluster=~\"$cluster\", }[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{cluster=~\"$cluster\", },\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "PUT {{rgw_host}}",
"steppedLine": false,
"targets": [
{
- "expr": "sum by (rgw_host) (\n label_replace(\n rate(ceph_rgw_req{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n )\n)\n",
+ "expr": "sum by (rgw_host) (\n label_replace(\n rate(ceph_rgw_req{cluster=~\"$cluster\", }[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{cluster=~\"$cluster\", },\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n )\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{rgw_host}}",
"steppedLine": false,
"targets": [
{
- "expr": "label_replace(\n rate(ceph_rgw_op_get_obj_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n rate(ceph_rgw_op_get_obj_lat_count{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
+ "expr": "label_replace(\n rate(ceph_rgw_op_get_obj_lat_sum{cluster=~\"$cluster\", }[$__rate_interval]) /\n rate(ceph_rgw_op_get_obj_lat_count{cluster=~\"$cluster\", }[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{cluster=~\"$cluster\", },\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{rgw_host}}",
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(ceph_rgw_op_get_obj_bytes{job=~\"$job\"}[$__rate_interval]))",
+ "expr": "sum(rate(ceph_rgw_op_get_obj_bytes{cluster=~\"$cluster\", }[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "GETs",
"refId": "A"
},
{
- "expr": "sum(rate(ceph_rgw_op_put_obj_bytes{job=~\"$job\"}[$__rate_interval]))",
+ "expr": "sum(rate(ceph_rgw_op_put_obj_bytes{cluster=~\"$cluster\", }[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "PUTs",
"steppedLine": false,
"targets": [
{
- "expr": "label_replace(sum by (instance_id) (\n rate(ceph_rgw_op_get_obj_bytes{job=~\"$job\"}[$__rate_interval]) +\n rate(ceph_rgw_op_put_obj_bytes{job=~\"$job\"}[$__rate_interval])) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
+ "expr": "label_replace(sum by (instance_id) (\n rate(ceph_rgw_op_get_obj_bytes{cluster=~\"$cluster\", }[$__rate_interval]) +\n rate(ceph_rgw_op_put_obj_bytes{cluster=~\"$cluster\", }[$__rate_interval])) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{cluster=~\"$cluster\", },\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{rgw_host}}",
"steppedLine": false,
"targets": [
{
- "expr": "label_replace(\n rate(ceph_rgw_op_put_obj_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n rate(ceph_rgw_op_put_obj_lat_count{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
+ "expr": "label_replace(\n rate(ceph_rgw_op_put_obj_lat_sum{cluster=~\"$cluster\", }[$__rate_interval]) /\n rate(ceph_rgw_op_put_obj_lat_count{cluster=~\"$cluster\", }[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{cluster=~\"$cluster\", },\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{rgw_host}}",
"type": "datasource"
},
{
- "allValue": ".+",
+ "allValue": null,
"current": { },
"datasource": "$datasource",
- "hide": 2,
- "includeAll": true,
+ "hide": 0,
+ "includeAll": false,
"label": "cluster",
- "multi": true,
+ "multi": false,
"name": "cluster",
"options": [ ],
- "query": "label_values(ceph_osd_metadata, cluster)",
- "refresh": 1,
- "regex": "(.*)",
- "sort": 1,
- "tagValuesQuery": "",
- "tags": [ ],
- "tagsQuery": "",
- "type": "query",
- "useTags": false
- },
- {
- "allValue": ".+",
- "current": { },
- "datasource": "$datasource",
- "hide": 0,
- "includeAll": true,
- "label": "job",
- "multi": true,
- "name": "job",
- "options": [ ],
- "query": "label_values(ceph_osd_metadata{}, job)",
+ "query": "label_values(ceph_health_status, cluster)",
"refresh": 1,
"regex": "(.*)",
"sort": 1,
"multi": false,
"name": "rgw_servers",
"options": [ ],
- "query": "label_values(ceph_rgw_metadata{job=~\"$job\"}, ceph_daemon)",
+ "query": "label_values(ceph_rgw_metadata{cluster=~\"$cluster\", }, ceph_daemon)",
"refresh": 1,
- "regex": "RGW Server",
+ "regex": ".*",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"steppedLine": false,
"targets": [
{
- "expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_fetch_bytes_sum{job=~\"$job\"}[$__rate_interval]))",
+ "expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_fetch_bytes_sum{cluster=~\"$cluster\", }[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{source_zone}}",
"steppedLine": false,
"targets": [
{
- "expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_fetch_bytes_count{job=~\"$job\"}[$__rate_interval]))",
+ "expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_fetch_bytes_count{cluster=~\"$cluster\", }[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{source_zone}}",
"steppedLine": false,
"targets": [
{
- "expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_poll_latency_sum{job=~\"$job\"}[$__rate_interval]))",
+ "expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_poll_latency_sum{cluster=~\"$cluster\", }[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{source_zone}}",
"steppedLine": false,
"targets": [
{
- "expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_fetch_errors{job=~\"$job\"}[$__rate_interval]))",
+ "expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_fetch_errors{cluster=~\"$cluster\", }[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{source_zone}}",
"type": "datasource"
},
{
- "allValue": ".+",
+ "allValue": null,
"current": { },
"datasource": "$datasource",
- "hide": 2,
- "includeAll": true,
+ "hide": 0,
+ "includeAll": false,
"label": "cluster",
- "multi": true,
+ "multi": false,
"name": "cluster",
"options": [ ],
- "query": "label_values(ceph_osd_metadata, cluster)",
- "refresh": 1,
- "regex": "(.*)",
- "sort": 1,
- "tagValuesQuery": "",
- "tags": [ ],
- "tagsQuery": "",
- "type": "query",
- "useTags": false
- },
- {
- "allValue": ".+",
- "current": { },
- "datasource": "$datasource",
- "hide": 0,
- "includeAll": true,
- "label": "job",
- "multi": true,
- "name": "job",
- "options": [ ],
- "query": "label_values(ceph_osd_metadata{}, job)",
+ "query": "label_values(ceph_health_status, cluster)",
"refresh": 1,
"regex": "(.*)",
"sort": 1,
"datasource": "$datasource",
"hide": 0,
"includeAll": true,
- "label": "",
+ "label": null,
"multi": false,
"name": "rgw_servers",
"options": [ ],
- "query": "label_values(ceph_rgw_metadata{job=~\"$job\"}, ceph_daemon)",
+ "query": "label_values(ceph_rgw_metadata{cluster=~\"$cluster\", }, ceph_daemon)",
"refresh": 1,
- "regex": "RGW Server",
+ "regex": "rgw.(.*)",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"steppedLine": false,
"targets": [
{
- "expr": "rate(ceph_rbd_write_ops{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval])",
+ "expr": "rate(ceph_rbd_write_ops{pool=\"$pool\", image=\"$image\", cluster=~\"$cluster\", }[$__rate_interval])",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{pool}} Write",
"refId": "A"
},
{
- "expr": "rate(ceph_rbd_read_ops{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval])",
+ "expr": "rate(ceph_rbd_read_ops{pool=\"$pool\", image=\"$image\", cluster=~\"$cluster\", }[$__rate_interval])",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{pool}} Read",
"steppedLine": false,
"targets": [
{
- "expr": "rate(ceph_rbd_write_bytes{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval])",
+ "expr": "rate(ceph_rbd_write_bytes{pool=\"$pool\", image=\"$image\", cluster=~\"$cluster\", }[$__rate_interval])",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{pool}} Write",
"refId": "A"
},
{
- "expr": "rate(ceph_rbd_read_bytes{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval])",
+ "expr": "rate(ceph_rbd_read_bytes{pool=\"$pool\", image=\"$image\", cluster=~\"$cluster\", }[$__rate_interval])",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{pool}} Read",
"steppedLine": false,
"targets": [
{
- "expr": "rate(ceph_rbd_write_latency_sum{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval]) /\n rate(ceph_rbd_write_latency_count{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval])\n",
+ "expr": "rate(ceph_rbd_write_latency_sum{pool=\"$pool\", image=\"$image\", cluster=~\"$cluster\", }[$__rate_interval]) /\n rate(ceph_rbd_write_latency_count{pool=\"$pool\", image=\"$image\", cluster=~\"$cluster\", }[$__rate_interval])\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{pool}} Write",
"refId": "A"
},
{
- "expr": "rate(ceph_rbd_read_latency_sum{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval]) /\n rate(ceph_rbd_read_latency_count{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval])\n",
+ "expr": "rate(ceph_rbd_read_latency_sum{pool=\"$pool\", image=\"$image\", cluster=~\"$cluster\", }[$__rate_interval]) /\n rate(ceph_rbd_read_latency_count{pool=\"$pool\", image=\"$image\", cluster=~\"$cluster\", }[$__rate_interval])\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{pool}} Read",
"type": "datasource"
},
{
- "allValue": ".+",
+ "allValue": null,
"current": { },
"datasource": "$datasource",
- "hide": 2,
- "includeAll": true,
+ "hide": 0,
+ "includeAll": false,
"label": "cluster",
- "multi": true,
+ "multi": false,
"name": "cluster",
"options": [ ],
- "query": "label_values(ceph_osd_metadata, cluster)",
- "refresh": 1,
- "regex": "(.*)",
- "sort": 1,
- "tagValuesQuery": "",
- "tags": [ ],
- "tagsQuery": "",
- "type": "query",
- "useTags": false
- },
- {
- "allValue": ".+",
- "current": { },
- "datasource": "$datasource",
- "hide": 0,
- "includeAll": true,
- "label": "job",
- "multi": true,
- "name": "job",
- "options": [ ],
- "query": "label_values(ceph_osd_metadata{}, job)",
+ "query": "label_values(ceph_health_status, cluster)",
"refresh": 1,
"regex": "(.*)",
"sort": 1,
"multi": false,
"name": "pool",
"options": [ ],
- "query": "label_values(pool)",
+ "query": "label_values(ceph_rbd_read_ops{cluster=~\"$cluster\", }, pool)",
"refresh": 1,
"regex": "",
"sort": 0,
"multi": false,
"name": "image",
"options": [ ],
- "query": "label_values(image)",
+ "query": "label_values(ceph_rbd_read_ops{cluster=~\"$cluster\", , pool=\"$pool\"}, image)",
"refresh": 1,
"regex": "",
"sort": 0,
"steppedLine": false,
"targets": [
{
- "expr": "round(sum(rate(ceph_rbd_write_ops{job=~\"$job\"}[$__rate_interval])))",
+ "expr": "round(sum(rate(ceph_rbd_write_ops{cluster=~\"$cluster\", }[$__rate_interval])))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Writes",
"refId": "A"
},
{
- "expr": "round(sum(rate(ceph_rbd_read_ops{job=~\"$job\"}[$__rate_interval])))",
+ "expr": "round(sum(rate(ceph_rbd_read_ops{cluster=~\"$cluster\", }[$__rate_interval])))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Reads",
"steppedLine": false,
"targets": [
{
- "expr": "round(sum(rate(ceph_rbd_write_bytes{job=~\"$job\"}[$__rate_interval])))",
+ "expr": "round(sum(rate(ceph_rbd_write_bytes{cluster=~\"$cluster\", }[$__rate_interval])))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Write",
"refId": "A"
},
{
- "expr": "round(sum(rate(ceph_rbd_read_bytes{job=~\"$job\"}[$__rate_interval])))",
+ "expr": "round(sum(rate(ceph_rbd_read_bytes{cluster=~\"$cluster\", }[$__rate_interval])))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Read",
"steppedLine": false,
"targets": [
{
- "expr": "round(\n sum(rate(ceph_rbd_write_latency_sum{job=~\"$job\"}[$__rate_interval])) /\n sum(rate(ceph_rbd_write_latency_count{job=~\"$job\"}[$__rate_interval]))\n)\n",
+ "expr": "round(\n sum(rate(ceph_rbd_write_latency_sum{cluster=~\"$cluster\", }[$__rate_interval])) /\n sum(rate(ceph_rbd_write_latency_count{cluster=~\"$cluster\", }[$__rate_interval]))\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Write",
"refId": "A"
},
{
- "expr": "round(\n sum(rate(ceph_rbd_read_latency_sum{job=~\"$job\"}[$__rate_interval])) /\n sum(rate(ceph_rbd_read_latency_count{job=~\"$job\"}[$__rate_interval]))\n)\n",
+ "expr": "round(\n sum(rate(ceph_rbd_read_latency_sum{cluster=~\"$cluster\", }[$__rate_interval])) /\n sum(rate(ceph_rbd_read_latency_count{cluster=~\"$cluster\", }[$__rate_interval]))\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Read",
"styles": "",
"targets": [
{
- "expr": "topk(10,\n (\n sort((\n rate(ceph_rbd_write_ops{job=~\"$job\"}[$__rate_interval]) +\n on (image, pool, namespace) rate(ceph_rbd_read_ops{job=~\"$job\"}[$__rate_interval])\n ))\n )\n)\n",
+ "expr": "topk(10,\n (\n sort((\n rate(ceph_rbd_write_ops{cluster=~\"$cluster\", }[$__rate_interval]) +\n on (image, pool, namespace) rate(ceph_rbd_read_ops{cluster=~\"$cluster\", }[$__rate_interval])\n ))\n )\n)\n",
"format": "table",
"instant": true,
"intervalFactor": 1,
"styles": "",
"targets": [
{
- "expr": "topk(10,\n sort(\n sum(\n rate(ceph_rbd_read_bytes{job=~\"$job\"}[$__rate_interval]) +\n rate(ceph_rbd_write_bytes{job=~\"$job\"}[$__rate_interval])\n ) by (pool, image, namespace)\n )\n)\n",
+ "expr": "topk(10,\n sort(\n sum(\n rate(ceph_rbd_read_bytes{cluster=~\"$cluster\", }[$__rate_interval]) +\n rate(ceph_rbd_write_bytes{cluster=~\"$cluster\", }[$__rate_interval])\n ) by (pool, image, namespace)\n )\n)\n",
"format": "table",
"instant": true,
"intervalFactor": 1,
"styles": "",
"targets": [
{
- "expr": "topk(10,\n sum(\n rate(ceph_rbd_write_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n clamp_min(rate(ceph_rbd_write_latency_count{job=~\"$job\"}[$__rate_interval]), 1) +\n rate(ceph_rbd_read_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n clamp_min(rate(ceph_rbd_read_latency_count{job=~\"$job\"}[$__rate_interval]), 1)\n ) by (pool, image, namespace)\n)\n",
+ "expr": "topk(10,\n sum(\n rate(ceph_rbd_write_latency_sum{cluster=~\"$cluster\", }[$__rate_interval]) /\n clamp_min(rate(ceph_rbd_write_latency_count{cluster=~\"$cluster\", }[$__rate_interval]), 1) +\n rate(ceph_rbd_read_latency_sum{cluster=~\"$cluster\", }[$__rate_interval]) /\n clamp_min(rate(ceph_rbd_read_latency_count{cluster=~\"$cluster\", }[$__rate_interval]), 1)\n ) by (pool, image, namespace)\n)\n",
"format": "table",
"instant": true,
"intervalFactor": 1,
"type": "datasource"
},
{
- "allValue": ".+",
+ "allValue": null,
"current": { },
"datasource": "$datasource",
- "hide": 2,
- "includeAll": true,
+ "hide": 0,
+ "includeAll": false,
"label": "cluster",
- "multi": true,
+ "multi": false,
"name": "cluster",
"options": [ ],
- "query": "label_values(ceph_osd_metadata, cluster)",
- "refresh": 1,
- "regex": "(.*)",
- "sort": 1,
- "tagValuesQuery": "",
- "tags": [ ],
- "tagsQuery": "",
- "type": "query",
- "useTags": false
- },
- {
- "allValue": ".+",
- "current": { },
- "datasource": "$datasource",
- "hide": 0,
- "includeAll": true,
- "label": "job",
- "multi": true,
- "name": "job",
- "options": [ ],
- "query": "label_values(ceph_osd_metadata{}, job)",
+ "query": "label_values(ceph_health_status, cluster)",
"refresh": 1,
"regex": "(.*)",
"sort": 1,
"targets": [
{
"datasource": "${datasource}",
- "expr": "sum(ceph_rgw_op_put_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
+ "expr": "sum(ceph_rgw_op_put_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "__auto",
"targets": [
{
"datasource": "${datasource}",
- "expr": "sum\n(ceph_rgw_op_get_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
+ "expr": "sum\n(ceph_rgw_op_get_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "__auto",
"targets": [
{
"datasource": "${datasource}",
- "expr": "sum(ceph_rgw_op_put_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
+ "expr": "sum(ceph_rgw_op_put_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "__auto",
"targets": [
{
"datasource": "${datasource}",
- "expr": "sum\n((sum by(instance_id)(ceph_rgw_op_put_obj_bytes) > 0) / (sum by(instance_id)(ceph_rgw_op_put_obj_ops) > 0) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
+ "expr": "sum\n((sum by(instance_id)(ceph_rgw_op_put_obj_bytes) > 0) / (sum by(instance_id)(ceph_rgw_op_put_obj_ops) > 0) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "__auto",
"targets": [
{
"datasource": "${datasource}",
- "expr": "sum(ceph_rgw_op_list_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
+ "expr": "sum(ceph_rgw_op_list_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
"format": "time_series",
"instant": true,
"intervalFactor": 1,
},
{
"datasource": "${datasource}",
- "expr": "sum(ceph_rgw_op_list_buckets_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
+ "expr": "sum(ceph_rgw_op_list_buckets_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "List Buckets",
},
{
"datasource": "${datasource}",
- "expr": "sum(ceph_rgw_op_put_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
+ "expr": "sum(ceph_rgw_op_put_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
"format": "time_series",
"instant": true,
"intervalFactor": 1,
},
{
"datasource": "${datasource}",
- "expr": "sum(ceph_rgw_op_per_bucket_get_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
+ "expr": "sum(ceph_rgw_op_per_bucket_get_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
"format": "time_series",
"instant": true,
"intervalFactor": 1,
},
{
"datasource": "${datasource}",
- "expr": "sum(ceph_rgw_op_del_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
+ "expr": "sum(ceph_rgw_op_del_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
"format": "time_series",
"instant": true,
"intervalFactor": 1,
},
{
"datasource": "${datasource}",
- "expr": "sum(ceph_rgw_op_del_bucket_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
+ "expr": "sum(ceph_rgw_op_del_bucket_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
"format": "time_series",
"instant": true,
"intervalFactor": 1,
},
{
"datasource": "${datasource}",
- "expr": "sum(ceph_rgw_op_copy_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
+ "expr": "sum(ceph_rgw_op_copy_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Copy Objects",
"targets": [
{
"datasource": "${datasource}",
- "expr": "sum(ceph_rgw_op_put_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
+ "expr": "sum(ceph_rgw_op_put_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
"format": "time_series",
"instant": true,
"intervalFactor": 1,
},
{
"datasource": "${datasource}",
- "expr": "sum(ceph_rgw_op_per_bucket_get_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
+ "expr": "sum(ceph_rgw_op_per_bucket_get_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
"format": "time_series",
"instant": true,
"intervalFactor": 1,
},
{
"datasource": "${datasource}",
- "expr": "sum(ceph_rgw_op_del_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
+ "expr": "sum(ceph_rgw_op_del_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
"format": "time_series",
"instant": true,
"intervalFactor": 1,
},
{
"datasource": "${datasource}",
- "expr": "sum(ceph_rgw_op_copy_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
+ "expr": "sum(ceph_rgw_op_copy_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Copy Objects",
"targets": [
{
"datasource": "${datasource}",
- "expr": "sum(ceph_rgw_op_list_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
+ "expr": "sum(ceph_rgw_op_list_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
"format": "time_series",
"instant": true,
"intervalFactor": 1,
},
{
"datasource": "${datasource}",
- "expr": "sum(ceph_rgw_op_list_buckets_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
+ "expr": "sum(ceph_rgw_op_list_buckets_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "List Bucket",
},
{
"datasource": "${datasource}",
- "expr": "sum(ceph_rgw_op_put_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
+ "expr": "sum(ceph_rgw_op_put_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
"format": "time_series",
"instant": true,
"intervalFactor": 1,
},
{
"datasource": "${datasource}",
- "expr": "sum(ceph_rgw_op_get_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
+ "expr": "sum(ceph_rgw_op_get_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
"format": "time_series",
"instant": true,
"intervalFactor": 1,
},
{
"datasource": "${datasource}",
- "expr": "sum(ceph_rgw_op_del_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
+ "expr": "sum(ceph_rgw_op_del_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
"format": "time_series",
"instant": true,
"intervalFactor": 1,
},
{
"datasource": "${datasource}",
- "expr": "sum(ceph_rgw_op_del_bucket_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
+ "expr": "sum(ceph_rgw_op_del_bucket_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
"format": "time_series",
"instant": true,
"intervalFactor": 1,
},
{
"datasource": "${datasource}",
- "expr": "sum(ceph_rgw_op_copy_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
+ "expr": "sum(ceph_rgw_op_copy_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Copy Object",
"uid": "${datasource}"
},
"exemplar": false,
- "expr": "sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_put_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
+ "expr": "sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_put_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
"format": "table",
"hide": false,
"instant": true,
"uid": "${datasource}"
},
"exemplar": false,
- "expr": "sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_get_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
+ "expr": "sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_get_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
"format": "table",
"hide": false,
"instant": true,
"uid": "${datasource}"
},
"exemplar": false,
- "expr": "sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_del_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
+ "expr": "sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_del_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
"format": "table",
"hide": false,
"instant": true,
"uid": "${datasource}"
},
"exemplar": false,
- "expr": "sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_copy_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
+ "expr": "sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_copy_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
"format": "table",
"hide": false,
"instant": true,
"uid": "${datasource}"
},
"exemplar": false,
- "expr": "ceph_rgw_op_per_bucket_list_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}",
+ "expr": "ceph_rgw_op_per_bucket_list_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }",
"format": "table",
"hide": false,
"instant": true,
"uid": "${datasource}"
},
"exemplar": false,
- "expr": "ceph_rgw_op_per_bucket_put_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}",
+ "expr": "ceph_rgw_op_per_bucket_put_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }",
"format": "table",
"hide": false,
"instant": true,
"uid": "${datasource}"
},
"exemplar": false,
- "expr": "ceph_rgw_op_per_bucket_get_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}",
+ "expr": "ceph_rgw_op_per_bucket_get_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }",
"format": "table",
"hide": false,
"instant": true,
"uid": "${datasource}"
},
"exemplar": false,
- "expr": "ceph_rgw_op_per_bucket_del_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}",
+ "expr": "ceph_rgw_op_per_bucket_del_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }",
"format": "table",
"hide": false,
"instant": true,
"uid": "${datasource}"
},
"exemplar": false,
- "expr": "ceph_rgw_op_per_bucket_copy_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}",
+ "expr": "ceph_rgw_op_per_bucket_copy_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }",
"format": "table",
"hide": false,
"instant": true,
"uid": "${datasource}"
},
"exemplar": false,
- "expr": "ceph_rgw_op_per_user_put_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}",
+ "expr": "ceph_rgw_op_per_user_put_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }",
"format": "table",
"hide": false,
"instant": true,
"uid": "${datasource}"
},
"exemplar": false,
- "expr": "ceph_rgw_op_per_user_get_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}",
+ "expr": "ceph_rgw_op_per_user_get_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }",
"format": "table",
"hide": false,
"instant": true,
"uid": "${datasource}"
},
"exemplar": false,
- "expr": "ceph_rgw_op_per_user_del_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}",
+ "expr": "ceph_rgw_op_per_user_del_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }",
"format": "table",
"hide": false,
"instant": true,
"uid": "${datasource}"
},
"exemplar": false,
- "expr": "ceph_rgw_op_per_user_copy_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}",
+ "expr": "ceph_rgw_op_per_user_copy_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }",
"format": "table",
"hide": false,
"instant": true,
"uid": "${datasource}"
},
"exemplar": false,
- "expr": "ceph_rgw_op_per_user_list_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}",
+ "expr": "ceph_rgw_op_per_user_list_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }",
"format": "table",
"hide": false,
"instant": true,
"uid": "${datasource}"
},
"exemplar": false,
- "expr": "ceph_rgw_op_per_user_put_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}",
+ "expr": "ceph_rgw_op_per_user_put_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }",
"format": "table",
"hide": false,
"instant": true,
"uid": "${datasource}"
},
"exemplar": false,
- "expr": "ceph_rgw_op_per_user_get_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}",
+ "expr": "ceph_rgw_op_per_user_get_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }",
"format": "table",
"hide": false,
"instant": true,
"uid": "${datasource}"
},
"exemplar": false,
- "expr": "ceph_rgw_op_per_user_del_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}",
+ "expr": "ceph_rgw_op_per_user_del_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }",
"format": "table",
"hide": false,
"instant": true,
"uid": "${datasource}"
},
"exemplar": false,
- "expr": "ceph_rgw_op_per_user_copy_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}",
+ "expr": "ceph_rgw_op_per_user_copy_obj_lat_sum *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", }",
"format": "table",
"hide": false,
"instant": true,
"targets": [
{
"datasource": "${datasource}",
- "expr": "topk(5, \n sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_put_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})\n)",
+ "expr": "topk(5, \n sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_put_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })\n)",
"format": "time_series",
"instant": true,
"intervalFactor": 1,
"targets": [
{
"datasource": "${datasource}",
- "expr": "topk(5, \n sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_get_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})\n)",
+ "expr": "topk(5, \n sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_get_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })\n)",
"format": "time_series",
"instant": true,
"intervalFactor": 1,
"targets": [
{
"datasource": "${datasource}",
- "expr": "topk(5,\n sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_put_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})\n)",
+ "expr": "topk(5,\n sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_put_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })\n)",
"format": "time_series",
"instant": true,
"intervalFactor": 1,
"targets": [
{
"datasource": "${datasource}",
- "expr": "topk(5,\n sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_get_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})\n)",
+ "expr": "topk(5,\n sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_get_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })\n)",
"format": "time_series",
"instant": true,
"intervalFactor": 1,
"targets": [
{
"datasource": "${datasource}",
- "expr": "sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_put_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
+ "expr": "sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_put_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
"format": "time_series",
"instant": false,
"intervalFactor": 1,
"targets": [
{
"datasource": "${datasource}",
- "expr": "sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_get_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
+ "expr": "sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_get_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
"format": "time_series",
"instant": false,
"intervalFactor": 1,
"targets": [
{
"datasource": "${datasource}",
- "expr": "sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_copy_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
+ "expr": "sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_copy_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
"format": "time_series",
"instant": false,
"intervalFactor": 1,
"targets": [
{
"datasource": "${datasource}",
- "expr": "sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_del_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
+ "expr": "sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_del_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
"format": "time_series",
"instant": false,
"intervalFactor": 1,
"targets": [
{
"datasource": "${datasource}",
- "expr": "sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_get_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
+ "expr": "sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_get_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
"format": "time_series",
"instant": false,
"intervalFactor": 1,
"targets": [
{
"datasource": "${datasource}",
- "expr": "sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_put_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
+ "expr": "sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_put_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
"format": "time_series",
"instant": false,
"intervalFactor": 1,
"targets": [
{
"datasource": "${datasource}",
- "expr": "sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_list_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
+ "expr": "sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_list_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
"format": "time_series",
"instant": false,
"intervalFactor": 1,
"targets": [
{
"datasource": "${datasource}",
- "expr": "sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_del_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
+ "expr": "sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_del_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
"format": "time_series",
"instant": false,
"intervalFactor": 1,
"targets": [
{
"datasource": "${datasource}",
- "expr": "sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_copy_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
+ "expr": "sum by (bucket, ceph_daemon) ((ceph_rgw_op_per_bucket_copy_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
"format": "time_series",
"instant": false,
"intervalFactor": 1,
"uid": "${datasource}"
},
"exemplar": false,
- "expr": "sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_put_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
+ "expr": "sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_put_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
"format": "table",
"hide": false,
"instant": true,
"uid": "${datasource}"
},
"exemplar": false,
- "expr": "sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_get_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
+ "expr": "sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_get_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
"format": "table",
"hide": false,
"instant": true,
"uid": "${datasource}"
},
"exemplar": false,
- "expr": "sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_del_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
+ "expr": "sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_del_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
"format": "table",
"hide": false,
"instant": true,
"uid": "${datasource}"
},
"exemplar": false,
- "expr": "sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_copy_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
+ "expr": "sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_copy_obj_bytes *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
"format": "table",
"hide": false,
"instant": true,
"uid": "${datasource}"
},
"exemplar": false,
- "expr": "sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_list_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
+ "expr": "sum by (bucket, ceph_daemon) (ceph_rgw_op_per_bucket_list_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
"format": "table",
"hide": false,
"instant": true,
"targets": [
{
"datasource": "${datasource}",
- "expr": "topk(5, \n sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_put_obj_ops ) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})\n)\n",
+ "expr": "topk(5, \n sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_put_obj_ops ) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })\n)\n",
"format": "time_series",
"instant": true,
"intervalFactor": 1,
"targets": [
{
"datasource": "${datasource}",
- "expr": "topk(5, \n sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_get_obj_ops ) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})\n)\n",
+ "expr": "topk(5, \n sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_get_obj_ops ) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })\n)\n",
"format": "time_series",
"instant": true,
"intervalFactor": 1,
"targets": [
{
"datasource": "${datasource}",
- "expr": "topk(5, \n sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_put_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})\n)",
+ "expr": "topk(5, \n sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_put_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })\n)",
"format": "time_series",
"instant": true,
"intervalFactor": 1,
"targets": [
{
"datasource": "${datasource}",
- "expr": "topk(5, \n sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_get_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})\n)",
+ "expr": "topk(5, \n sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_get_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })\n)",
"format": "time_series",
"instant": true,
"intervalFactor": 1,
"targets": [
{
"datasource": "${datasource}",
- "expr": "sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_put_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
+ "expr": "sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_put_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
"format": "time_series",
"instant": false,
"intervalFactor": 1,
"targets": [
{
"datasource": "${datasource}",
- "expr": "sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_get_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
+ "expr": "sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_get_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
"format": "time_series",
"instant": false,
"intervalFactor": 1,
"targets": [
{
"datasource": "${datasource}",
- "expr": "sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_del_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
+ "expr": "sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_del_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
"format": "time_series",
"instant": false,
"intervalFactor": 1,
"targets": [
{
"datasource": "${datasource}",
- "expr": "sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_copy_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
+ "expr": "sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_copy_obj_bytes) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
"format": "time_series",
"instant": false,
"intervalFactor": 1,
"targets": [
{
"datasource": "${datasource}",
- "expr": "sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_get_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
+ "expr": "sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_get_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
"format": "time_series",
"instant": false,
"intervalFactor": 1,
"targets": [
{
"datasource": "${datasource}",
- "expr": "sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_put_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
+ "expr": "sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_put_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
"format": "time_series",
"instant": false,
"intervalFactor": 1,
"targets": [
{
"datasource": "${datasource}",
- "expr": "sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_list_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
+ "expr": "sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_list_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
"format": "time_series",
"instant": false,
"intervalFactor": 1,
"targets": [
{
"datasource": "${datasource}",
- "expr": "sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_del_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
+ "expr": "sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_del_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
"format": "time_series",
"instant": false,
"intervalFactor": 1,
"targets": [
{
"datasource": "${datasource}",
- "expr": "sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_copy_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
+ "expr": "sum by (user, ceph_daemon) ((ceph_rgw_op_per_user_copy_obj_ops) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
"format": "time_series",
"instant": false,
"intervalFactor": 1,
"uid": "${datasource}"
},
"exemplar": false,
- "expr": "sum by (user, ceph_daemon) (ceph_rgw_op_per_user_put_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
+ "expr": "sum by (user, ceph_daemon) (ceph_rgw_op_per_user_put_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
"format": "table",
"hide": false,
"instant": true,
"uid": "${datasource}"
},
"exemplar": false,
- "expr": "sum by (user, ceph_daemon) (ceph_rgw_op_per_user_get_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
+ "expr": "sum by (user, ceph_daemon) (ceph_rgw_op_per_user_get_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
"format": "table",
"hide": false,
"instant": true,
"uid": "${datasource}"
},
"exemplar": false,
- "expr": "sum by (user, ceph_daemon) (ceph_rgw_op_per_user_del_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
+ "expr": "sum by (user, ceph_daemon) (ceph_rgw_op_per_user_del_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
"format": "table",
"hide": false,
"instant": true,
"uid": "${datasource}"
},
"exemplar": false,
- "expr": "sum by (user, ceph_daemon) (ceph_rgw_op_per_user_copy_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
+ "expr": "sum by (user, ceph_daemon) (ceph_rgw_op_per_user_copy_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
"format": "table",
"hide": false,
"instant": true,
"uid": "${datasource}"
},
"exemplar": false,
- "expr": "sum by (user, ceph_daemon) (ceph_rgw_op_per_user_list_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"})",
+ "expr": "sum by (user, ceph_daemon) (ceph_rgw_op_per_user_list_obj_ops *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", })",
"format": "table",
"hide": false,
"instant": true,
"regex": "",
"type": "datasource"
},
+ {
+ "allValue": null,
+ "current": { },
+ "datasource": "$datasource",
+ "hide": 0,
+ "includeAll": false,
+ "label": "cluster",
+ "multi": false,
+ "name": "cluster",
+ "options": [ ],
+ "query": "label_values(ceph_health_status, cluster)",
+ "refresh": 1,
+ "regex": "(.*)",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
{
"allValue": null,
"current": { },
"multi": false,
"name": "rgw_servers",
"options": [ ],
- "query": "label_values(ceph_rgw_metadata{}, ceph_daemon)",
+ "query": "label_values(ceph_rgw_metadata{cluster=~\"$cluster\", }, ceph_daemon)",
"refresh": 2,
"regex": "",
"sort": 0,
The workflow of testing would be something like:
# add prometheus query to test
- self.set_expression('bonding_slaves > 0')
+ self.set_expression('node_bonding_slaves > 0')
# add some prometheus input series
- self.add_series('bonding_slaves{master="bond0"}', '2')
- self.add_series('bonding_slaves{master="bond1"}', '3')
+ self.add_series('node_bonding_slaves{master="bond0"}', '2')
+ self.add_series('node_bonding_slaves{master="bond1"}', '3')
self.add_series('node_network_receive_bytes{instance="127.0.0.1",
device="eth1"}', "10 100 230 22")
# expected output of the query
- self.add_exp_samples('bonding_slaves{master="bond0"}', 2)
- self.add_exp_samples('bonding_slaves{master="bond1"}', 3)
+ self.add_exp_samples('node_bonding_slaves{master="bond0"}', 2)
+ self.add_exp_samples('node_bonding_slaves{master="bond1"}', 3)
# at last, always call promtool with:
self.assertTrue(self.run_promtool())
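Put together, a complete test built from these helpers could look roughly like the sketch below. The class name and the `PromqlTest` base are assumptions for illustration; only the helper calls (`set_expression`, `add_series`, `add_exp_samples`, `run_promtool`) come from the workflow above.

class TestBondingSlaves(PromqlTest):
    def test_bonding_slaves(self):
        # PromQL expression under test
        self.set_expression('node_bonding_slaves > 0')
        # input series promtool is fed
        self.add_series('node_bonding_slaves{master="bond0"}', '2')
        self.add_series('node_bonding_slaves{master="bond1"}', '3')
        # samples the expression is expected to return
        self.add_exp_samples('node_bonding_slaves{master="bond0"}', 2)
        self.add_exp_samples('node_bonding_slaves{master="bond1"}', 3)
        # at last, generate the promtool test file and run it
        self.assertTrue(self.run_promtool())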
'$osd_hosts', you should change this to a real value. Example:
- > self.set_expression('bonding_slaves{master="$osd_hosts"} > 0')
+ > self.set_expression('node_bonding_slaves{master="$osd_hosts"} > 0')
> self.set_variable('osd_hosts', '127.0.0.1')
> print(self.query)
- > bonding_slaves{master="127.0.0.1"} > 0
+ > node_bonding_slaves{master="127.0.0.1"} > 0
Args:
variable(str): Variable name
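A minimal sketch of how this substitution could be implemented is shown below; the method body is an assumption for illustration, only the `$variable`-to-value replacement behaviour is documented above.

def set_variable(self, variable: str, value: str) -> None:
    # Replace a Grafana template variable such as '$osd_hosts' in the
    # expression under test with a concrete value before promtool runs.
    self.query = self.query.replace(f'${variable}', value)

With `osd_hosts` set to `127.0.0.1`, the example above yields `node_bonding_slaves{master="127.0.0.1"} > 0`.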
Scenario: "Test cluster health"
Given the following series:
| metrics | values |
- | ceph_health_status{} | 1.0 |
- Then Grafana panel `Health Status` with legend `EMPTY` shows:
+ | ceph_health_status{job="ceph",cluster="mycluster"} | 1 |
+ Then Grafana panel `Ceph health status` with legend `EMPTY` shows:
| metrics | values |
- | ceph_health_status{} | 1.0 |
+ | ceph_health_status{job="ceph",cluster="mycluster"} | 1 |
Scenario: "Test Firing Alerts Warning"
Given the following series:
| metrics | values |
- | ALERTS{alertstate="firing",alertname="Ceph.1", severity="warning"} | 1 |
- | ALERTS{alertstate="firing",alertname="Ceph.2", severity="critical"} | 1 |
+ | ALERTS{alertstate="firing",alertname="Ceph.1", severity="warning", cluster="mycluster"} | 1 |
+ | ALERTS{alertstate="firing",alertname="Ceph.1", severity="critical", cluster="someothercluster"} | 1 |
+ | ALERTS{alertstate="firing",alertname="Ceph.2", severity="critical", cluster="mycluster"} | 1 |
Then Grafana panel `Firing Alerts` with legend `Warning` shows:
| metrics | values |
| {} | 1 |
Scenario: "Test Firing Alerts Critical"
Given the following series:
| metrics | values |
- | ALERTS{alertstate="firing",alertname="Ceph.1", severity="warning"} | 1 |
- | ALERTS{alertstate="firing",alertname="Ceph.2", severity="critical"} | 1 |
+ | ALERTS{alertstate="firing",alertname="Ceph.1", severity="warning", cluster="mycluster"} | 1 |
+ | ALERTS{alertstate="firing",alertname="Ceph.1", severity="warning", cluster="someothercluster"} | 1 |
+ | ALERTS{alertstate="firing",alertname="Ceph.2", severity="critical", cluster="mycluster"} | 1 |
Then Grafana panel `Firing Alerts` with legend `Critical` shows:
| metrics | values |
| {} | 1 |
Scenario: "Test Available Capacity"
Given the following series:
| metrics | values |
- | ceph_cluster_total_bytes{}| 100 |
- | ceph_cluster_total_used_bytes{}| 70 |
+ | ceph_cluster_total_bytes{job="ceph",cluster="mycluster"}| 100 |
+ | ceph_cluster_total_used_bytes{job="ceph",cluster="mycluster"}| 70 |
Then Grafana panel `Available Capacity` with legend `EMPTY` shows:
| metrics | values |
- | {} | 0.3 |
+ | {job="ceph",cluster="mycluster"} | 0.3 |
Scenario: "Test Cluster Capacity"
Given the following series:
| metrics | values |
- | ceph_cluster_total_bytes{}| 100 |
+ | ceph_cluster_total_bytes{job="ceph",cluster="mycluster"}| 100 |
Then Grafana panel `Cluster Capacity` with legend `EMPTY` shows:
| metrics | values |
- | ceph_cluster_total_bytes{} | 100 |
+ | ceph_cluster_total_bytes{job="ceph",cluster="mycluster"} | 100 |
Scenario: "Test Used Capacity"
Given the following series:
| metrics | values |
- | ceph_cluster_total_used_bytes{}| 100 |
+ | ceph_cluster_total_used_bytes{job="ceph",cluster="mycluster"}| 100 |
Then Grafana panel `Used Capacity` with legend `EMPTY` shows:
| metrics | values |
- | ceph_cluster_total_used_bytes{} | 100 |
+ | ceph_cluster_total_used_bytes{job="ceph",cluster="mycluster"} | 100 |
Scenario: "Test Write Throughput"
Given the following series:
| metrics | values |
- | ceph_osd_op_w_in_bytes{cluster="mycluster", osd="osd.0"} | 500 500 500 |
- | ceph_osd_op_w_in_bytes{cluster="mycluster", osd="osd.1"} | 500 120 110 |
+ | ceph_osd_op_w_in_bytes{job="ceph", cluster="mycluster", osd="osd.0"} | 500 500 500 |
+ | ceph_osd_op_w_in_bytes{job="ceph", cluster="mycluster", osd="osd.1"} | 500 120 110 |
Then Grafana panel `Write Throughput` with legend `EMPTY` shows:
| metrics | values |
| {} | 2 |
Scenario: "Test Write IOPS"
Given the following series:
| metrics | values |
- | ceph_osd_op_w{cluster="mycluster", osd="osd.0"} | 500 500 500 |
- | ceph_osd_op_w{cluster="mycluster", osd="osd.1"} | 500 120 110 |
+ | ceph_osd_op_w{job="ceph",cluster="mycluster", osd="osd.0"} | 500 500 500 |
+ | ceph_osd_op_w{job="ceph",cluster="mycluster", osd="osd.1"} | 500 120 110 |
Then Grafana panel `Write IOPS` with legend `EMPTY` shows:
| metrics | values |
| {} | 2 |
Scenario: "Test Read Throughput"
Given the following series:
| metrics | values |
- | ceph_osd_op_r_out_bytes{cluster="mycluster", osd="osd.0"} | 500 500 500 |
- | ceph_osd_op_r_out_bytes{cluster="mycluster", osd="osd.1"} | 500 120 110 |
+ | ceph_osd_op_r_out_bytes{job="ceph", cluster="mycluster", osd="osd.0"} | 500 500 500 |
+ | ceph_osd_op_r_out_bytes{job="ceph", cluster="mycluster", osd="osd.1"} | 500 120 110 |
Then Grafana panel `Read Throughput` with legend `EMPTY` shows:
| metrics | values |
| {} | 2 |
Scenario: "Test Read IOPS"
Given the following series:
| metrics | values |
- | ceph_osd_op_r{cluster="mycluster", osd="osd.0"} | 500 500 500 |
- | ceph_osd_op_r{cluster="mycluster", osd="osd.1"} | 500 120 110 |
+ | ceph_osd_op_r{job="ceph", cluster="mycluster", osd="osd.0"} | 500 500 500 |
+ | ceph_osd_op_r{job="ceph", cluster="mycluster", osd="osd.1"} | 500 120 110 |
Then Grafana panel `Read IOPS` with legend `EMPTY` shows:
| metrics | values |
| {} | 2 |
Scenario: "Test OSDs All"
Given the following series:
| metrics | values |
- | ceph_osd_metadata{cluster="mycluster", osd="osd.0"} | 1 |
- | ceph_osd_metadata{cluster="mycluster", osd="osd.2"} | 1 |
- | ceph_osd_metadata{cluster="mycluster", osd="osd.3"} | 1 |
+ | ceph_osd_metadata{job="ceph", cluster="mycluster", osd="osd.0"} | 1 |
+ | ceph_osd_metadata{job="ceph", cluster="mycluster", osd="osd.2"} | 1 |
+ | ceph_osd_metadata{job="ceph", cluster="mycluster", osd="osd.3"} | 1 |
Then Grafana panel `OSDs` with legend `All` shows:
| metrics | values |
| {} | 3 |
Scenario: "Test OSDs In"
Given the following series:
| metrics | values |
- | ceph_osd_in{cluster="mycluster", osd="osd.0"} | 1 |
- | ceph_osd_in{cluster="mycluster", osd="osd.1"} | 1 |
- | ceph_osd_in{cluster="mycluster", osd="osd.2"} | 1 |
+ | ceph_osd_in{job="ceph", cluster="mycluster", osd="osd.0"} | 1 |
+ | ceph_osd_in{job="ceph", cluster="mycluster", osd="osd.1"} | 1 |
+ | ceph_osd_in{job="ceph", cluster="mycluster", osd="osd.2"} | 1 |
Then Grafana panel `OSDs` with legend `In` shows:
| metrics | values |
| {} | 3 |
Scenario: "Test OSDs Out"
Given the following series:
| metrics | values |
- | ceph_osd_in{cluster="mycluster", osd="osd.0"} | 1 |
- | ceph_osd_in{cluster="mycluster", osd="osd.1"} | 0 |
- | ceph_osd_in{cluster="mycluster", osd="osd.2"} | 0 |
+ | ceph_osd_in{job="ceph", cluster="mycluster", osd="osd.0"} | 1 |
+ | ceph_osd_in{job="ceph", cluster="mycluster", osd="osd.1"} | 0 |
+ | ceph_osd_in{job="ceph", cluster="mycluster", osd="osd.2"} | 0 |
Then Grafana panel `OSDs` with legend `Out` shows:
| metrics | values |
| {} | 2 |
Scenario: "Test OSDs Up"
Given the following series:
| metrics | values |
- | ceph_osd_up{cluster="mycluster", osd="osd.0"} | 1 |
- | ceph_osd_up{cluster="mycluster", osd="osd.1"} | 0 |
- | ceph_osd_up{cluster="mycluster", osd="osd.2"} | 0 |
+ | ceph_osd_up{job="ceph", cluster="mycluster", osd="osd.0"} | 1 |
+ | ceph_osd_up{job="ceph", cluster="mycluster", osd="osd.1"} | 0 |
+ | ceph_osd_up{job="ceph", cluster="mycluster", osd="osd.2"} | 0 |
Then Grafana panel `OSDs` with legend `Up` shows:
| metrics | values |
| {} | 1 |
Scenario: "Test OSDs Down"
Given the following series:
| metrics | values |
- | ceph_osd_up{cluster="mycluster", osd="osd.0"} | 1 |
- | ceph_osd_up{cluster="mycluster", osd="osd.1"} | 0 |
- | ceph_osd_up{cluster="mycluster", osd="osd.2"} | 0 |
+ | ceph_osd_up{job="ceph", cluster="mycluster", osd="osd.0"} | 1 |
+ | ceph_osd_up{job="ceph", cluster="mycluster", osd="osd.1"} | 0 |
+ | ceph_osd_up{job="ceph", cluster="mycluster", osd="osd.2"} | 0 |
Then Grafana panel `OSDs` with legend `Down` shows:
| metrics | values |
| {} | 2 |
Scenario: "Test MGRs Standby"
Given the following series:
| metrics | values |
- | ceph_mgr_status{cluster="mycluster", osd="osd.0"} | 1 |
- | ceph_mgr_status{cluster="mycluster", osd="osd.1"} | 0 |
- | ceph_mgr_status{cluster="mycluster", osd="osd.2"} | 0 |
+ | ceph_mgr_status{job="ceph",cluster="mycluster", osd="osd.0"} | 1 |
+ | ceph_mgr_status{job="ceph",cluster="mycluster", osd="osd.1"} | 0 |
+ | ceph_mgr_status{job="ceph",cluster="mycluster", osd="osd.2"} | 0 |
Then Grafana panel `MGRs` with legend `Standby` shows:
| metrics | values |
| {} | 2 |
Scenario: "Test MGRs Active"
Given the following series:
| metrics | values |
- | ceph_mgr_status{cluster="mycluster", osd="osd.0"} | 1 |
- | ceph_mgr_status{cluster="mycluster", osd="osd.1"} | 0 |
+ | ceph_mgr_status{job="ceph",cluster="mycluster", osd="osd.0"} | 1 |
+ | ceph_mgr_status{job="ceph",cluster="mycluster", osd="osd.1"} | 0 |
Then Grafana panel `MGRs` with legend `Active` shows:
| metrics | values |
| {} | 1 |
Scenario: "Test Monitors Total"
Given the following series:
| metrics | values |
- | ceph_mon_quorum_status{cluster="mycluster", osd="osd.0"} | 1 |
- | ceph_mon_quorum_status{cluster="mycluster", osd="osd.1"} | 0 |
- | ceph_mon_quorum_status{cluster="mycluster", osd="osd.2"} | 0 |
+ | ceph_mon_quorum_status{job="ceph",cluster="mycluster", osd="osd.0"} | 1 |
+ | ceph_mon_quorum_status{job="ceph",cluster="mycluster", osd="osd.1"} | 0 |
+ | ceph_mon_quorum_status{job="ceph",cluster="mycluster", osd="osd.2"} | 0 |
Then Grafana panel `Monitors` with legend `Total` shows:
| metrics | values |
| {} | 3 |
Scenario: "Test Monitors In Quorum"
Given the following series:
| metrics | values |
- | ceph_mon_quorum_status{cluster="mycluster", osd="osd.0"} | 1 |
- | ceph_mon_quorum_status{cluster="mycluster", osd="osd.1"} | 0 |
- | ceph_mon_quorum_status{cluster="mycluster", osd="osd.2"} | 0 |
+ | ceph_mon_quorum_status{job="ceph",cluster="mycluster", osd="osd.0"} | 1 |
+ | ceph_mon_quorum_status{job="ceph",cluster="mycluster", osd="osd.1"} | 0 |
+ | ceph_mon_quorum_status{job="ceph",cluster="mycluster", osd="osd.2"} | 0 |
Then Grafana panel `Monitors` with legend `In Quorum` shows:
| metrics | values |
| {} | 1 |
Scenario: "Test Monitors out of Quorum"
Given the following series:
| metrics | values |
- | ceph_mon_quorum_status{cluster="mycluster", osd="osd.0"} | 1 |
- | ceph_mon_quorum_status{cluster="mycluster", osd="osd.1"} | 0 |
- | ceph_mon_quorum_status{cluster="mycluster", osd="osd.2"} | 0 |
+ | ceph_mon_quorum_status{job="ceph",cluster="mycluster", osd="osd.0"} | 1 |
+ | ceph_mon_quorum_status{job="ceph",cluster="mycluster", osd="osd.1"} | 0 |
+ | ceph_mon_quorum_status{job="ceph",cluster="mycluster", osd="osd.2"} | 0 |
Then Grafana panel `Monitors` with legend `MONs out of Quorum` shows:
| metrics | values |
| {} | 2 |
Scenario: "Test Total Capacity"
Given the following series:
| metrics | values |
- | ceph_cluster_total_bytes{cluster="mycluster", osd="osd.0"} | 100 |
+ | ceph_cluster_total_bytes{job="ceph",cluster="mycluster", osd="osd.0"} | 100 |
Then Grafana panel `Capacity` with legend `Total Capacity` shows:
| metrics | values |
- | ceph_cluster_total_bytes{cluster="mycluster", osd="osd.0"} | 100 |
+ | ceph_cluster_total_bytes{job="ceph", cluster="mycluster", osd="osd.0"} | 100 |
Scenario: "Test Used Capacity"
Given the following series:
| metrics | values |
- | ceph_cluster_total_used_bytes{cluster="mycluster", osd="osd.0"} | 100 |
+ | ceph_cluster_total_used_bytes{job="ceph",cluster="mycluster", osd="osd.0"} | 100 |
Then Grafana panel `Capacity` with legend `Used` shows:
| metrics | values |
- | ceph_cluster_total_used_bytes{cluster="mycluster", osd="osd.0"} | 100 |
+ | ceph_cluster_total_used_bytes{job="ceph",cluster="mycluster", osd="osd.0"} | 100 |
Scenario: "Test Cluster Throughput Write"
Given the following series:
| metrics | values |
- | ceph_osd_op_w_in_bytes{cluster="mycluster", osd="osd.0"} | 1000 1000|
- | ceph_osd_op_w_in_bytes{cluster="mycluster", osd="osd.1"} | 2000 1500 |
+ | ceph_osd_op_w_in_bytes{job="ceph",cluster="mycluster", osd="osd.0"} | 1000 1000|
+ | ceph_osd_op_w_in_bytes{job="ceph",cluster="mycluster", osd="osd.1"} | 2000 1500 |
Then Grafana panel `Cluster Throughput` with legend `Write` shows:
| metrics | values |
| {} | 25 |
Scenario: "Test Cluster Throughput Read"
Given the following series:
| metrics | values |
- | ceph_osd_op_r_out_bytes{cluster="mycluster", osd="osd.0"} | 1000 1000|
- | ceph_osd_op_r_out_bytes{cluster="mycluster", osd="osd.1"} | 2000 1500 |
+ | ceph_osd_op_r_out_bytes{job="ceph",cluster="mycluster", osd="osd.0"} | 1000 1000|
+ | ceph_osd_op_r_out_bytes{job="ceph",cluster="mycluster", osd="osd.1"} | 2000 1500 |
Then Grafana panel `Cluster Throughput` with legend `Read` shows:
| metrics | values |
| {} | 25 |
Scenario: "Test IOPS Read"
Given the following series:
| metrics | values |
- | ceph_osd_op_r{cluster="mycluster", osd="osd.0"} | 1000 1000|
- | ceph_osd_op_r{cluster="mycluster", osd="osd.1"} | 2000 1500 |
+ | ceph_osd_op_r{job="ceph",cluster="mycluster", osd="osd.0"} | 1000 1000|
+ | ceph_osd_op_r{job="ceph",cluster="mycluster", osd="osd.1"} | 2000 1500 |
Then Grafana panel `IOPS` with legend `Read` shows:
| metrics | values |
| {} | 25 |
Scenario: "Test IOPS Write"
Given the following series:
| metrics | values |
- | ceph_osd_op_w{cluster="mycluster", osd="osd.0"} | 1000 1000|
- | ceph_osd_op_w{cluster="mycluster", osd="osd.1"} | 2000 1500 |
+ | ceph_osd_op_w{job="ceph",cluster="mycluster", osd="osd.0"} | 1000 1000|
+ | ceph_osd_op_w{job="ceph",cluster="mycluster", osd="osd.1"} | 2000 1500 |
Then Grafana panel `IOPS` with legend `Write` shows:
| metrics | values |
| {} | 25 |
Scenario: "Test Pool Used Bytes"
Given the following series:
| metrics | values |
- | ceph_pool_bytes_used{cluster="mycluster", pool_id="1"} | 10000 |
- | ceph_pool_bytes_used{cluster="mycluster", pool_id="2"} | 20000 |
- | ceph_pool_bytes_used{cluster="mycluster", pool_id="3"} | 30000 |
- | ceph_pool_metadata{cluster="mycluster", pool_id="1", name="pool1"} | 2000 |
- | ceph_pool_metadata{cluster="mycluster", pool_id="2", name="pool2"} | 4000 |
- | ceph_pool_metadata{cluster="mycluster", pool_id="3", name="pool3"} | 6000 |
+ | ceph_pool_bytes_used{job="ceph", cluster="mycluster", pool_id="1"} | 10000 |
+ | ceph_pool_bytes_used{job="ceph", cluster="mycluster", pool_id="2"} | 20000 |
+ | ceph_pool_bytes_used{job="ceph", cluster="mycluster", pool_id="3"} | 30000 |
+ | ceph_pool_metadata{job="ceph", cluster="mycluster", pool_id="1", name="pool1"} | 2000 |
+ | ceph_pool_metadata{job="ceph", cluster="mycluster", pool_id="2", name="pool2"} | 4000 |
+ | ceph_pool_metadata{job="ceph", cluster="mycluster", pool_id="3", name="pool3"} | 6000 |
Then Grafana panel `Pool Used Bytes` with legend `{{name}}` shows:
| metrics | values |
- | {cluster="mycluster", name="pool1", pool_id="1"} | 20000000 |
- | {cluster="mycluster", name="pool2", pool_id="2"} | 80000000 |
- | {cluster="mycluster", name="pool3", pool_id="3"} | 180000000 |
+ | {job="ceph", cluster="mycluster", name="pool1", pool_id="1"} | 20000000 |
+ | {job="ceph", cluster="mycluster", name="pool2", pool_id="2"} | 80000000 |
+ | {job="ceph", cluster="mycluster", name="pool3", pool_id="3"} | 180000000 |
Scenario: "Test Pool Used RAW Bytes"
Given the following series:
| metrics | values |
- | ceph_pool_stored_raw{cluster="mycluster", pool_id="1"} | 10000 |
- | ceph_pool_stored_raw{cluster="mycluster", pool_id="2"} | 20000 |
- | ceph_pool_stored_raw{cluster="mycluster", pool_id="3"} | 30000 |
- | ceph_pool_metadata{cluster="mycluster", pool_id="1", name="pool1"} | 2000 |
- | ceph_pool_metadata{cluster="mycluster", pool_id="2", name="pool2"} | 4000 |
- | ceph_pool_metadata{cluster="mycluster", pool_id="3", name="pool3"} | 6000 |
+ | ceph_pool_stored_raw{job="ceph", cluster="mycluster", pool_id="1"} | 10000 |
+ | ceph_pool_stored_raw{job="ceph", cluster="mycluster", pool_id="2"} | 20000 |
+ | ceph_pool_stored_raw{job="ceph", cluster="mycluster", pool_id="3"} | 30000 |
+ | ceph_pool_metadata{job="ceph", cluster="mycluster", pool_id="1", name="pool1"} | 2000 |
+ | ceph_pool_metadata{job="ceph", cluster="mycluster", pool_id="2", name="pool2"} | 4000 |
+ | ceph_pool_metadata{job="ceph", cluster="mycluster", pool_id="3", name="pool3"} | 6000 |
Then Grafana panel `Pool Used RAW Bytes` with legend `{{name}}` shows:
| metrics | values |
- | {cluster="mycluster", name="pool1", pool_id="1"} | 20000000 |
- | {cluster="mycluster", name="pool2", pool_id="2"} | 80000000 |
- | {cluster="mycluster", name="pool3", pool_id="3"} | 180000000 |
+ | {job="ceph", cluster="mycluster", name="pool1", pool_id="1"} | 20000000 |
+ | {job="ceph", cluster="mycluster", name="pool2", pool_id="2"} | 80000000 |
+ | {job="ceph", cluster="mycluster", name="pool3", pool_id="3"} | 180000000 |
Scenario: "Test Pool Objects Quota"
Given the following series:
| metrics | values |
- | ceph_pool_quota_objects{cluster="mycluster", pool_id="1"} | 10 |
- | ceph_pool_quota_objects{cluster="mycluster", pool_id="2"} | 20 |
- | ceph_pool_quota_objects{cluster="mycluster", pool_id="3"} | 30 |
- | ceph_pool_metadata{cluster="mycluster", pool_id="1", name="pool1"} | 10 |
- | ceph_pool_metadata{cluster="mycluster", pool_id="2", name="pool2"} | 15 |
- | ceph_pool_metadata{cluster="mycluster", pool_id="3", name="pool3"} | 15 |
+ | ceph_pool_quota_objects{job="ceph", cluster="mycluster", pool_id="1"} | 10 |
+ | ceph_pool_quota_objects{job="ceph", cluster="mycluster", pool_id="2"} | 20 |
+ | ceph_pool_quota_objects{job="ceph", cluster="mycluster", pool_id="3"} | 30 |
+ | ceph_pool_metadata{job="ceph", cluster="mycluster", pool_id="1", name="pool1"} | 10 |
+ | ceph_pool_metadata{job="ceph", cluster="mycluster", pool_id="2", name="pool2"} | 15 |
+ | ceph_pool_metadata{job="ceph", cluster="mycluster", pool_id="3", name="pool3"} | 15 |
Then Grafana panel `Pool Objects Quota` with legend `{{name}}` shows:
| metrics | values |
- | {cluster="mycluster", name="pool1", pool_id="1"} | 100 |
- | {cluster="mycluster", name="pool2", pool_id="2"} | 300 |
- | {cluster="mycluster", name="pool3", pool_id="3"} | 450|
+ | {job="ceph", cluster="mycluster", name="pool1", pool_id="1"} | 100 |
+ | {job="ceph", cluster="mycluster", name="pool2", pool_id="2"} | 300 |
+ | {job="ceph", cluster="mycluster", name="pool3", pool_id="3"} | 450|
Scenario: "Test Pool Quota Bytes"
Given the following series:
| metrics | values |
- | ceph_pool_quota_bytes{cluster="mycluster", pool_id="1"} | 100 |
- | ceph_pool_quota_bytes{cluster="mycluster", pool_id="2"} | 200 |
- | ceph_pool_quota_bytes{cluster="mycluster", pool_id="3"} | 300 |
- | ceph_pool_metadata{cluster="mycluster", pool_id="1", name="pool1"} | 100 |
- | ceph_pool_metadata{cluster="mycluster", pool_id="2", name="pool2"} | 150 |
- | ceph_pool_metadata{cluster="mycluster", pool_id="3", name="pool3"} | 150 |
+ | ceph_pool_quota_bytes{job="ceph", cluster="mycluster", pool_id="1"} | 100 |
+ | ceph_pool_quota_bytes{job="ceph", cluster="mycluster", pool_id="2"} | 200 |
+ | ceph_pool_quota_bytes{job="ceph", cluster="mycluster", pool_id="3"} | 300 |
+ | ceph_pool_metadata{job="ceph", cluster="mycluster", pool_id="1", name="pool1"} | 100 |
+ | ceph_pool_metadata{job="ceph", cluster="mycluster", pool_id="2", name="pool2"} | 150 |
+ | ceph_pool_metadata{job="ceph", cluster="mycluster", pool_id="3", name="pool3"} | 150 |
Then Grafana panel `Pool Quota Bytes` with legend `{{name}}` shows:
| metrics | values |
- | {cluster="mycluster", name="pool1", pool_id="1"} | 10000 |
- | {cluster="mycluster", name="pool2", pool_id="2"} | 30000 |
- | {cluster="mycluster", name="pool3", pool_id="3"} | 45000 |
+ | {job="ceph", cluster="mycluster", name="pool1", pool_id="1"} | 10000 |
+ | {job="ceph", cluster="mycluster", name="pool2", pool_id="2"} | 30000 |
+ | {job="ceph", cluster="mycluster", name="pool3", pool_id="3"} | 45000 |
Scenario: "Test Objects Per Pool"
Given the following series:
| metrics | values |
- | ceph_pool_objects{cluster="mycluster", pool_id="1"} | 100 |
- | ceph_pool_objects{cluster="mycluster", pool_id="2"} | 200 |
- | ceph_pool_objects{cluster="mycluster", pool_id="3"} | 300 |
- | ceph_pool_metadata{cluster="mycluster", pool_id="1", name="pool1"} | 100 |
- | ceph_pool_metadata{cluster="mycluster", pool_id="2", name="pool2"} | 150 |
- | ceph_pool_metadata{cluster="mycluster", pool_id="3", name="pool3"} | 150 |
+ | ceph_pool_objects{job="ceph", cluster="mycluster", pool_id="1"} | 100 |
+ | ceph_pool_objects{job="ceph", cluster="mycluster", pool_id="2"} | 200 |
+ | ceph_pool_objects{job="ceph", cluster="mycluster", pool_id="3"} | 300 |
+ | ceph_pool_metadata{job="ceph", cluster="mycluster", pool_id="1", name="pool1"} | 100 |
+ | ceph_pool_metadata{job="ceph", cluster="mycluster", pool_id="2", name="pool2"} | 150 |
+ | ceph_pool_metadata{job="ceph", cluster="mycluster", pool_id="3", name="pool3"} | 150 |
Then Grafana panel `Objects Per Pool` with legend `{{name}}` shows:
| metrics | values |
- | {cluster="mycluster", name="pool1", pool_id="1"} | 10000 |
- | {cluster="mycluster", name="pool2", pool_id="2"} | 30000 |
- | {cluster="mycluster", name="pool3", pool_id="3"} | 45000|
+ | {job="ceph", cluster="mycluster", name="pool1", pool_id="1"} | 10000 |
+ | {job="ceph", cluster="mycluster", name="pool2", pool_id="2"} | 30000 |
+ | {job="ceph", cluster="mycluster", name="pool3", pool_id="3"} | 45000|
Scenario: "Test OSD Type Count"
Given the following series:
| metrics | values |
- | ceph_pool_objects{cluster="mycluster", osd="osd.0"} | 10 |
- | ceph_pool_objects{cluster="mycluster", osd="osd.1"} | 20 |
+ | ceph_pool_objects{job="ceph", cluster="mycluster", osd="osd.0"} | 10 |
+ | ceph_pool_objects{job="ceph", cluster="mycluster", osd="osd.1"} | 20 |
Then Grafana panel `OSD Type Count` with legend `Total` shows:
| metrics | values |
| {} | 30 |
Scenario: "Test PGs State Backfill Toofull"
Given the following series:
| metrics | values |
- | ceph_pg_backfill_toofull{cluster="mycluster", osd="osd.0"} | 10 |
- | ceph_pg_backfill_toofull{cluster="mycluster", osd="osd.1"} | 20 |
+ | ceph_pg_backfill_toofull{job="ceph", cluster="mycluster", osd="osd.0"} | 10 |
+ | ceph_pg_backfill_toofull{job="ceph", cluster="mycluster", osd="osd.1"} | 20 |
Then Grafana panel `PGs State` with legend `Backfill Toofull` shows:
| metrics | values |
| {} | 30 |
Scenario: "Test PGs State Remapped"
Given the following series:
| metrics | values |
- | ceph_pg_remapped{cluster="mycluster", osd="osd.0"} | 10 |
- | ceph_pg_remapped{cluster="mycluster", osd="osd.1"} | 20 |
+ | ceph_pg_remapped{job="ceph", cluster="mycluster", osd="osd.0"} | 10 |
+ | ceph_pg_remapped{job="ceph", cluster="mycluster", osd="osd.1"} | 20 |
Then Grafana panel `PGs State` with legend `Remapped` shows:
| metrics | values |
| {} | 30 |
Scenario: "Test PGs State Backfill"
Given the following series:
| metrics | values |
- | ceph_pg_backfill{cluster="mycluster", osd="osd.0"} | 10 |
- | ceph_pg_backfill{cluster="mycluster", osd="osd.1"} | 20 |
+ | ceph_pg_backfill{job="ceph", cluster="mycluster", osd="osd.0"} | 10 |
+ | ceph_pg_backfill{job="ceph", cluster="mycluster", osd="osd.1"} | 20 |
Then Grafana panel `PGs State` with legend `Backfill` shows:
| metrics | values |
| {} | 30 |
Scenario: "Test PGs State Peered"
Given the following series:
| metrics | values |
- | ceph_pg_peered{cluster="mycluster", osd="osd.0"} | 10 |
- | ceph_pg_peered{cluster="mycluster", osd="osd.1"} | 20 |
+ | ceph_pg_peered{job="ceph", cluster="mycluster", osd="osd.0"} | 10 |
+ | ceph_pg_peered{job="ceph", cluster="mycluster", osd="osd.1"} | 20 |
Then Grafana panel `PGs State` with legend `Peered` shows:
| metrics | values |
| {} | 30 |
Scenario: "Test PGs State Down"
Given the following series:
| metrics | values |
- | ceph_pg_down{cluster="mycluster", osd="osd.0"} | 10 |
- | ceph_pg_down{cluster="mycluster", osd="osd.1"} | 20 |
+ | ceph_pg_down{job="ceph", cluster="mycluster", osd="osd.0"} | 10 |
+ | ceph_pg_down{job="ceph", cluster="mycluster", osd="osd.1"} | 20 |
Then Grafana panel `PGs State` with legend `Down` shows:
| metrics | values |
| {} | 30 |
Scenario: "Test PGs State Repair"
Given the following series:
| metrics | values |
- | ceph_pg_repair{cluster="mycluster", osd="osd.0"} | 10 |
- | ceph_pg_repair{cluster="mycluster", osd="osd.1"} | 20 |
+ | ceph_pg_repair{job="ceph", cluster="mycluster", osd="osd.0"} | 10 |
+ | ceph_pg_repair{job="ceph", cluster="mycluster", osd="osd.1"} | 20 |
Then Grafana panel `PGs State` with legend `Repair` shows:
| metrics | values |
| {} | 30 |
Scenario: "Test PGs State Recovering"
Given the following series:
| metrics | values |
- | ceph_pg_recovering{cluster="mycluster", osd="osd.0"} | 10 |
- | ceph_pg_recovering{cluster="mycluster", osd="osd.1"} | 20 |
+ | ceph_pg_recovering{job="ceph", cluster="mycluster", osd="osd.0"} | 10 |
+ | ceph_pg_recovering{job="ceph", cluster="mycluster", osd="osd.1"} | 20 |
Then Grafana panel `PGs State` with legend `Recovering` shows:
| metrics | values |
| {} | 30 |
Scenario: "Test PGs State Deep"
Given the following series:
| metrics | values |
- | ceph_pg_deep{cluster="mycluster", osd="osd.0"} | 10 |
- | ceph_pg_deep{cluster="mycluster", osd="osd.1"} | 20 |
+ | ceph_pg_deep{job="ceph", cluster="mycluster", osd="osd.0"} | 10 |
+ | ceph_pg_deep{job="ceph", cluster="mycluster", osd="osd.1"} | 20 |
Then Grafana panel `PGs State` with legend `Deep` shows:
| metrics | values |
| {} | 30 |
Scenario: "Test PGs State Wait Backfill"
Given the following series:
| metrics | values |
- | ceph_pg_wait_backfill{cluster="mycluster", osd="osd.0"} | 10 |
- | ceph_pg_wait_backfill{cluster="mycluster", osd="osd.1"} | 20 |
+ | ceph_pg_wait_backfill{job="ceph", cluster="mycluster", osd="osd.0"} | 10 |
+ | ceph_pg_wait_backfill{job="ceph", cluster="mycluster", osd="osd.1"} | 20 |
Then Grafana panel `PGs State` with legend `Wait Backfill` shows:
| metrics | values |
| {} | 30 |
Scenario: "Test PGs State Creating"
Given the following series:
| metrics | values |
- | ceph_pg_creating{cluster="mycluster", osd="osd.0"} | 10 |
- | ceph_pg_creating{cluster="mycluster", osd="osd.1"} | 20 |
+ | ceph_pg_creating{job="ceph", cluster="mycluster", osd="osd.0"} | 10 |
+ | ceph_pg_creating{job="ceph", cluster="mycluster", osd="osd.1"} | 20 |
Then Grafana panel `PGs State` with legend `Creating` shows:
| metrics | values |
| {} | 30 |
Scenario: "Test PGs State Forced Recovery"
Given the following series:
| metrics | values |
- | ceph_pg_forced_recovery{cluster="mycluster", osd="osd.0"} | 10 |
- | ceph_pg_forced_recovery{cluster="mycluster", osd="osd.1"} | 20 |
+ | ceph_pg_forced_recovery{job="ceph", cluster="mycluster", osd="osd.0"} | 10 |
+ | ceph_pg_forced_recovery{job="ceph", cluster="mycluster", osd="osd.1"} | 20 |
Then Grafana panel `PGs State` with legend `Forced Recovery` shows:
| metrics | values |
| {} | 30 |
Scenario: "Test PGs State Forced Backfill"
Given the following series:
| metrics | values |
- | ceph_pg_forced_backfill{cluster="mycluster", osd="osd.0"} | 10 |
- | ceph_pg_forced_backfill{cluster="mycluster", osd="osd.1"} | 20 |
+ | ceph_pg_forced_backfill{job="ceph", cluster="mycluster", osd="osd.0"} | 10 |
+ | ceph_pg_forced_backfill{job="ceph", cluster="mycluster", osd="osd.1"} | 20 |
Then Grafana panel `PGs State` with legend `Forced Backfill` shows:
| metrics | values |
| {} | 30 |
Scenario: "Test PGs State Incomplete"
Given the following series:
| metrics | values |
- | ceph_pg_incomplete{cluster="mycluster", osd="osd.0"} | 10 |
- | ceph_pg_incomplete{cluster="mycluster", osd="osd.1"} | 20 |
+ | ceph_pg_incomplete{job="ceph", cluster="mycluster", osd="osd.0"} | 10 |
+ | ceph_pg_incomplete{job="ceph", cluster="mycluster", osd="osd.1"} | 20 |
Then Grafana panel `PGs State` with legend `Incomplete` shows:
| metrics | values |
| {} | 30 |
Scenario: "Test PGs State Undersized"
Given the following series:
| metrics | values |
- | ceph_pg_undersized{cluster="mycluster", osd="osd.0"} | 10 |
- | ceph_pg_undersized{cluster="mycluster", osd="osd.1"} | 20 |
+ | ceph_pg_undersized{job="ceph", cluster="mycluster", osd="osd.0"} | 10 |
+ | ceph_pg_undersized{job="ceph", cluster="mycluster", osd="osd.1"} | 20 |
Then Grafana panel `PGs State` with legend `Undersized` shows:
| metrics | values |
| {} | 30 |
Scenario: "Test Stuck PGs Undersized"
Given the following series:
| metrics | values |
- | ceph_pg_undersized{cluster="mycluster", osd="osd.0"} | 10 |
- | ceph_pg_undersized{cluster="mycluster", osd="osd.1"} | 20 |
+ | ceph_pg_undersized{job="ceph", cluster="mycluster", osd="osd.0"} | 10 |
+ | ceph_pg_undersized{job="ceph", cluster="mycluster", osd="osd.1"} | 20 |
Then Grafana panel `Stuck PGs` with legend `Undersized` shows:
| metrics | values |
| {} | 30 |
Scenario: "Test Stuck PGs Stale"
Given the following series:
| metrics | values |
- | ceph_pg_stale{cluster="mycluster", osd="osd.0"} | 10 |
- | ceph_pg_stale{cluster="mycluster", osd="osd.1"} | 20 |
+ | ceph_pg_stale{job="ceph", cluster="mycluster", osd="osd.0"} | 10 |
+ | ceph_pg_stale{job="ceph", cluster="mycluster", osd="osd.1"} | 20 |
Then Grafana panel `Stuck PGs` with legend `Stale` shows:
| metrics | values |
| {} | 30 |
Scenario: "Test Stuck PGs Degraded"
Given the following series:
| metrics | values |
- | ceph_pg_degraded{cluster="mycluster", osd="osd.0"} | 10 |
- | ceph_pg_degraded{cluster="mycluster", osd="osd.1"} | 20 |
+ | ceph_pg_degraded{job="ceph", cluster="mycluster", osd="osd.0"} | 10 |
+ | ceph_pg_degraded{job="ceph", cluster="mycluster", osd="osd.1"} | 20 |
Then Grafana panel `Stuck PGs` with legend `Degraded` shows:
| metrics | values |
| {} | 30 |
Scenario: "Test Recovery Operations"
Given the following series:
| metrics | values |
- | ceph_osd_recovery_ops{cluster="mycluster", osd="osd.0"}| 250 200 |
- | ceph_osd_recovery_ops{cluster="mycluster", osd="osd.1"} | 800 100 |
+ | ceph_osd_recovery_ops{job="ceph", cluster="mycluster", osd="osd.0"}| 250 200 |
+ | ceph_osd_recovery_ops{job="ceph", cluster="mycluster", osd="osd.1"} | 800 100 |
When variable `interval` is `120s`
Then Grafana panel `Recovery Operations` with legend `OPS` shows:
| metrics | values |
Scenario: "Test Ceph Versions OSD"
Given the following series:
| metrics | values |
- | ceph_osd_metadata{cluster="mycluster", osd="osd.0"}| 17 |
+ | ceph_osd_metadata{job="ceph", cluster="mycluster", ceph_version="ceph version 18.2.1 (7fe91d5d5842e04be3b4f514d6dd990c54b29c76) reef (stable)", ceph_daemon="osd.0", device_class="ssd"} | 1 |
+ | ceph_osd_metadata{job="ceph", cluster="mycluster", ceph_version="ceph version 17.2.6 (d7ff0d10654d2280e08f1ab989c7cdf3064446a5) quincy (stable)", ceph_daemon="osd.1", device_class="hdd"} | 1 |
Then Grafana panel `Ceph Versions` with legend `OSD Services` shows:
| metrics | values |
- | {} | 1 |
+ | {ceph_version="ceph version 18.2.1 (7fe91d5d5842e04be3b4f514d6dd990c54b29c76) reef (stable)"} | 1 |
+ | {ceph_version="ceph version 17.2.6 (d7ff0d10654d2280e08f1ab989c7cdf3064446a5) quincy (stable)"} | 1 |
Scenario: "Test Ceph Versions Mon"
Given the following series:
| metrics | values |
- | ceph_mon_metadata{cluster="mycluster", osd="osd.0"}| 17 |
+ | ceph_mon_metadata{job="ceph", cluster="mycluster", ceph_version="ceph version 18.2.1 (7fe91d5d5842e04be3b4f514d6dd990c54b29c76) reef (stable)", hostname="somehostname"}| 1 |
+ | ceph_mon_metadata{job="ceph", cluster="mycluster", ceph_version="ceph version 17.2.6 (d7ff0d10654d2280e08f1ab989c7cdf3064446a5) quincy (stable)", hostname="someotherhostname"}| 1 |
Then Grafana panel `Ceph Versions` with legend `Mon Services` shows:
| metrics | values |
- | {} | 1 |
+ | {ceph_version="ceph version 18.2.1 (7fe91d5d5842e04be3b4f514d6dd990c54b29c76) reef (stable)"} | 1 |
+ | {ceph_version="ceph version 17.2.6 (d7ff0d10654d2280e08f1ab989c7cdf3064446a5) quincy (stable)"} | 1 |
Scenario: "Test Ceph Versions MDS"
Given the following series:
| metrics | values |
- | ceph_mds_metadata{cluster="mycluster", osd="osd.0"}| 17 |
+ | ceph_mds_metadata{job="ceph", cluster="mycluster", ceph_version="ceph version 18.2.1 (7fe91d5d5842e04be3b4f514d6dd990c54b29c76) reef (stable)", hostname="someotherhostname", ceph_daemon="mds.someotherhostname",fs_id="1"}| 1 |
+ | ceph_mds_metadata{job="ceph", cluster="mycluster", ceph_version="ceph version 17.2.6 (d7ff0d10654d2280e08f1ab989c7cdf3064446a5) quincy (stable)", hostname="somehostname", ceph_daemon="mds.somehostname",fs_id="1"}| 1 |
Then Grafana panel `Ceph Versions` with legend `MDS Services` shows:
| metrics | values |
- | {} | 1 |
+ | {ceph_version="ceph version 18.2.1 (7fe91d5d5842e04be3b4f514d6dd990c54b29c76) reef (stable)"} | 1 |
+ | {ceph_version="ceph version 17.2.6 (d7ff0d10654d2280e08f1ab989c7cdf3064446a5) quincy (stable)"} | 1 |
Scenario: "Test Ceph Versions RGW"
Given the following series:
| metrics | values |
- | ceph_rgw_metadata{cluster="mycluster", osd="osd.0"}| 17 |
+ | ceph_rgw_metadata{job="ceph", cluster="mycluster", ceph_version="ceph version 18.2.1 (7fe91d5d5842e04be3b4f514d6dd990c54b29c76) reef (stable)", ceph_daemon="rgw.somehostname", hostname="somehostname"}| 1 |
+ | ceph_rgw_metadata{job="ceph", cluster="mycluster", ceph_version="ceph version 17.2.6 (d7ff0d10654d2280e08f1ab989c7cdf3064446a5) quincy (stable)", ceph_daemon="rgw.someotherhostname", hostname="someotherhostname"}| 1 |
Then Grafana panel `Ceph Versions` with legend `RGW Services` shows:
| metrics | values |
- | {} | 1 |
+ | {ceph_version="ceph version 18.2.1 (7fe91d5d5842e04be3b4f514d6dd990c54b29c76) reef (stable)"} | 1 |
+ | {ceph_version="ceph version 17.2.6 (d7ff0d10654d2280e08f1ab989c7cdf3064446a5) quincy (stable)"} | 1 |
Scenario: "Test Ceph Versions MGR"
Given the following series:
| metrics | values |
- | ceph_mgr_metadata{cluster="mycluster", osd="osd.0"}| 17 |
+ | ceph_mgr_metadata{job="ceph", cluster="mycluster", ceph_version="ceph version 18.2.1 (7fe91d5d5842e04be3b4f514d6dd990c54b29c76) reef (stable)", ceph_daemon="mgr.somehostname", hostname="somehostname"}| 1 |
+ | ceph_mgr_metadata{job="ceph", cluster="mycluster", ceph_version="ceph version 17.2.6 (d7ff0d10654d2280e08f1ab989c7cdf3064446a5) quincy (stable)", ceph_daemon="mgr.someotherhostname", hostname="someotherhostname"}| 1 |
Then Grafana panel `Ceph Versions` with legend `MGR Services` shows:
| metrics | values |
- | {} | 1 |
\ No newline at end of file
+ | {ceph_version="ceph version 18.2.1 (7fe91d5d5842e04be3b4f514d6dd990c54b29c76) reef (stable)"} | 1 |
+ | {ceph_version="ceph version 17.2.6 (d7ff0d10654d2280e08f1ab989c7cdf3064446a5) quincy (stable)"} | 1 |
legend = ''
query_id = panel_name + '-' + legend
if query_id not in global_context.query_map:
+ print(f"QueryMap: {global_context.query_map}")
raise KeyError((f'Query with legend {legend} in panel "{panel_name}"'
' couldn\'t be found'))
Scenario: "Test OSD"
Given the following series:
| metrics | values |
- | ceph_osd_metadata{job="ceph",back_iface="",ceph_daemon="osd.0",cluster_addr="192.168.1.12",device_class="hdd",front_iface="",hostname="127.0.0.1",objectstore="bluestore",public_addr="192.168.1.12",ceph_version="ceph version 17.0.0-8967-g6932a4f702a (6932a4f702a0d557fc36df3ca7a3bca70de42667) quincy (dev)"} | 1.0 |
- | ceph_osd_metadata{job="ceph",back_iface="",ceph_daemon="osd.1",cluster_addr="192.168.1.12",device_class="hdd",front_iface="",hostname="127.0.0.1",objectstore="bluestore",public_addr="192.168.1.12",ceph_version="ceph version 17.0.0-8967-g6932a4f702a (6932a4f702a0d557fc36df3ca7a3bca70de42667) quincy (dev)"} | 1.0 |
- | ceph_osd_metadata{job="ceph",back_iface="",ceph_daemon="osd.2",cluster_addr="192.168.1.12",device_class="hdd",front_iface="",hostname="127.0.0.1",objectstore="bluestore",public_addr="192.168.1.12",ceph_version="ceph version 17.0.0-8967-g6932a4f702a (6932a4f702a0d557fc36df3ca7a3bca70de42667) quincy (dev)"} | 1.0 |
+ | ceph_osd_metadata{job="ceph",cluster="mycluster",back_iface="",ceph_daemon="osd.0",cluster_addr="192.168.1.12",device_class="hdd",front_iface="",hostname="127.0.0.1",objectstore="bluestore",public_addr="192.168.1.12",ceph_version="ceph version 17.0.0-8967-g6932a4f702a (6932a4f702a0d557fc36df3ca7a3bca70de42667) quincy (dev)"} | 1.0 |
+ | ceph_osd_metadata{job="ceph",cluster="mycluster",back_iface="",ceph_daemon="osd.1",cluster_addr="192.168.1.12",device_class="hdd",front_iface="",hostname="127.0.0.1",objectstore="bluestore",public_addr="192.168.1.12",ceph_version="ceph version 17.0.0-8967-g6932a4f702a (6932a4f702a0d557fc36df3ca7a3bca70de42667) quincy (dev)"} | 1.0 |
+ | ceph_osd_metadata{job="ceph",cluster="mycluster",back_iface="",ceph_daemon="osd.2",cluster_addr="192.168.1.12",device_class="hdd",front_iface="",hostname="127.0.0.1",objectstore="bluestore",public_addr="192.168.1.12",ceph_version="ceph version 17.0.0-8967-g6932a4f702a (6932a4f702a0d557fc36df3ca7a3bca70de42667) quincy (dev)"} | 1.0 |
When variable `ceph_hosts` is `127.0.0.1`
Then Grafana panel `OSDs` with legend `EMPTY` shows:
| metrics | values |
Scenario: "Test Disk IOPS - Writes - Several OSDs per device"
Given the following series:
| metrics | values |
- | node_disk_writes_completed_total{job="ceph",device="sda",instance="localhost:9100"} | 10+60x1 |
- | node_disk_writes_completed_total{job="ceph",device="sdb",instance="localhost:9100"} | 10+60x1 |
- | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0 osd.1 osd.2",device="/dev/sda",instance="localhost:9283"} | 1.0 |
- | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.3 osd.4 osd.5",device="/dev/sdb",instance="localhost:9283"} | 1.0 |
+ | node_disk_writes_completed_total{job="node",device="sda",instance="localhost:9100"} | 10+60x1 |
+ | node_disk_writes_completed_total{job="node",device="sdb",instance="localhost:9100"} | 10+60x1 |
+ | ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.0 osd.1 osd.2",device="/dev/sda",instance="localhost:9283"} | 1.0 |
+ | ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.3 osd.4 osd.5",device="/dev/sdb",instance="localhost:9283"} | 1.0 |
When variable `ceph_hosts` is `localhost`
Then Grafana panel `$ceph_hosts Disk IOPS` with legend `{{device}}({{ceph_daemon}}) writes` shows:
| metrics | values |
- | {job="ceph",ceph_daemon="osd.0 osd.1 osd.2", device="sda", instance="localhost"} | 1 |
- | {job="ceph",ceph_daemon="osd.3 osd.4 osd.5", device="sdb", instance="localhost"} | 1 |
+ | {job="node",ceph_daemon="osd.0 osd.1 osd.2", device="sda", instance="localhost"} | 1 |
+ | {job="node",ceph_daemon="osd.3 osd.4 osd.5", device="sdb", instance="localhost"} | 1 |
Scenario: "Test Disk IOPS - Writes - Single OSD per device"
Given the following series:
| metrics | values |
- | node_disk_writes_completed_total{job="ceph",device="sda",instance="localhost:9100"} | 10+60x1 |
- | node_disk_writes_completed_total{job="ceph",device="sdb",instance="localhost:9100"} | 10+60x1 |
- | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 |
- | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 |
+ | node_disk_writes_completed_total{job="node",device="sda",instance="localhost:9100"} | 10+60x1 |
+ | node_disk_writes_completed_total{job="node",device="sdb",instance="localhost:9100"} | 10+60x1 |
+ | ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 |
+ | ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 |
When variable `ceph_hosts` is `localhost`
Then Grafana panel `$ceph_hosts Disk IOPS` with legend `{{device}}({{ceph_daemon}}) writes` shows:
| metrics | values |
- | {job="ceph",ceph_daemon="osd.0", device="sda", instance="localhost"} | 1 |
- | {job="ceph",ceph_daemon="osd.1", device="sdb", instance="localhost"} | 1 |
+ | {job="node", ceph_daemon="osd.0", device="sda", instance="localhost"} | 1 |
+ | {job="node", ceph_daemon="osd.1", device="sdb", instance="localhost"} | 1 |
Scenario: "Test Disk IOPS - Reads - Several OSDs per device"
Given the following series:
| metrics | values |
- | node_disk_reads_completed_total{job="ceph",device="sda",instance="localhost:9100"} | 10+60x1 |
- | node_disk_reads_completed_total{job="ceph",device="sdb",instance="localhost:9100"} | 10+60x1 |
- | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0 osd.1 osd.2",device="/dev/sda",instance="localhost:9283"} | 1.0 |
- | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.3 osd.4 osd.5",device="/dev/sdb",instance="localhost:9283"} | 1.0 |
+ | node_disk_reads_completed_total{job="node",device="sda",instance="localhost:9100"} | 10+60x1 |
+ | node_disk_reads_completed_total{job="node",device="sdb",instance="localhost:9100"} | 10+60x1 |
+ | ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.0 osd.1 osd.2",device="/dev/sda",instance="localhost:9283"} | 1.0 |
+ | ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.3 osd.4 osd.5",device="/dev/sdb",instance="localhost:9283"} | 1.0 |
When variable `ceph_hosts` is `localhost`
Then Grafana panel `$ceph_hosts Disk IOPS` with legend `{{device}}({{ceph_daemon}}) reads` shows:
| metrics | values |
- | {job="ceph",ceph_daemon="osd.0 osd.1 osd.2", device="sda", instance="localhost"} | 1 |
- | {job="ceph",ceph_daemon="osd.3 osd.4 osd.5", device="sdb", instance="localhost"} | 1 |
+ | {job="node",ceph_daemon="osd.0 osd.1 osd.2", device="sda", instance="localhost"} | 1 |
+ | {job="node",ceph_daemon="osd.3 osd.4 osd.5", device="sdb", instance="localhost"} | 1 |
Scenario: "Test Disk IOPS - Reads - Single OSD per device"
Given the following series:
| metrics | values |
- | node_disk_reads_completed_total{job="ceph",device="sda",instance="localhost:9100"} | 10+60x1 |
- | node_disk_reads_completed_total{job="ceph",device="sdb",instance="localhost:9100"} | 10+60x1 |
- | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 |
- | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 |
+ | node_disk_reads_completed_total{job="node",device="sda",instance="localhost:9100"} | 10+60x1 |
+ | node_disk_reads_completed_total{job="node",device="sdb",instance="localhost:9100"} | 10+60x1 |
+ | ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 |
+ | ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 |
When variable `ceph_hosts` is `localhost`
Then Grafana panel `$ceph_hosts Disk IOPS` with legend `{{device}}({{ceph_daemon}}) reads` shows:
| metrics | values |
- | {job="ceph",ceph_daemon="osd.0", device="sda", instance="localhost"} | 1 |
- | {job="ceph",ceph_daemon="osd.1", device="sdb", instance="localhost"} | 1 |
+ | {job="node",ceph_daemon="osd.0", device="sda", instance="localhost"} | 1 |
+ | {job="node",ceph_daemon="osd.1", device="sdb", instance="localhost"} | 1 |
# IOPS Panel - end
Scenario: "Test disk throughput - read"
Given the following series:
| metrics | values |
- | node_disk_read_bytes_total{job="ceph",device="sda",instance="localhost:9100"} | 10+60x1 |
- | node_disk_read_bytes_total{job="ceph",device="sdb",instance="localhost:9100"} | 100+600x1 |
- | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 |
- | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 |
+ | node_disk_read_bytes_total{job="node",device="sda",instance="localhost:9100"} | 10+60x1 |
+ | node_disk_read_bytes_total{job="node",device="sdb",instance="localhost:9100"} | 100+600x1 |
+ | ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 |
+ | ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 |
When variable `ceph_hosts` is `localhost`
Then Grafana panel `$ceph_hosts Throughput by Disk` with legend `{{device}}({{ceph_daemon}}) read` shows:
| metrics | values |
- | {job="ceph",ceph_daemon="osd.0", device="sda", instance="localhost"} | 1 |
- | {job="ceph",ceph_daemon="osd.1", device="sdb", instance="localhost"} | 10 |
+ | {job="node",ceph_daemon="osd.0", device="sda", instance="localhost"} | 1 |
+ | {job="node",ceph_daemon="osd.1", device="sdb", instance="localhost"} | 10 |
Scenario: "Test disk throughput - write"
Given the following series:
| metrics | values |
- | node_disk_written_bytes_total{job="ceph",device="sda",instance="localhost:9100"} | 10+60x1 |
- | node_disk_written_bytes_total{job="ceph",device="sdb",instance="localhost:9100"} | 100+600x1 |
- | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 |
- | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 |
+ | node_disk_written_bytes_total{job="node",device="sda",instance="localhost:9100"} | 10+60x1 |
+ | node_disk_written_bytes_total{job="node",device="sdb",instance="localhost:9100"} | 100+600x1 |
+ | ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 |
+ | ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 |
When variable `ceph_hosts` is `localhost`
Then Grafana panel `$ceph_hosts Throughput by Disk` with legend `{{device}}({{ceph_daemon}}) write` shows:
| metrics | values |
- | {job="ceph",ceph_daemon="osd.0", device="sda", instance="localhost"} | 1 |
- | {job="ceph",ceph_daemon="osd.1", device="sdb", instance="localhost"} | 10 |
+ | {job="node",ceph_daemon="osd.0", device="sda", instance="localhost"} | 1 |
+ | {job="node",ceph_daemon="osd.1", device="sdb", instance="localhost"} | 10 |
# Node disk bytes written/read panel - end
Scenario: "Test $ceph_hosts Disk Latency panel"
Given the following series:
| metrics | values |
- | node_disk_write_time_seconds_total{job="ceph",device="sda",instance="localhost:9100"} | 10+60x1 |
- | node_disk_write_time_seconds_total{job="ceph",device="sdb",instance="localhost:9100"} | 10+60x1 |
- | node_disk_writes_completed_total{job="ceph",device="sda",instance="localhost:9100"} | 10+60x1 |
- | node_disk_writes_completed_total{job="ceph",device="sdb",instance="localhost:9100"} | 10+60x1 |
- | node_disk_read_time_seconds_total{job="ceph",device="sda",instance="localhost:9100"} | 10+60x1 |
- | node_disk_read_time_seconds_total{job="ceph",device="sdb",instance="localhost:9100"} | 10+60x1 |
- | node_disk_reads_completed_total{job="ceph",device="sda",instance="localhost:9100"} | 10+60x1 |
- | node_disk_reads_completed_total{job="ceph",device="sdb",instance="localhost:9100"} | 10+60x1 |
- | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 |
- | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 |
+ | node_disk_write_time_seconds_total{job="node",device="sda",instance="localhost:9100"} | 10+60x1 |
+ | node_disk_write_time_seconds_total{job="node",device="sdb",instance="localhost:9100"} | 10+60x1 |
+ | node_disk_writes_completed_total{job="ndoe",device="sda",instance="localhost:9100"} | 10+60x1 |
+ | node_disk_writes_completed_total{job="node",device="sdb",instance="localhost:9100"} | 10+60x1 |
+ | node_disk_read_time_seconds_total{job="node",device="sda",instance="localhost:9100"} | 10+60x1 |
+ | node_disk_read_time_seconds_total{job="node",device="sdb",instance="localhost:9100"} | 10+60x1 |
+ | node_disk_reads_completed_total{job="node",device="sda",instance="localhost:9100"} | 10+60x1 |
+ | node_disk_reads_completed_total{job="node",device="sdb",instance="localhost:9100"} | 10+60x1 |
+ | ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 |
+ | ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 |
When variable `ceph_hosts` is `localhost`
Then Grafana panel `$ceph_hosts Disk Latency` with legend `{{device}}({{ceph_daemon}})` shows:
| metrics | values |
Scenario: "Test $ceph_hosts Disk utilization"
Given the following series:
| metrics | values |
- | node_disk_io_time_seconds_total{job="ceph",device="sda",instance="localhost:9100"} | 10+60x1 |
- | node_disk_io_time_seconds_total{job="ceph",device="sdb",instance="localhost:9100"} | 10+60x1 |
- | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 |
- | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 |
+ | node_disk_io_time_seconds_total{job="node",device="sda",instance="localhost:9100"} | 10+60x1 |
+ | node_disk_io_time_seconds_total{job="node",device="sdb",instance="localhost:9100"} | 10+60x1 |
+ | ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 |
+ | ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 |
When variable `ceph_hosts` is `localhost`
Then Grafana panel `$ceph_hosts Disk utilization` with legend `{{device}}({{ceph_daemon}})` shows:
| metrics | values |
- | {job="ceph",ceph_daemon="osd.0", device="sda", instance="localhost"} | 100 |
- | {job="ceph",ceph_daemon="osd.1", device="sdb", instance="localhost"} | 100 |
+ | {job="node",ceph_daemon="osd.0", device="sda", instance="localhost"} | 100 |
+ | {job="node",ceph_daemon="osd.1", device="sdb", instance="localhost"} | 100 |
Scenario: "Test network load succeeds"
Given the following series:
| metrics | values |
- | node_network_receive_bytes{instance="127.0.0.1", device="eth1"} | 10 100 |
- | node_network_receive_bytes{instance="127.0.0.1", device="eth2"} | 10 100 |
- | node_network_transmit_bytes{instance="127.0.0.1", device="eth1"} | 10 100 |
- | node_network_transmit_bytes{instance="127.0.0.1", device="eth2"} | 10 100 |
+ | node_network_receive_bytes{job="node",instance="127.0.0.1", device="eth1"} | 10 100 |
+ | node_network_receive_bytes{job="node",instance="127.0.0.1", device="eth2"} | 10 100 |
+ | node_network_transmit_bytes{job="node",instance="127.0.0.1", device="eth1"} | 10 100 |
+ | node_network_transmit_bytes{job="node",instance="127.0.0.1", device="eth2"} | 10 100 |
When variable `osd_hosts` is `127.0.0.1`
Then Grafana panel `Network Load` with legend `EMPTY` shows:
| metrics | values |
Scenario: "Test network load with bonding succeeds"
Given the following series:
| metrics | values |
- | node_network_receive_bytes{instance="127.0.0.1", device="eth1"} | 10 100 200 |
- | node_network_receive_bytes{instance="127.0.0.1", device="eth2"} | 10 100 200 |
- | node_network_transmit_bytes{instance="127.0.0.1", device="eth1"} | 10 100 200 |
- | node_network_transmit_bytes{instance="127.0.0.1", device="eth2"} | 10 100 200 |
- | node_network_transmit_bytes{instance="127.0.0.1", device="bond0"} | 20 200 300 |
- | node_network_transmit_bytes{instance="127.0.0.1", device="bond0"} | 20 200 300 |
- | bonding_slaves{instance="127.0.0.1", master="bond0"} | 2 |
+ | node_network_receive_bytes{job="node",instance="127.0.0.1", device="eth1"} | 10 100 200 |
+ | node_network_receive_bytes{job="node",instance="127.0.0.1", device="eth2"} | 10 100 200 |
+ | node_network_transmit_bytes{job="node",instance="127.0.0.1", device="eth1"} | 10 100 200 |
+ | node_network_transmit_bytes{job="node",instance="127.0.0.1", device="eth2"} | 10 100 200 |
+ | node_network_transmit_bytes{job="node",instance="127.0.0.1", device="bond0"} | 20 200 300 |
+ | node_network_transmit_bytes{job="node",instance="127.0.0.1", device="bond0"} | 20 200 300 |
+ | node_bonding_slaves{job="node",instance="127.0.0.1", master="bond0"} | 2 |
When variable `osd_hosts` is `127.0.0.1`
Then Grafana panel `Network Load` with legend `EMPTY` shows:
| metrics | values |
Scenario: "Test AVG Disk Utilization"
Given the following series:
| metrics | values |
- | node_disk_io_time_seconds_total{device="sda",instance="localhost:9100"} | 10+60x1 |
- | node_disk_io_time_seconds_total{device="sdb",instance="localhost:9100"} | 10+60x1 |
- | node_disk_io_time_seconds_total{device="sdc",instance="localhost:9100"} | 10 2000 |
- | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0",device="sda",instance="localhost:9283"} | 1.0 |
- | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 |
+ | node_disk_io_time_seconds_total{job="node",device="sda",instance="localhost:9100"} | 10+60x1 |
+ | node_disk_io_time_seconds_total{job="node",device="sdb",instance="localhost:9100"} | 10+60x1 |
+ | node_disk_io_time_seconds_total{job="node",device="sdc",instance="localhost:9100"} | 10 2000 |
+ | ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.0",device="sda",instance="localhost:9283"} | 1.0 |
+ | ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 |
When variable `osd_hosts` is `localhost`
Then Grafana panel `AVG Disk Utilization` with legend `EMPTY` shows:
| metrics | values |
| node_disk_reads_completed_total{device="sdb",instance="localhost"} | 10 60 |
| node_disk_read_time_seconds_total{device="sda",instance="localhost"} | 100 600 |
| node_disk_read_time_seconds_total{device="sdb",instance="localhost"} | 100 600 |
- | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 |
- | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 |
+ | ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 |
+ | ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 |
When variable `osd` is `osd.0`
Then Grafana panel `Physical Device Latency for $osd` with legend `{{instance}}/{{device}} Reads` shows:
| metrics | values |
| node_disk_writes_completed_total{device="sdb",instance="localhost"} | 10 60 |
| node_disk_write_time_seconds_total{device="sda",instance="localhost"} | 100 600 |
| node_disk_write_time_seconds_total{device="sdb",instance="localhost"} | 100 600 |
- | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 |
- | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 |
+ | ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 |
+ | ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 |
When variable `osd` is `osd.0`
Then Grafana panel `Physical Device Latency for $osd` with legend `{{instance}}/{{device}} Writes` shows:
| metrics | values |
| metrics | values |
| node_disk_writes_completed_total{device="sda",instance="localhost"} | 10 100 |
| node_disk_writes_completed_total{device="sdb",instance="localhost"} | 10 100 |
- | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 |
- | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 |
+ | ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 |
+ | ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 |
When variable `osd` is `osd.0`
Then Grafana panel `Physical Device R/W IOPS for $osd` with legend `{{device}} on {{instance}} Writes` shows:
| metrics | values |
| metrics | values |
| node_disk_reads_completed_total{device="sda",instance="localhost"} | 10 100 |
| node_disk_reads_completed_total{device="sdb",instance="localhost"} | 10 100 |
- | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 |
- | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 |
+ | ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 |
+ | ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 |
When variable `osd` is `osd.0`
Then Grafana panel `Physical Device R/W IOPS for $osd` with legend `{{device}} on {{instance}} Reads` shows:
| metrics | values |
| metrics | values |
| node_disk_reads_completed_total{device="sda",instance="localhost"} | 10 100 |
| node_disk_reads_completed_total{device="sdb",instance="localhost"} | 10 100 |
- | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 |
- | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 |
+ | ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 |
+ | ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 |
When variable `osd` is `osd.0`
Then Grafana panel `Physical Device R/W IOPS for $osd` with legend `{{device}} on {{instance}} Reads` shows:
| metrics | values |
| metrics | values |
| node_disk_writes_completed_total{device="sda",instance="localhost"} | 10 100 |
| node_disk_writes_completed_total{device="sdb",instance="localhost"} | 10 100 |
- | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 |
- | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 |
+ | ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 |
+ | ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 |
When variable `osd` is `osd.0`
Then Grafana panel `Physical Device R/W IOPS for $osd` with legend `{{device}} on {{instance}} Writes` shows:
| metrics | values |
Given the following series:
| metrics | values |
| node_disk_io_time_seconds_total{device="sda",instance="localhost:9100"} | 10 100 |
- | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 |
- | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 |
+ | ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 |
+ | ceph_disk_occupation_human{job="ceph",cluster="mycluster",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 |
When variable `osd` is `osd.0`
Then Grafana panel `Physical Device Util% for $osd` with legend `{{device}} on {{instance}}` shows:
| metrics | values |
Scenario: "Test OSD onode Hits Ratio"
Given the following series:
| metrics | values |
- | ceph_bluestore_onode_hits{ceph_daemon="osd.0",instance="ceph:9283",job="ceph"} | 5255 |
- | ceph_bluestore_onode_hits{ceph_daemon="osd.1",instance="ceph:9283",job="ceph"} | 5419 |
- | ceph_bluestore_onode_hits{ceph_daemon="osd.2",instance="ceph:9283",job="ceph"} | 5242 |
- | ceph_bluestore_onode_misses{ceph_daemon="osd.0",instance="ceph:9283",job="ceph"} | 202 |
- | ceph_bluestore_onode_misses{ceph_daemon="osd.1",instance="ceph:9283",job="ceph"} | 247 |
- | ceph_bluestore_onode_misses{ceph_daemon="osd.2",instance="ceph:9283",job="ceph"} | 234 |
+ | ceph_bluestore_onode_hits{ceph_daemon="osd.0",instance="ceph:9283",job="ceph",cluster="mycluster"} | 5255 |
+ | ceph_bluestore_onode_hits{ceph_daemon="osd.1",instance="ceph:9283",job="ceph",cluster="mycluster"} | 5419 |
+ | ceph_bluestore_onode_hits{ceph_daemon="osd.2",instance="ceph:9283",job="ceph",cluster="mycluster"} | 5242 |
+ | ceph_bluestore_onode_misses{ceph_daemon="osd.0",instance="ceph:9283",job="ceph",cluster="mycluster"} | 202 |
+ | ceph_bluestore_onode_misses{ceph_daemon="osd.1",instance="ceph:9283",job="ceph",cluster="mycluster"} | 247 |
+ | ceph_bluestore_onode_misses{ceph_daemon="osd.2",instance="ceph:9283",job="ceph",cluster="mycluster"} | 234 |
Then Grafana panel `OSD onode Hits Ratio` with legend `EMPTY` shows:
| metrics | values |
| {} | 9.588529429483704E-01 |
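As a sanity check, assuming the panel computes sum(hits) / (sum(hits) + sum(misses)) across the OSDs, the expected ratio works out to (5255 + 5419 + 5242) / (5255 + 5419 + 5242 + 202 + 247 + 234) = 15916 / 16599 ≈ 0.9588529, which matches the value above.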
Scenario: "Test $rgw_servers GET/PUT Latencies - GET"
Given the following series:
| metrics | values |
- | ceph_rgw_op_get_obj_lat_sum{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 10 50 100 |
- | ceph_rgw_op_get_obj_lat_count{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 20 60 80 |
- | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1 1 1 |
+ | ceph_rgw_op_get_obj_lat_sum{instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 10 50 100 |
+ | ceph_rgw_op_get_obj_lat_count{instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 20 60 80 |
+ | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 1 1 1 |
When interval is `30s`
And variable `rgw_servers` is `rgw.foo`
Then Grafana panel `$rgw_servers GET/PUT Latencies` with legend `GET {{ceph_daemon}}` shows:
Scenario: "Test $rgw_servers GET/PUT Latencies - PUT"
Given the following series:
| metrics | values |
- | ceph_rgw_op_put_obj_lat_sum{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 15 35 55 |
- | ceph_rgw_op_put_obj_lat_count{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 10 30 50 |
- | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1 1 1 |
+ | ceph_rgw_op_put_obj_lat_sum{instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 15 35 55 |
+ | ceph_rgw_op_put_obj_lat_count{instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 10 30 50 |
+ | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 1 1 1 |
When interval is `30s`
And variable `rgw_servers` is `rgw.foo`
Then Grafana panel `$rgw_servers GET/PUT Latencies` with legend `PUT {{ceph_daemon}}` shows:
Scenario: "Test Bandwidth by HTTP Operation - GET"
Given the following series:
| metrics | values |
- | ceph_rgw_op_get_obj_bytes{instance="127.0.0.1", instance_id="92806566", job="ceph"} | 10 50 100 |
- | ceph_rgw_metadata{ceph_daemon="rgw.1", hostname="localhost", instance="127.0.0.1", instance_id="92806566", job="ceph"} | 1 1 1 |
+ | ceph_rgw_op_get_obj_bytes{instance="127.0.0.1", instance_id="92806566", job="ceph", cluster="mycluster"} | 10 50 100 |
+ | ceph_rgw_metadata{ceph_daemon="rgw.1", hostname="localhost", instance="127.0.0.1", instance_id="92806566", job="ceph", cluster="mycluster"} | 1 1 1 |
When interval is `30s`
And variable `rgw_servers` is `rgw.1`
Then Grafana panel `Bandwidth by HTTP Operation` with legend `GETs {{ceph_daemon}}` shows:
| metrics | values |
- | {ceph_daemon="rgw.1", instance="127.0.0.1", instance_id="92806566", job="ceph"} | 1.5 |
+ | {ceph_daemon="rgw.1", instance="127.0.0.1", instance_id="92806566", job="ceph", cluster="mycluster"} | 1.5 |
Scenario: "Test Bandwidth by HTTP Operation - PUT"
Given the following series:
| metrics | values |
- | ceph_rgw_op_put_obj_bytes{instance="127.0.0.1", instance_id="92806566", job="ceph"} | 5 20 50 |
- | ceph_rgw_metadata{ceph_daemon="rgw.1", hostname="localhost", instance="127.0.0.1", instance_id="92806566", job="ceph"} | 1 1 1 |
+ | ceph_rgw_op_put_obj_bytes{instance="127.0.0.1", instance_id="92806566", job="ceph", cluster="mycluster"} | 5 20 50 |
+ | ceph_rgw_metadata{ceph_daemon="rgw.1", hostname="localhost", instance="127.0.0.1", instance_id="92806566", job="ceph", cluster="mycluster"} | 1 1 1 |
When interval is `30s`
And variable `rgw_servers` is `rgw.1`
Then Grafana panel `Bandwidth by HTTP Operation` with legend `PUTs {{ceph_daemon}}` shows:
| metrics | values |
- | {ceph_daemon="rgw.1", instance="127.0.0.1", instance_id="92806566", job="ceph"} | 7.5E-01 |
+ | {ceph_daemon="rgw.1", instance="127.0.0.1", instance_id="92806566", job="ceph", cluster="mycluster"} | 7.5E-01 |
Scenario: "Test HTTP Request Breakdown - Requests Failed"
Given the following series:
| metrics | values |
- | ceph_rgw_failed_req{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1 5 7 |
- | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1 1 1 |
+ | ceph_rgw_failed_req{instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 1 5 7 |
+ | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 1 1 1 |
When interval is `30s`
And variable `rgw_servers` is `rgw.foo`
Then Grafana panel `HTTP Request Breakdown` with legend `Requests Failed {{ceph_daemon}}` shows:
| metrics | values |
- | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1E-01 |
+ | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 1E-01 |
Scenario: "Test HTTP Request Breakdown - GET"
Given the following series:
| metrics | values |
- | ceph_rgw_get{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 100 150 170 |
- | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1 1 1 |
+ | ceph_rgw_get{instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 100 150 170 |
+ | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 1 1 1 |
When interval is `30s`
And variable `rgw_servers` is `rgw.foo`
Then Grafana panel `HTTP Request Breakdown` with legend `GETs {{ceph_daemon}}` shows:
| metrics | values |
- | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1.1666666666666667 |
+ | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 1.1666666666666667 |
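The expected GET figure is consistent with a plain rate over the full series, assuming the three samples are 30 s apart: (170 - 100) / 60 s ≈ 1.1667 requests per second.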
Scenario: "Test HTTP Request Breakdown - PUT"
Given the following series:
| metrics | values |
- | ceph_rgw_put{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 70 90 160 |
- | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1 1 1 |
+ | ceph_rgw_put{instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 70 90 160 |
+ | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 1 1 1 |
When interval is `30s`
And variable `rgw_servers` is `rgw.foo`
Then Grafana panel `HTTP Request Breakdown` with legend `PUTs {{ceph_daemon}}` shows:
| metrics | values |
- | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1.5 |
+ | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 1.5 |
Scenario: "Test HTTP Request Breakdown - Other"
Given the following series:
| metrics | values |
- | ceph_rgw_req{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 175 250 345 |
- | ceph_rgw_get{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 100 150 170 |
- | ceph_rgw_put{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 70 90 160 |
- | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1 1 1 |
+ | ceph_rgw_req{instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 175 250 345 |
+ | ceph_rgw_get{instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 100 150 170 |
+ | ceph_rgw_put{instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 70 90 160 |
+ | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 1 1 1 |
When interval is `30s`
And variable `rgw_servers` is `rgw.foo`
Then Grafana panel `HTTP Request Breakdown` with legend `Other {{ceph_daemon}}` shows:
| metrics | values |
- | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph"} | .16666666666666652 |
+ | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | .16666666666666652 |
Scenario: "Test Workload Breakdown - Failures"
Given the following series:
| metrics | values |
- | ceph_rgw_failed_req{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1 5 7 |
- | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1 1 1 |
+ | ceph_rgw_failed_req{instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 1 5 7 |
+ | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 1 1 1 |
When interval is `30s`
And variable `rgw_servers` is `rgw.foo`
Then Grafana panel `Workload Breakdown` with legend `Failures {{ceph_daemon}}` shows:
| metrics | values |
- | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1E-01 |
+ | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 1E-01 |
Scenario: "Test Workload Breakdown - GETs"
Given the following series:
| metrics | values |
- | ceph_rgw_get{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 100 150 170 |
- | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1 1 1 |
+ | ceph_rgw_get{instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 100 150 170 |
+ | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 1 1 1 |
When interval is `30s`
And variable `rgw_servers` is `rgw.foo`
Then Grafana panel `Workload Breakdown` with legend `GETs {{ceph_daemon}}` shows:
| metrics | values |
- | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1.1666666666666667 |
+ | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 1.1666666666666667 |
Scenario: "Test Workload Breakdown - PUTs"
Given the following series:
| metrics | values |
- | ceph_rgw_put{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 70 90 160 |
- | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1 1 1 |
+ | ceph_rgw_put{instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 70 90 160 |
+ | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 1 1 1 |
When interval is `30s`
And variable `rgw_servers` is `rgw.foo`
Then Grafana panel `Workload Breakdown` with legend `PUTs {{ceph_daemon}}` shows:
| metrics | values |
- | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1.5 |
+ | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 1.5 |
Scenario: "Test Workload Breakdown - Other"
Given the following series:
| metrics | values |
- | ceph_rgw_req{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 175 250 345 |
- | ceph_rgw_get{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 100 150 170 |
- | ceph_rgw_put{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 70 90 160 |
- | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1 1 1 |
+ | ceph_rgw_req{instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 175 250 345 |
+ | ceph_rgw_get{instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 100 150 170 |
+ | ceph_rgw_put{instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 70 90 160 |
+ | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 1 1 1 |
When interval is `30s`
And variable `rgw_servers` is `rgw.foo`
Then Grafana panel `Workload Breakdown` with legend `Other (DELETE,LIST) {{ceph_daemon}}` shows:
| metrics | values |
- | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph"} | .16666666666666652 |
+ | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | .16666666666666652 |
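Here the Other rate is presumably total requests minus GETs minus PUTs: (345 - 175)/60 - (170 - 100)/60 - (160 - 70)/60 = 10/60 ≈ 0.1667; the long decimal above is simply the floating-point evaluation of that difference.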
Scenario: "Test Average GET Latencies"
Given the following series:
| metrics | values |
- | ceph_rgw_op_get_obj_lat_sum{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 10 50 100 |
- | ceph_rgw_op_get_obj_lat_count{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 20 60 80 |
- | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1 1 1 |
+ | ceph_rgw_op_get_obj_lat_sum{instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 10 50 100 |
+ | ceph_rgw_op_get_obj_lat_count{instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 20 60 80 |
+ | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 1 1 1 |
When interval is `30s`
Then Grafana panel `Average GET/PUT Latencies by RGW Instance` with legend `GET {{rgw_host}}` shows:
| metrics | values |
- | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph", rgw_host="foo"} | 1.5 |
+ | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph", rgw_host="foo", cluster="mycluster"} | 1.5 |
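This expected latency follows from the sum/count ratio and is independent of sample spacing, assuming the panel divides the two rates: (100 - 10) / (80 - 20) = 90 / 60 = 1.5 s.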
Scenario: "Test Average PUT Latencies"
Given the following series:
| metrics | values |
- | ceph_rgw_op_put_obj_lat_sum{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 15 35 55 |
- | ceph_rgw_op_put_obj_lat_count{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 10 30 50 |
- | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1 1 1 |
+ | ceph_rgw_op_put_obj_lat_sum{instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 15 35 55 |
+ | ceph_rgw_op_put_obj_lat_count{instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 10 30 50 |
+ | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 1 1 1 |
When interval is `30s`
Then Grafana panel `Average GET/PUT Latencies by RGW Instance` with legend `PUT {{rgw_host}}` shows:
| metrics | values |
- | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph", rgw_host="foo"} | 1 |
+ | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph", rgw_host="foo", cluster="mycluster"} | 1 |
Scenario: "Test Total Requests/sec by RGW Instance"
Given the following series:
| metrics | values |
- | ceph_rgw_req{instance="127.0.0.1", instance_id="92806566", job="ceph"} | 10 50 100 |
- | ceph_rgw_metadata{ceph_daemon="rgw.1", hostname="localhost", instance="127.0.0.1", instance_id="92806566", job="ceph"} | 1 1 1 |
+ | ceph_rgw_req{instance="127.0.0.1", instance_id="92806566", job="ceph", cluster="mycluster"} | 10 50 100 |
+ | ceph_rgw_metadata{ceph_daemon="rgw.1", hostname="localhost", instance="127.0.0.1", instance_id="92806566", job="ceph", cluster="mycluster"} | 1 1 1 |
When interval is `30s`
Then Grafana panel `Total Requests/sec by RGW Instance` with legend `{{rgw_host}}` shows:
| metrics | values |
Scenario: "Test GET Latencies by RGW Instance"
Given the following series:
| metrics | values |
- | ceph_rgw_op_get_obj_lat_sum{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 10 50 100 |
- | ceph_rgw_op_get_obj_lat_count{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 20 60 80 |
- | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1 1 1 |
+ | ceph_rgw_op_get_obj_lat_sum{instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 10 50 100 |
+ | ceph_rgw_op_get_obj_lat_count{instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 20 60 80 |
+ | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 1 1 1 |
When interval is `30s`
Then Grafana panel `GET Latencies by RGW Instance` with legend `{{rgw_host}}` shows:
| metrics | values |
- | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph", rgw_host="foo"} | 1.5 |
+ | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph", rgw_host="foo", cluster="mycluster"} | 1.5 |
Scenario: "Test Bandwidth Consumed by Type- GET"
Given the following series:
| metrics | values |
- | ceph_rgw_op_get_obj_bytes{instance="127.0.0.1", instance_id="92806566", job="ceph"} | 10 50 100 |
+ | ceph_rgw_op_get_obj_bytes{instance="127.0.0.1", instance_id="92806566", job="ceph", cluster="mycluster"} | 10 50 100 |
When evaluation time is `1m`
And interval is `30s`
Then Grafana panel `Bandwidth Consumed by Type` with legend `GETs` shows:
Scenario: "Test Bandwidth Consumed by Type- PUT"
Given the following series:
| metrics | values |
- | ceph_rgw_op_put_obj_bytes{instance="127.0.0.1", instance_id="92806566", job="ceph"} | 5 20 50 |
+ | ceph_rgw_op_put_obj_bytes{instance="127.0.0.1", instance_id="92806566", job="ceph", cluster="mycluster"} | 5 20 50 |
When evaluation time is `1m`
And interval is `30s`
Then Grafana panel `Bandwidth Consumed by Type` with legend `PUTs` shows:
Scenario: "Test Bandwidth by RGW Instance"
Given the following series:
| metrics | values |
- | ceph_rgw_op_get_obj_bytes{instance="127.0.0.1", instance_id="92806566", job="ceph"} | 10 50 100 |
- | ceph_rgw_op_put_obj_bytes{instance="127.0.0.1", instance_id="92806566", job="ceph"} | 5 20 50 |
- | ceph_rgw_metadata{ceph_daemon="rgw.1", hostname="localhost", instance="127.0.0.1", instance_id="92806566", job="ceph"} | 1 1 1 |
+ | ceph_rgw_op_get_obj_bytes{instance="127.0.0.1", instance_id="92806566", job="ceph", cluster="mycluster"} | 10 50 100 |
+ | ceph_rgw_op_put_obj_bytes{instance="127.0.0.1", instance_id="92806566", job="ceph", cluster="mycluster"} | 5 20 50 |
+ | ceph_rgw_metadata{ceph_daemon="rgw.1", hostname="localhost", instance="127.0.0.1", instance_id="92806566", job="ceph", cluster="mycluster"} | 1 1 1 |
When evaluation time is `1m`
And interval is `30s`
Then Grafana panel `Bandwidth by RGW Instance` with legend `{{rgw_host}}` shows:
Scenario: "Test PUT Latencies by RGW Instance"
Given the following series:
| metrics | values |
- | ceph_rgw_op_put_obj_lat_sum{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 15 35 55 |
- | ceph_rgw_op_put_obj_lat_count{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 10 30 50 |
- | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1 1 1 |
+ | ceph_rgw_op_put_obj_lat_sum{instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 15 35 55 |
+ | ceph_rgw_op_put_obj_lat_count{instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 10 30 50 |
+ | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 1 1 1 |
When evaluation time is `1m`
And interval is `30s`
Then Grafana panel `PUT Latencies by RGW Instance` with legend `{{rgw_host}}` shows:
| metrics | values |
- | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph", rgw_host="foo"} | 1 |
+ | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph", rgw_host="foo", cluster="mycluster"} | 1 |
Scenario: "Test Total backend responses by HTTP code"
Given the following series:
| node_network_transmit_bytes{instance="127.0.0.1", device="eth2"} | 10 100 |
| node_network_transmit_bytes{instance="192.168.100.2", device="bond0"} | 20 200 |
| node_network_transmit_bytes{instance="192.168.100.1", device="bond0"} | 20 200 |
- | bonding_slaves{instance="127.0.0.1", master="bond0"} | 2 |
+ | node_bonding_slaves{instance="127.0.0.1", master="bond0"} | 2 |
Then query `node_network_transmit_bytes{instance="127.0.0.1"} > 0` produces:
| metrics | values |
| node_network_transmit_bytes{instance="127.0.0.1", device="eth1"} | 100 |
| node_network_transmit_bytes{instance="127.0.0.1", device="eth2"} | 10 100 |
| node_network_transmit_bytes{instance="192.168.100.2", device="bond0"} | 20 200 |
| node_network_transmit_bytes{instance="192.168.100.1", device="bond0"} | 20 200 |
- | bonding_slaves{instance="127.0.0.1", master="bond0"} | 2 |
+ | node_bonding_slaves{instance="127.0.0.1", master="bond0"} | 2 |
When evaluation time is `0m`
Then query `node_network_transmit_bytes{instance="127.0.0.1"} > 0` produces:
| metrics | values |
| node_network_transmit_bytes{instance="127.0.0.1", device="eth2"} | 10 100 |
| node_network_transmit_bytes{instance="192.168.100.2", device="bond0"} | 20 200 |
| node_network_transmit_bytes{instance="192.168.100.1", device="bond0"} | 20 200 |
- | bonding_slaves{instance="127.0.0.1", master="bond0"} | 2 |
+ | node_bonding_slaves{instance="127.0.0.1", master="bond0"} | 2 |
When evaluation time is `0m`
And variable `osd_hosts` is `127.0.0.1`
Then query `node_network_transmit_bytes{instance="$osd_hosts"} > 0` produces:
| node_network_transmit_bytes{instance="127.0.0.1", device="eth2"} | 10 100 200 |
| node_network_transmit_bytes{instance="192.168.100.2", device="bond0"} | 20 200 300 |
| node_network_transmit_bytes{instance="192.168.100.1", device="bond0"} | 20 200 300 |
- | bonding_slaves{instance="127.0.0.1", master="bond0"} | 2 |
+ | node_bonding_slaves{instance="127.0.0.1", master="bond0"} | 2 |
When evaluation time is `2h`
And evaluation interval is `1h`
And interval is `1h`
def add_default_dashboards_variables(data: Dict[str, Any]) -> None:
    data['variables']['job'] = 'ceph'
+   data['variables']['cluster'] = 'mycluster'
    data['variables']['job_haproxy'] = 'haproxy'
    data['variables']['__rate_interval'] = '1m'
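With this default in place, the test environment supplies a concrete value for the new cluster template, so dashboard expressions that now carry the cluster matcher still resolve during the tests. As a hypothetical example of the substitution, a panel query along the lines of `ceph_rgw_req{job=~"$job", cluster=~"$cluster"}` (the exact matcher form depends on the rendered dashboards) would be evaluated as `ceph_rgw_req{job=~"ceph", cluster=~"mycluster"}` against the series defined in each scenario.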