-{}
+{
+ _config+:: {
+ dashboardTags: ['ceph-mixin'],
+
+ clusterLabel: 'cluster',
+ showMultiCluster: false,
+ },
+}
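
// The hunks below rely on helpers assumed to live in utils.libsonnet: u.matchers()
// supplies the %(matchers)s / %(clusterMatcher)s strings interpolated into the PromQL
// expressions, and u.addClusterTemplate() / u.addJobTemplate() add the $cluster and
// $job dashboard variables. A minimal sketch of the matcher helper, built only on the
// _config keys added above (the actual utils.libsonnet implementation may differ):
local matchersSketch(config) =
  local jobMatcher = 'job=~"$job"';
  local clusterMatcher = '%s=~"$cluster"' % config.clusterLabel;
  {
    matchers: jobMatcher + (if config.showMultiCluster then ', ' + clusterMatcher else ''),
    clusterMatcher: if config.showMultiCluster then clusterMatcher else '',
  };

// With showMultiCluster enabled this evaluates to
// 'ceph_osd_metadata{job=~"$job", cluster=~"$cluster"}':
'ceph_osd_metadata{%(matchers)s}' % matchersSketch({ clusterLabel: 'cluster', showMultiCluster: true })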
local g = import 'grafonnet/grafana.libsonnet';
local u = import 'utils.libsonnet';
+local c = (import '../mixin.libsonnet')._config;
{
grafanaDashboards+:: {
'now-1h',
'15s',
16,
- [],
+ c.dashboardTags,
'',
{
refresh_intervals: ['5s', '10s', '15s', '30s', '1m', '5m', '15m', '30m', '1h', '2h', '1d'],
.addTemplate(
g.template.datasource('datasource', 'prometheus', 'default', label='Data Source')
)
+ .addTemplate(
+ u.addClusterTemplate()
+ )
+ .addTemplate(
+ u.addJobTemplate()
+ )
.addTemplate(
u.addTemplateSchema('mds_servers',
'$datasource',
- 'label_values(ceph_mds_inodes, ceph_daemon)',
+ 'label_values(ceph_mds_inodes{%(matchers)s}, ceph_daemon)' % u.matchers(),
1,
true,
1,
'MDS Workload - $mds_servers',
'none',
'Reads(-) / Writes (+)',
- 'sum(rate(ceph_objecter_op_r{ceph_daemon=~"($mds_servers).*"}[1m]))',
+ 'sum(rate(ceph_objecter_op_r{%(matchers)s, ceph_daemon=~"($mds_servers).*"}[$__rate_interval]))' % u.matchers(),
'Read Ops',
0,
1,
9
)
.addTarget(u.addTargetSchema(
- 'sum(rate(ceph_objecter_op_w{ceph_daemon=~"($mds_servers).*"}[1m]))',
+ 'sum(rate(ceph_objecter_op_w{%(matchers)s, ceph_daemon=~"($mds_servers).*"}[$__rate_interval]))' % u.matchers(),
'Write Ops'
))
.addSeriesOverride(
'Client Request Load - $mds_servers',
'none',
'Client Requests',
- 'ceph_mds_server_handle_client_request{ceph_daemon=~"($mds_servers).*"}',
+ 'ceph_mds_server_handle_client_request{%(matchers)s, ceph_daemon=~"($mds_servers).*"}' % u.matchers(),
'{{ceph_daemon}}',
12,
1,
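
// The aggregator hunk below wires config.libsonnet in alongside the per-dashboard files.
// Usage sketch (hypothetical consumer file, not part of this change): a downstream user
// could turn on the multi-cluster matchers by overriding the _config keys introduced above.
(import 'mixin.libsonnet') + {
  _config+:: {
    showMultiCluster: true,
    clusterLabel: 'cluster',
  },
}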
+(import '../config.libsonnet') +
(import 'cephfs.libsonnet') +
(import 'host.libsonnet') +
(import 'osd.libsonnet') +
local g = import 'grafonnet/grafana.libsonnet';
local u = import 'utils.libsonnet';
+local c = (import '../mixin.libsonnet')._config;
{
grafanaDashboards+:: {
'now-1h',
'10s',
16,
- [],
+ c.dashboardTags,
'',
{
refresh_intervals: ['5s', '10s', '30s', '1m', '5m', '15m', '30m', '1h', '2h', '1d'],
'default',
label='Data Source')
)
+ .addTemplate(
+ u.addClusterTemplate()
+ )
+ .addTemplate(
+ u.addJobTemplate()
+ )
.addTemplate(
u.addTemplateSchema('osd_hosts',
'$datasource',
- 'label_values(ceph_disk_occupation, exported_instance)',
+ 'label_values(ceph_disk_occupation{%(matchers)s}, exported_instance)' % u.matchers(),
1,
true,
1,
.addTemplate(
u.addTemplateSchema('mon_hosts',
'$datasource',
- 'label_values(ceph_mon_metadata, ceph_daemon)',
+ 'label_values(ceph_mon_metadata{%(matchers)s}, ceph_daemon)' % u.matchers(),
1,
true,
1,
.addTemplate(
u.addTemplateSchema('mds_hosts',
'$datasource',
- 'label_values(ceph_mds_inodes, ceph_daemon)',
+ 'label_values(ceph_mds_inodes{%(matchers)s}, ceph_daemon)' % u.matchers(),
1,
true,
1,
.addTemplate(
u.addTemplateSchema('rgw_hosts',
'$datasource',
- 'label_values(ceph_rgw_metadata, ceph_daemon)',
+ 'label_values(ceph_rgw_metadata{%(matchers)s}, ceph_daemon)' % u.matchers(),
1,
true,
1,
'OSD Hosts',
'',
'current',
- 'count(sum by (hostname) (ceph_osd_metadata))',
+ 'count(sum by (hostname) (ceph_osd_metadata{%(matchers)s}))' % u.matchers(),
true,
0,
0,
|||
avg(1 - (
avg by(instance) (
- irate(node_cpu_seconds_total{mode=\'idle\',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[1m]) or
- irate(node_cpu{mode=\'idle\',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[1m])
+ rate(node_cpu_seconds_total{mode='idle',instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"}[$__rate_interval]) or
+ rate(node_cpu{mode='idle',instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"}[$__rate_interval])
)
))
|||,
node_memory_Slab_bytes{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"}
)
)
- )
- (
+ ) / (
node_memory_MemTotal{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"} or
node_memory_MemTotal_bytes{instance=~"($osd_hosts|$rgw_hosts|$mon_hosts|$mds_hosts).*"}
))
'current',
|||
sum ((
- irate(node_disk_reads_completed{instance=~"($osd_hosts).*"}[5m]) or
- irate(node_disk_reads_completed_total{instance=~"($osd_hosts).*"}[5m])
+ rate(node_disk_reads_completed{instance=~"($osd_hosts).*"}[$__rate_interval]) or
+ rate(node_disk_reads_completed_total{instance=~"($osd_hosts).*"}[$__rate_interval])
) + (
- irate(node_disk_writes_completed{instance=~"($osd_hosts).*"}[5m]) or
- irate(node_disk_writes_completed_total{instance=~"($osd_hosts).*"}[5m])
+ rate(node_disk_writes_completed{instance=~"($osd_hosts).*"}[$__rate_interval]) or
+ rate(node_disk_writes_completed_total{instance=~"($osd_hosts).*"}[$__rate_interval])
))
|||,
true,
|||
avg (
label_replace(
- (irate(node_disk_io_time_ms[5m]) / 10 ) or
- (irate(node_disk_io_time_seconds_total[5m]) * 100),
+ (rate(node_disk_io_time_ms[$__rate_interval]) / 10 ) or
+ (rate(node_disk_io_time_seconds_total[$__rate_interval]) * 100),
"instance", "$1", "instance", "([^.:]*).*"
) * on(instance, device) group_left(ceph_daemon) label_replace(
label_replace(
- ceph_disk_occupation_human{instance=~"($osd_hosts).*"},
+ ceph_disk_occupation_human{%(matchers)s, instance=~"($osd_hosts).*"},
"device", "$1", "device", "/dev/(.*)"
), "instance", "$1", "instance", "([^.:]*).*"
)
)
- |||,
+ ||| % u.matchers(),
true,
16,
0,
'current',
|||
sum (
- (
- irate(node_network_receive_bytes{instance=~"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*",device!="lo"}[1m]) or
- irate(node_network_receive_bytes_total{instance=~"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*",device!="lo"}[1m])
- ) unless on (device, instance)
- label_replace((bonding_slaves > 0), "device", "$1", "master", "(.+)")
+ (
+ rate(node_network_receive_bytes{instance=~"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*",device!="lo"}[$__rate_interval]) or
+ rate(node_network_receive_bytes_total{instance=~"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*",device!="lo"}[$__rate_interval])
+ ) unless on (device, instance)
+ label_replace((bonding_slaves > 0), "device", "$1", "master", "(.+)")
) +
sum (
- (
- irate(node_network_transmit_bytes{instance=~"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*",device!="lo"}[1m]) or
- irate(node_network_transmit_bytes_total{instance=~"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*",device!="lo"}[1m])
- ) unless on (device, instance)
- label_replace((bonding_slaves > 0), "device", "$1", "master", "(.+)")
- )
+ (
+ rate(node_network_transmit_bytes{instance=~"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*",device!="lo"}[$__rate_interval]) or
+ rate(node_network_transmit_bytes_total{instance=~"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*",device!="lo"}[$__rate_interval])
+ ) unless on (device, instance)
+ label_replace((bonding_slaves > 0), "device", "$1", "master", "(.+)")
+ )
|||
,
true,
100 * (
1 - (
avg by(instance) (
- irate(node_cpu_seconds_total{mode=\'idle\',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[1m]) or
- irate(node_cpu{mode=\'idle\',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[1m])
+ rate(node_cpu_seconds_total{mode='idle',instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"}[$__rate_interval]) or
+ rate(node_cpu{mode='idle',instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*"}[$__rate_interval])
)
)
)
'Network Load - Top 10 Hosts', 'Top 10 hosts by network load', 'Bps', |||
topk(10, (sum by(instance) (
(
- irate(node_network_receive_bytes{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*",device!="lo"}[1m]) or
- irate(node_network_receive_bytes_total{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*",device!="lo"}[1m])
+ rate(node_network_receive_bytes{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*",device!="lo"}[$__rate_interval]) or
+ rate(node_network_receive_bytes_total{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*",device!="lo"}[$__rate_interval])
) +
(
- irate(node_network_transmit_bytes{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*",device!="lo"}[1m]) or
- irate(node_network_transmit_bytes_total{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*",device!="lo"}[1m])
+ rate(node_network_transmit_bytes{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*",device!="lo"}[$__rate_interval]) or
+ rate(node_network_transmit_bytes_total{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*",device!="lo"}[$__rate_interval])
) unless on (device, instance)
- label_replace((bonding_slaves > 0), "device", "$1", "master", "(.+)"))
+ label_replace((bonding_slaves > 0), "device", "$1", "master", "(.+)"))
))
|||
, '{{instance}}', 12, 5, 12, 9
'now-1h',
'10s',
16,
- ['overview'],
+ c.dashboardTags + ['overview'],
'',
{
refresh_intervals: ['5s', '10s', '30s', '1m', '5m', '15m', '30m', '1h', '2h', '1d'],
g.template.datasource('datasource', 'prometheus', 'default', label='Data Source')
)
.addTemplate(
- u.addTemplateSchema('ceph_hosts', '$datasource', 'label_values(node_scrape_collector_success, instance) ', 1, false, 3, 'Hostname', '([^.:]*).*')
+ u.addClusterTemplate()
+ )
+ .addTemplate(
+ u.addJobTemplate()
+ )
+ .addTemplate(
+ u.addTemplateSchema('ceph_hosts',
+ '$datasource',
+ 'label_values({%(clusterMatcher)s}, instance)' % u.matchers(),
+ 1,
+ false,
+ 3,
+ 'Hostname',
+ '([^.:]*).*')
)
.addPanels([
u.addRowSchema(false, true, '$ceph_hosts System Overview') + { gridPos: { x: 0, y: 0, w: 24, h: 1 } },
'OSDs',
'',
'current',
- "count(sum by (ceph_daemon) (ceph_osd_metadata{hostname='$ceph_hosts'}))",
+ "count(sum by (ceph_daemon) (ceph_osd_metadata{%(matchers)s, hostname='$ceph_hosts'}))" % u.matchers(),
0,
1,
3,
'% Utilization',
|||
sum by (mode) (
- irate(node_cpu{instance=~"($ceph_hosts)([\\\\.:].*)?", mode=~"(irq|nice|softirq|steal|system|user|iowait)"}[1m]) or
- irate(node_cpu_seconds_total{instance=~"($ceph_hosts)([\\\\.:].*)?", mode=~"(irq|nice|softirq|steal|system|user|iowait)"}[1m])
+ rate(node_cpu{instance=~"($ceph_hosts)([\\\\.:].*)?", mode=~"(irq|nice|softirq|steal|system|user|iowait)"}[$__rate_interval]) or
+ rate(node_cpu_seconds_total{instance=~"($ceph_hosts)([\\\\.:].*)?", mode=~"(irq|nice|softirq|steal|system|user|iowait)"}[$__rate_interval])
) / (
scalar(
- sum(irate(node_cpu{instance=~"($ceph_hosts)([\\\\.:].*)?"}[1m]) or
- irate(node_cpu_seconds_total{instance=~"($ceph_hosts)([\\\\.:].*)?"}[1m]))
+ sum(rate(node_cpu{instance=~"($ceph_hosts)([\\\\.:].*)?"}[$__rate_interval]) or
+ rate(node_cpu_seconds_total{instance=~"($ceph_hosts)([\\\\.:].*)?"}[$__rate_interval]))
) * 100
)
|||,
'Send (-) / Receive (+)',
|||
sum by (device) (
- irate(
- node_network_receive_bytes{instance=~"($ceph_hosts)([\\\\.:].*)?",device!="lo"}[1m]) or
- irate(node_network_receive_bytes_total{instance=~"($ceph_hosts)([\\\\.:].*)?",device!="lo"}[1m]
+ rate(
+ node_network_receive_bytes{instance=~"($ceph_hosts)([\\\\.:].*)?",device!="lo"}[$__rate_interval]) or
+ rate(node_network_receive_bytes_total{instance=~"($ceph_hosts)([\\\\.:].*)?",device!="lo"}[$__rate_interval]
)
)
|||,
u.addTargetSchema(
|||
sum by (device) (
- irate(node_network_transmit_bytes{instance=~"($ceph_hosts)([\\\\.:].*)?",device!="lo"}[1m]) or
- irate(node_network_transmit_bytes_total{instance=~"($ceph_hosts)([\\\\.:].*)?",device!="lo"}[1m])
+ rate(node_network_transmit_bytes{instance=~"($ceph_hosts)([\\\\.:].*)?",device!="lo"}[$__rate_interval]) or
+ rate(node_network_transmit_bytes_total{instance=~"($ceph_hosts)([\\\\.:].*)?",device!="lo"}[$__rate_interval])
)
|||,
'{{device}}.tx'
'pps',
'Send (-) / Receive (+)',
|||
- irate(node_network_receive_drop{instance=~"$ceph_hosts([\\\\.:].*)?"}[1m]) or
- irate(node_network_receive_drop_total{instance=~"$ceph_hosts([\\\\.:].*)?"}[1m])
+ rate(node_network_receive_drop{instance=~"$ceph_hosts([\\\\.:].*)?"}[$__rate_interval]) or
+ rate(node_network_receive_drop_total{instance=~"$ceph_hosts([\\\\.:].*)?"}[$__rate_interval])
|||,
'{{device}}.rx',
21,
[
u.addTargetSchema(
|||
- irate(node_network_transmit_drop{instance=~"$ceph_hosts([\\\\.:].*)?"}[1m]) or
- irate(node_network_transmit_drop_total{instance=~"$ceph_hosts([\\\\.:].*)?"}[1m])
+ rate(node_network_transmit_drop{instance=~"$ceph_hosts([\\\\.:].*)?"}[$__rate_interval]) or
+ rate(node_network_transmit_drop_total{instance=~"$ceph_hosts([\\\\.:].*)?"}[$__rate_interval])
|||,
'{{device}}.tx'
),
'current',
|||
sum(
- ceph_osd_stat_bytes and
- on (ceph_daemon) ceph_disk_occupation{instance=~"($ceph_hosts)([\\\\.:].*)?"}
+ ceph_osd_stat_bytes{%(matchers)s} and
+ on (ceph_daemon) ceph_disk_occupation{%(matchers)s, instance=~"($ceph_hosts)([\\\\.:].*)?"}
)
- |||,
+ ||| % u.matchers(),
0,
6,
3,
'pps',
'Send (-) / Receive (+)',
|||
- irate(node_network_receive_errs{instance=~"$ceph_hosts([\\\\.:].*)?"}[1m]) or
- irate(node_network_receive_errs_total{instance=~"$ceph_hosts([\\\\.:].*)?"}[1m])
+ rate(node_network_receive_errs{instance=~"$ceph_hosts([\\\\.:].*)?"}[$__rate_interval]) or
+ rate(node_network_receive_errs_total{instance=~"$ceph_hosts([\\\\.:].*)?"}[$__rate_interval])
|||,
'{{device}}.rx',
21,
.addTargets(
[u.addTargetSchema(
|||
- irate(node_network_transmit_errs{instance=~"$ceph_hosts([\\\\.:].*)?"}[1m]) or
- irate(node_network_transmit_errs_total{instance=~"$ceph_hosts([\\\\.:].*)?"}[1m])
+ rate(node_network_transmit_errs{instance=~"$ceph_hosts([\\\\.:].*)?"}[$__rate_interval]) or
+ rate(node_network_transmit_errs_total{instance=~"$ceph_hosts([\\\\.:].*)?"}[$__rate_interval])
|||,
'{{device}}.tx'
)]
|||
label_replace(
(
- irate(node_disk_writes_completed{instance=~"($ceph_hosts)([\\\\.:].*)?"}[5m]) or
- irate(node_disk_writes_completed_total{instance=~"($ceph_hosts)([\\\\.:].*)?"}[5m])
+ rate(node_disk_writes_completed{instance=~"($ceph_hosts)([\\\\.:].*)?"}[$__rate_interval]) or
+ rate(node_disk_writes_completed_total{instance=~"($ceph_hosts)([\\\\.:].*)?"}[$__rate_interval])
), "instance", "$1", "instance", "([^:.]*).*"
) * on(instance, device) group_left(ceph_daemon) label_replace(
label_replace(
- ceph_disk_occupation_human, "device", "$1", "device", "/dev/(.*)"
+ ceph_disk_occupation_human{%(matchers)s}, "device", "$1", "device", "/dev/(.*)"
), "instance", "$1", "instance", "([^:.]*).*"
)
- |||,
+ ||| % u.matchers(),
'{{device}}({{ceph_daemon}}) writes',
0,
12,
|||
label_replace(
(
- irate(node_disk_reads_completed{instance=~"($ceph_hosts)([\\\\.:].*)?"}[5m]) or
- irate(node_disk_reads_completed_total{instance=~"($ceph_hosts)([\\\\.:].*)?"}[5m])
+ rate(node_disk_reads_completed{instance=~"($ceph_hosts)([\\\\.:].*)?"}[$__rate_interval]) or
+ rate(node_disk_reads_completed_total{instance=~"($ceph_hosts)([\\\\.:].*)?"}[$__rate_interval])
), "instance", "$1", "instance", "([^:.]*).*"
) * on(instance, device) group_left(ceph_daemon) label_replace(
label_replace(
- ceph_disk_occupation_human,"device", "$1", "device", "/dev/(.*)"
+ ceph_disk_occupation_human{%(matchers)s},"device", "$1", "device", "/dev/(.*)"
), "instance", "$1", "instance", "([^:.]*).*"
)
- |||,
+ ||| % u.matchers(),
'{{device}}({{ceph_daemon}}) reads'
),
]
|||
label_replace(
(
- irate(node_disk_bytes_written{instance=~"($ceph_hosts)([\\\\.:].*)?"}[5m]) or
- irate(node_disk_written_bytes_total{instance=~"($ceph_hosts)([\\\\.:].*)?"}[5m])
+ rate(node_disk_bytes_written{instance=~"($ceph_hosts)([\\\\.:].*)?"}[$__rate_interval]) or
+ rate(node_disk_written_bytes_total{instance=~"($ceph_hosts)([\\\\.:].*)?"}[$__rate_interval])
), "instance", "$1", "instance", "([^:.]*).*") * on(instance, device)
group_left(ceph_daemon) label_replace(
- label_replace(ceph_disk_occupation_human, "device", "$1", "device", "/dev/(.*)"),
+ label_replace(ceph_disk_occupation_human{%(matchers)s}, "device", "$1", "device", "/dev/(.*)"),
"instance", "$1", "instance", "([^:.]*).*"
)
- |||,
+ ||| % u.matchers(),
'{{device}}({{ceph_daemon}}) write',
12,
12,
|||
label_replace(
(
- irate(node_disk_bytes_read{instance=~"($ceph_hosts)([\\\\.:].*)?"}[5m]) or
- irate(node_disk_read_bytes_total{instance=~"($ceph_hosts)([\\\\.:].*)?"}[5m])
+ rate(node_disk_bytes_read{instance=~"($ceph_hosts)([\\\\.:].*)?"}[$__rate_interval]) or
+ rate(node_disk_read_bytes_total{instance=~"($ceph_hosts)([\\\\.:].*)?"}[$__rate_interval])
),
"instance", "$1", "instance", "([^:.]*).*") * on(instance, device)
group_left(ceph_daemon) label_replace(
- label_replace(ceph_disk_occupation_human, "device", "$1", "device", "/dev/(.*)"),
+ label_replace(ceph_disk_occupation_human{%(matchers)s}, "device", "$1", "device", "/dev/(.*)"),
"instance", "$1", "instance", "([^:.]*).*"
)
- |||,
+ ||| % u.matchers(),
'{{device}}({{ceph_daemon}}) read'
)]
)
'',
|||
max by(instance, device) (label_replace(
- (irate(node_disk_write_time_seconds_total{instance=~"($ceph_hosts)([\\\\.:].*)?"}[5m])) /
- clamp_min(irate(node_disk_writes_completed_total{instance=~"($ceph_hosts)([\\\\.:].*)?"}[5m]), 0.001) or
- (irate(node_disk_read_time_seconds_total{instance=~"($ceph_hosts)([\\\\.:].*)?"}[5m])) /
- clamp_min(irate(node_disk_reads_completed_total{instance=~"($ceph_hosts)([\\\\.:].*)?"}[5m]), 0.001),
+ (rate(node_disk_write_time_seconds_total{instance=~"($ceph_hosts)([\\\\.:].*)?"}[$__rate_interval])) /
+ clamp_min(rate(node_disk_writes_completed_total{instance=~"($ceph_hosts)([\\\\.:].*)?"}[$__rate_interval]), 0.001) or
+ (rate(node_disk_read_time_seconds_total{instance=~"($ceph_hosts)([\\\\.:].*)?"}[$__rate_interval])) /
+ clamp_min(rate(node_disk_reads_completed_total{instance=~"($ceph_hosts)([\\\\.:].*)?"}[$__rate_interval]), 0.001),
"instance", "$1", "instance", "([^:.]*).*"
)) * on(instance, device) group_left(ceph_daemon) label_replace(
label_replace(
- ceph_disk_occupation_human{instance=~"($ceph_hosts)([\\\\.:].*)?"},
+ ceph_disk_occupation_human{%(matchers)s, instance=~"($ceph_hosts)([\\\\.:].*)?"},
"device", "$1", "device", "/dev/(.*)"
), "instance", "$1", "instance", "([^:.]*).*"
)
- |||,
+ ||| % u.matchers(),
'{{device}}({{ceph_daemon}})',
0,
21,
|||
label_replace(
(
- (irate(node_disk_io_time_ms{instance=~"($ceph_hosts)([\\\\.:].*)?"}[5m]) / 10) or
- irate(node_disk_io_time_seconds_total{instance=~"($ceph_hosts)([\\\\.:].*)?"}[5m]) * 100
+ (rate(node_disk_io_time_ms{instance=~"($ceph_hosts)([\\\\.:].*)?"}[$__rate_interval]) / 10) or
+ rate(node_disk_io_time_seconds_total{instance=~"($ceph_hosts)([\\\\.:].*)?"}[$__rate_interval]) * 100
), "instance", "$1", "instance", "([^:.]*).*"
) * on(instance, device) group_left(ceph_daemon) label_replace(
- label_replace(ceph_disk_occupation_human{instance=~"($ceph_hosts)([\\\\.:].*)?"},
+ label_replace(ceph_disk_occupation_human{%(matchers)s, instance=~"($ceph_hosts)([\\\\.:].*)?"},
"device", "$1", "device", "/dev/(.*)"), "instance", "$1", "instance", "([^:.]*).*"
)
- |||,
+ ||| % u.matchers(),
'{{device}}({{ceph_daemon}})',
12,
21,
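
// Interpolation sketch for the |||-blocks used throughout the OSD hunks below: Jsonnet's
// % operator replaces each %(matchers)s placeholder with the field of that name from the
// object returned by u.matchers(). Assuming matchers expands to 'job=~"$job", cluster=~"$cluster"',
// this self-contained example renders the same shape of query string:
local exampleMatchers = { matchers: 'job=~"$job", cluster=~"$cluster"' };
|||
  avg (rate(ceph_osd_op_r_latency_sum{%(matchers)s}[$__rate_interval]))
||| % exampleMatchers
// => 'avg (rate(ceph_osd_op_r_latency_sum{job=~"$job", cluster=~"$cluster"}[$__rate_interval]))\n'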
local g = import 'grafonnet/grafana.libsonnet';
local u = import 'utils.libsonnet';
+local c = (import '../mixin.libsonnet')._config;
{
grafanaDashboards+:: {
'now-1h',
'10s',
16,
- [],
+ c.dashboardTags,
'',
{
refresh_intervals: ['5s', '10s', '30s', '1m', '5m', '15m', '30m', '1h', '2h', '1d'],
.addTemplate(
g.template.datasource('datasource', 'prometheus', 'default', label='Data Source')
)
+ .addTemplate(
+ u.addClusterTemplate()
+ )
+ .addTemplate(
+ u.addJobTemplate()
+ )
.addPanels([
OsdOverviewGraphPanel(
{ '@95%ile': '#e0752d' },
'ms',
null,
'0',
- 'avg (irate(ceph_osd_op_r_latency_sum[1m]) / on (ceph_daemon) irate(ceph_osd_op_r_latency_count[1m]) * 1000)',
+ |||
+ avg (
+ rate(ceph_osd_op_r_latency_sum{%(matchers)s}[$__rate_interval]) /
+ on (ceph_daemon) rate(ceph_osd_op_r_latency_count{%(matchers)s}[$__rate_interval]) * 1000
+ )
+ ||| % u.matchers(),
'AVG read',
0,
0,
u.addTargetSchema(
|||
max(
- irate(ceph_osd_op_r_latency_sum[1m]) /
- on (ceph_daemon) irate(ceph_osd_op_r_latency_count[1m]) * 1000
+ rate(ceph_osd_op_r_latency_sum{%(matchers)s}[$__rate_interval]) /
+ on (ceph_daemon) rate(ceph_osd_op_r_latency_count{%(matchers)s}[$__rate_interval]) * 1000
)
- |||,
+ ||| % u.matchers(),
'MAX read'
),
u.addTargetSchema(
|||
quantile(0.95,
(
- irate(ceph_osd_op_r_latency_sum[1m]) /
- on (ceph_daemon) irate(ceph_osd_op_r_latency_count[1m])
+ rate(ceph_osd_op_r_latency_sum{%(matchers)s}[$__rate_interval]) /
+ on (ceph_daemon) rate(ceph_osd_op_r_latency_count{%(matchers)s}[$__rate_interval])
* 1000
)
)
- |||,
+ ||| % u.matchers(),
'@95%ile'
),
],
topk(10,
(sort(
(
- irate(ceph_osd_op_r_latency_sum[1m]) /
- on (ceph_daemon) irate(ceph_osd_op_r_latency_count[1m]) *
+ rate(ceph_osd_op_r_latency_sum{%(matchers)s}[$__rate_interval]) /
+ on (ceph_daemon) rate(ceph_osd_op_r_latency_count{%(matchers)s}[$__rate_interval]) *
1000
)
))
)
- |||,
+ ||| % u.matchers(),
'',
'table',
1,
'0',
|||
avg(
- irate(ceph_osd_op_w_latency_sum[1m]) /
- on (ceph_daemon) irate(ceph_osd_op_w_latency_count[1m])
+ rate(ceph_osd_op_w_latency_sum{%(matchers)s}[$__rate_interval]) /
+ on (ceph_daemon) rate(ceph_osd_op_w_latency_count{%(matchers)s}[$__rate_interval])
* 1000
)
- |||,
+ ||| % u.matchers(),
'AVG write',
12,
0,
u.addTargetSchema(
|||
max(
- irate(ceph_osd_op_w_latency_sum[1m]) /
- on (ceph_daemon) irate(ceph_osd_op_w_latency_count[1m]) *
+ rate(ceph_osd_op_w_latency_sum{%(matchers)s}[$__rate_interval]) /
+ on (ceph_daemon) rate(ceph_osd_op_w_latency_count{%(matchers)s}[$__rate_interval]) *
1000
)
- |||, 'MAX write'
+ ||| % u.matchers(), 'MAX write'
),
u.addTargetSchema(
|||
quantile(0.95, (
- irate(ceph_osd_op_w_latency_sum[1m]) /
- on (ceph_daemon) irate(ceph_osd_op_w_latency_count[1m]) *
+ rate(ceph_osd_op_w_latency_sum{%(matchers)s}[$__rate_interval]) /
+ on (ceph_daemon) rate(ceph_osd_op_w_latency_count{%(matchers)s}[$__rate_interval]) *
1000
))
- |||, '@95%ile write'
+ ||| % u.matchers(), '@95%ile write'
),
],
),
|||
topk(10,
(sort(
- (irate(ceph_osd_op_w_latency_sum[1m]) /
- on (ceph_daemon) irate(ceph_osd_op_w_latency_count[1m]) *
+ (rate(ceph_osd_op_w_latency_sum{%(matchers)s}[$__rate_interval]) /
+ on (ceph_daemon) rate(ceph_osd_op_w_latency_count{%(matchers)s}[$__rate_interval]) *
1000)
))
)
- |||,
+ ||| % u.matchers(),
'',
'table',
1,
{}, '', 'OSD Types Summary'
)
.addTarget(
- u.addTargetSchema('count by (device_class) (ceph_osd_metadata)', '{{device_class}}')
+ u.addTargetSchema('count by (device_class) (ceph_osd_metadata{%(matchers)s})' % u.matchers(), '{{device_class}}')
) + { gridPos: { x: 0, y: 8, w: 4, h: 8 } },
OsdOverviewPieChartPanel(
{ 'Non-Encrypted': '#E5AC0E' }, '', 'OSD Objectstore Types'
)
.addTarget(
u.addTargetSchema(
- 'count(ceph_bluefs_wal_total_bytes)', 'bluestore', 'time_series', 2
+ 'count(ceph_bluefs_wal_total_bytes{%(matchers)s})' % u.matchers(), 'bluestore', 'time_series', 2
)
)
.addTarget(
u.addTargetSchema(
- 'absent(ceph_bluefs_wal_total_bytes)*count(ceph_osd_metadata)', 'filestore', 'time_series', 2
+ 'absent(ceph_bluefs_wal_total_bytes{%(matchers)s}) * count(ceph_osd_metadata{%(matchers)s})' % u.matchers(), 'filestore', 'time_series', 2
)
) + { gridPos: { x: 4, y: 8, w: 4, h: 8 } },
OsdOverviewPieChartPanel(
{}, 'The pie chart shows the various OSD sizes used within the cluster', 'OSD Size Summary'
)
.addTarget(u.addTargetSchema(
- 'count(ceph_osd_stat_bytes < 1099511627776)', '<1TB', 'time_series', 2
+ 'count(ceph_osd_stat_bytes{%(matchers)s} < 1099511627776)' % u.matchers(), '<1TB', 'time_series', 2
))
.addTarget(u.addTargetSchema(
- 'count(ceph_osd_stat_bytes >= 1099511627776 < 2199023255552)', '<2TB', 'time_series', 2
+ 'count(ceph_osd_stat_bytes{%(matchers)s} >= 1099511627776 < 2199023255552)' % u.matchers(), '<2TB', 'time_series', 2
))
.addTarget(u.addTargetSchema(
- 'count(ceph_osd_stat_bytes >= 2199023255552 < 3298534883328)', '<3TB', 'time_series', 2
+ 'count(ceph_osd_stat_bytes{%(matchers)s} >= 2199023255552 < 3298534883328)' % u.matchers(), '<3TB', 'time_series', 2
))
.addTarget(u.addTargetSchema(
- 'count(ceph_osd_stat_bytes >= 3298534883328 < 4398046511104)', '<4TB', 'time_series', 2
+ 'count(ceph_osd_stat_bytes{%(matchers)s} >= 3298534883328 < 4398046511104)' % u.matchers(), '<4TB', 'time_series', 2
))
.addTarget(u.addTargetSchema(
- 'count(ceph_osd_stat_bytes >= 4398046511104 < 6597069766656)', '<6TB', 'time_series', 2
+ 'count(ceph_osd_stat_bytes{%(matchers)s} >= 4398046511104 < 6597069766656)' % u.matchers(), '<6TB', 'time_series', 2
))
.addTarget(u.addTargetSchema(
- 'count(ceph_osd_stat_bytes >= 6597069766656 < 8796093022208)', '<8TB', 'time_series', 2
+ 'count(ceph_osd_stat_bytes{%(matchers)s} >= 6597069766656 < 8796093022208)' % u.matchers(), '<8TB', 'time_series', 2
))
.addTarget(u.addTargetSchema(
- 'count(ceph_osd_stat_bytes >= 8796093022208 < 10995116277760)', '<10TB', 'time_series', 2
+ 'count(ceph_osd_stat_bytes{%(matchers)s} >= 8796093022208 < 10995116277760)' % u.matchers(), '<10TB', 'time_series', 2
))
.addTarget(u.addTargetSchema(
- 'count(ceph_osd_stat_bytes >= 10995116277760 < 13194139533312)', '<12TB', 'time_series', 2
+ 'count(ceph_osd_stat_bytes{%(matchers)s} >= 10995116277760 < 13194139533312)' % u.matchers(), '<12TB', 'time_series', 2
))
.addTarget(u.addTargetSchema(
- 'count(ceph_osd_stat_bytes >= 13194139533312)', '<12TB+', 'time_series', 2
+ 'count(ceph_osd_stat_bytes{%(matchers)s} >= 13194139533312)' % u.matchers(), '<12TB+', 'time_series', 2
)) + { gridPos: { x: 8, y: 8, w: 4, h: 8 } },
g.graphPanel.new(bars=true,
datasource='$datasource',
min='0',
nullPointMode='null')
.addTarget(u.addTargetSchema(
- 'ceph_osd_numpg', 'PGs per OSD', 'time_series', 1, true
+ 'ceph_osd_numpg{%(matchers)s}' % u.matchers(), 'PGs per OSD', 'time_series', 1, true
)) + { gridPos: { x: 12, y: 8, w: 8, h: 8 } },
OsdOverviewSingleStatPanel(
['#d44a3a', '#299c46'],
false,
'.75',
|||
- sum(ceph_bluestore_onode_hits) / (
- sum(ceph_bluestore_onode_hits) +
- sum(ceph_bluestore_onode_misses)
+ sum(ceph_bluestore_onode_hits{%(matchers)s}) / (
+ sum(ceph_bluestore_onode_hits{%(matchers)s}) +
+ sum(ceph_bluestore_onode_misses{%(matchers)s})
)
- |||,
+ ||| % u.matchers(),
20,
8,
4,
'short',
null,
null,
- 'round(sum(irate(ceph_pool_rd[30s])))',
+ 'round(sum(rate(ceph_pool_rd{%(matchers)s}[$__rate_interval])))' % u.matchers(),
'Reads',
0,
17,
8
)
.addTargets([u.addTargetSchema(
- 'round(sum(irate(ceph_pool_wr[30s])))', 'Writes'
+ 'round(sum(rate(ceph_pool_wr{%(matchers)s}[$__rate_interval])))' % u.matchers(), 'Writes'
)]),
]),
'osd-device-details.json':
'now-3h',
'',
16,
- [],
+ c.dashboardTags,
'',
{
refresh_intervals: ['5s', '10s', '30s', '1m', '5m', '15m', '30m', '1h', '2h', '1d'],
'default',
label='Data Source')
)
+ .addTemplate(
+ u.addClusterTemplate()
+ )
+ .addTemplate(
+ u.addJobTemplate()
+ )
.addTemplate(
u.addTemplateSchema('osd',
'$datasource',
- 'label_values(ceph_osd_metadata,ceph_daemon)',
+ 'label_values(ceph_osd_metadata{%(matchers)s}, ceph_daemon)' % u.matchers(),
1,
false,
1,
's',
'Read (-) / Write (+)',
|||
- irate(ceph_osd_op_r_latency_sum{ceph_daemon=~"$osd"}[1m]) /
- on (ceph_daemon) irate(ceph_osd_op_r_latency_count[1m])
- |||,
+ rate(ceph_osd_op_r_latency_sum{%(matchers)s, ceph_daemon=~"$osd"}[$__rate_interval]) /
+ on (ceph_daemon) rate(ceph_osd_op_r_latency_count{%(matchers)s}[$__rate_interval])
+ ||| % u.matchers(),
|||
- irate(ceph_osd_op_w_latency_sum{ceph_daemon=~"$osd"}[1m]) /
- on (ceph_daemon) irate(ceph_osd_op_w_latency_count[1m])
- |||,
+ rate(ceph_osd_op_w_latency_sum{%(matchers)s, ceph_daemon=~"$osd"}[$__rate_interval]) /
+ on (ceph_daemon) rate(ceph_osd_op_w_latency_count{%(matchers)s}[$__rate_interval])
+ ||| % u.matchers(),
'read',
'write',
0,
'',
'short',
'Read (-) / Write (+)',
- 'irate(ceph_osd_op_r{ceph_daemon=~"$osd"}[1m])',
- 'irate(ceph_osd_op_w{ceph_daemon=~"$osd"}[1m])',
+ 'rate(ceph_osd_op_r{%(matchers)s, ceph_daemon=~"$osd"}[$__rate_interval])' % u.matchers(),
+ 'rate(ceph_osd_op_w{%(matchers)s, ceph_daemon=~"$osd"}[$__rate_interval])' % u.matchers(),
'Reads',
'Writes',
6,
'',
'bytes',
'Read (-) / Write (+)',
- 'irate(ceph_osd_op_r_out_bytes{ceph_daemon=~"$osd"}[1m])',
- 'irate(ceph_osd_op_w_in_bytes{ceph_daemon=~"$osd"}[1m])',
+ 'rate(ceph_osd_op_r_out_bytes{%(matchers)s, ceph_daemon=~"$osd"}[$__rate_interval])' % u.matchers(),
+ 'rate(ceph_osd_op_w_in_bytes{%(matchers)s, ceph_daemon=~"$osd"}[$__rate_interval])' % u.matchers(),
'Read Bytes',
'Write Bytes',
12,
|||
(
label_replace(
- irate(node_disk_read_time_seconds_total[1m]) / irate(node_disk_reads_completed_total[1m]),
+ rate(node_disk_read_time_seconds_total{%(clusterMatcher)s}[$__rate_interval]) /
+ rate(node_disk_reads_completed_total{%(clusterMatcher)s}[$__rate_interval]),
"instance", "$1", "instance", "([^:.]*).*"
) and on (instance, device) label_replace(
label_replace(
- ceph_disk_occupation_human{ceph_daemon=~"$osd"},
+ ceph_disk_occupation_human{%(matchers)s, ceph_daemon=~"$osd"},
"device", "$1", "device", "/dev/(.*)"
), "instance", "$1", "instance", "([^:.]*).*"
)
)
- |||,
+ ||| % u.matchers(),
|||
(
label_replace(
- irate(node_disk_write_time_seconds_total[1m]) / irate(node_disk_writes_completed_total[1m]),
+ rate(node_disk_write_time_seconds_total{%(clusterMatcher)s}[$__rate_interval]) /
+ rate(node_disk_writes_completed_total{%(clusterMatcher)s}[$__rate_interval]),
"instance", "$1", "instance", "([^:.]*).*") and on (instance, device)
label_replace(
label_replace(
- ceph_disk_occupation_human{ceph_daemon=~"$osd"}, "device", "$1", "device", "/dev/(.*)"
+ ceph_disk_occupation_human{%(matchers)s, ceph_daemon=~"$osd"}, "device", "$1", "device", "/dev/(.*)"
), "instance", "$1", "instance", "([^:.]*).*"
)
)
- |||,
+ ||| % u.matchers(),
'{{instance}}/{{device}} Reads',
'{{instance}}/{{device}} Writes',
0,
'Read (-) / Write (+)',
|||
label_replace(
- irate(node_disk_writes_completed_total[1m]),
+ rate(node_disk_writes_completed_total{%(clusterMatcher)s}[$__rate_interval]),
"instance", "$1", "instance", "([^:.]*).*"
) and on (instance, device) label_replace(
label_replace(
- ceph_disk_occupation_human{ceph_daemon=~"$osd"},
+ ceph_disk_occupation_human{%(matchers)s, ceph_daemon=~"$osd"},
"device", "$1", "device", "/dev/(.*)"
), "instance", "$1", "instance", "([^:.]*).*"
)
- |||,
+ ||| % u.matchers(),
|||
label_replace(
- irate(node_disk_reads_completed_total[1m]),
+ rate(node_disk_reads_completed_total{%(clusterMatcher)s}[$__rate_interval]),
"instance", "$1", "instance", "([^:.]*).*"
) and on (instance, device) label_replace(
label_replace(
- ceph_disk_occupation_human{ceph_daemon=~"$osd"},
+ ceph_disk_occupation_human{%(matchers)s, ceph_daemon=~"$osd"},
"device", "$1", "device", "/dev/(.*)"
), "instance", "$1", "instance", "([^:.]*).*"
)
- |||,
+ ||| % u.matchers(),
'{{device}} on {{instance}} Writes',
'{{device}} on {{instance}} Reads',
6,
'Read (-) / Write (+)',
|||
label_replace(
- irate(node_disk_read_bytes_total[1m]), "instance", "$1", "instance", "([^:.]*).*"
+ rate(node_disk_read_bytes_total{%(clusterMatcher)s}[$__rate_interval]), "instance", "$1", "instance", "([^:.]*).*"
) and on (instance, device) label_replace(
label_replace(
- ceph_disk_occupation_human{ceph_daemon=~"$osd"},
+ ceph_disk_occupation_human{%(matchers)s, ceph_daemon=~"$osd"},
"device", "$1", "device", "/dev/(.*)"
), "instance", "$1", "instance", "([^:.]*).*"
)
- |||,
+ ||| % u.matchers(),
|||
label_replace(
- irate(node_disk_written_bytes_total[1m]), "instance", "$1", "instance", "([^:.]*).*"
+ rate(node_disk_written_bytes_total{%(clusterMatcher)s}[$__rate_interval]), "instance", "$1", "instance", "([^:.]*).*"
) and on (instance, device) label_replace(
label_replace(
- ceph_disk_occupation_human{ceph_daemon=~"$osd"},
+ ceph_disk_occupation_human{%(matchers)s, ceph_daemon=~"$osd"},
"device", "$1", "device", "/dev/(.*)"
), "instance", "$1", "instance", "([^:.]*).*"
)
- |||,
+ ||| % u.matchers(),
'{{instance}} {{device}} Reads',
'{{instance}} {{device}} Writes',
12,
.addTarget(u.addTargetSchema(
|||
label_replace(
- irate(node_disk_io_time_seconds_total[1m]),
+ rate(node_disk_io_time_seconds_total{%(clusterMatcher)s}[$__rate_interval]),
"instance", "$1", "instance", "([^:.]*).*"
) and on (instance, device) label_replace(
label_replace(
- ceph_disk_occupation_human{ceph_daemon=~"$osd"}, "device", "$1", "device", "/dev/(.*)"
+ ceph_disk_occupation_human{%(matchers)s, ceph_daemon=~"$osd"}, "device", "$1", "device", "/dev/(.*)"
), "instance", "$1", "instance", "([^:.]*).*"
)
- |||,
+ ||| % u.matchers(),
'{{device}} on {{instance}}'
)) + { gridPos: { x: 18, y: 11, w: 6, h: 9 } },
]),
local g = import 'grafonnet/grafana.libsonnet';
local u = import 'utils.libsonnet';
+local c = (import '../mixin.libsonnet')._config;
{
grafanaDashboards+:: {
'now-1h',
'15s',
22,
- [],
+ c.dashboardTags,
'',
{
refresh_intervals: ['5s', '10s', '15s', '30s', '1m', '5m', '15m', '30m', '1h', '2h', '1d'],
)
)
.addTemplate(
- g.template.datasource('datasource',
- 'prometheus',
- 'Dashboard1',
- label='Data Source')
+ g.template.datasource('datasource', 'prometheus', 'default', label='Data Source')
+ )
+ .addTemplate(
+ u.addClusterTemplate()
+ )
+ .addTemplate(
+ u.addJobTemplate()
)
.addTemplate(
g.template.custom(label='TopK',
'Pools',
'',
'avg',
- 'count(ceph_pool_metadata)',
+ 'count(ceph_pool_metadata{%(matchers)s})' % u.matchers(),
true,
'table',
0,
'Pools with Compression',
'Count of the pools that have compression enabled',
'current',
- 'count(ceph_pool_metadata{compression_mode!="none"})',
+ 'count(ceph_pool_metadata{%(matchers)s, compression_mode!="none"})' % u.matchers(),
null,
'',
3,
'Total Raw Capacity',
'Total raw capacity available to the cluster',
'current',
- 'sum(ceph_osd_stat_bytes)',
+ 'sum(ceph_osd_stat_bytes{%(matchers)s})' % u.matchers(),
null,
'',
6,
'Raw Capacity Consumed',
'Total raw capacity consumed by user data and associated overheads (metadata + redundancy)',
'current',
- 'sum(ceph_pool_bytes_used)',
+ 'sum(ceph_pool_bytes_used{%(matchers)s})' % u.matchers(),
true,
'',
9,
'Logical Stored ',
'Total of client data stored in the cluster',
'current',
- 'sum(ceph_pool_stored)',
+ 'sum(ceph_pool_stored{%(matchers)s})' % u.matchers(),
true,
'',
12,
'Compression Savings',
'A compression saving is determined as the data eligible to be compressed minus the capacity used to store the data after compression',
'current',
- 'sum(ceph_pool_compress_under_bytes - ceph_pool_compress_bytes_used)',
+ |||
+ sum(
+ ceph_pool_compress_under_bytes{%(matchers)s} -
+ ceph_pool_compress_bytes_used{%(matchers)s}
+ )
+ ||| % u.matchers(),
null,
'',
15,
'current',
|||
(
- sum(ceph_pool_compress_under_bytes > 0) /
- sum(ceph_pool_stored_raw and ceph_pool_compress_under_bytes > 0)
+ sum(ceph_pool_compress_under_bytes{%(matchers)s} > 0) /
+ sum(ceph_pool_stored_raw{%(matchers)s} and ceph_pool_compress_under_bytes{%(matchers)s} > 0)
) * 100
- |||,
+ ||| % u.matchers(),
null,
'table',
18,
'Compression Factor',
'This factor describes the average ratio of data eligible to be compressed divided by the data actually stored. It does not account for data written that was ineligible for compression (too small, or compression yield too low)',
'current',
- 'sum(ceph_pool_compress_under_bytes > 0) / sum(ceph_pool_compress_bytes_used > 0)',
+ |||
+ sum(ceph_pool_compress_under_bytes{%(matchers)s} > 0) /
+ sum(ceph_pool_compress_bytes_used{%(matchers)s} > 0)
+ ||| % u.matchers(),
null,
'',
21,
[
u.addTargetSchema(
|||
- (ceph_pool_compress_under_bytes / ceph_pool_compress_bytes_used > 0) and on(pool_id) (
- ((ceph_pool_compress_under_bytes > 0) / ceph_pool_stored_raw) * 100 > 0.5
+ (
+ ceph_pool_compress_under_bytes{%(matchers)s} /
+ ceph_pool_compress_bytes_used{%(matchers)s} > 0
+ ) and on(pool_id) (
+ (
+ (ceph_pool_compress_under_bytes{%(matchers)s} > 0) /
+ ceph_pool_stored_raw{%(matchers)s}
+ ) * 100 > 0.5
)
- |||,
+ ||| % u.matchers(),
'A',
'table',
1,
true
),
u.addTargetSchema(
- 'ceph_pool_max_avail * on(pool_id) group_left(name) ceph_pool_metadata',
+ |||
+ ceph_pool_max_avail{%(matchers)s} *
+ on(pool_id) group_left(name) ceph_pool_metadata{%(matchers)s}
+ ||| % u.matchers(),
'B',
'table',
1,
true
),
u.addTargetSchema(
- '((ceph_pool_compress_under_bytes > 0) / ceph_pool_stored_raw) * 100',
+ |||
+ (
+ (ceph_pool_compress_under_bytes{%(matchers)s} > 0) /
+ ceph_pool_stored_raw{%(matchers)s}
+ ) * 100
+ ||| % u.matchers(),
'C',
'table',
1,
true
),
u.addTargetSchema(
- '(ceph_pool_percent_used * on(pool_id) group_left(name) ceph_pool_metadata)',
+ |||
+ ceph_pool_percent_used{%(matchers)s} *
+ on(pool_id) group_left(name) ceph_pool_metadata{%(matchers)s}
+ ||| % u.matchers(),
'D',
'table',
1,
true
),
u.addTargetSchema(
- '(ceph_pool_compress_under_bytes - ceph_pool_compress_bytes_used > 0)',
+ |||
+ ceph_pool_compress_under_bytes{%(matchers)s} -
+ ceph_pool_compress_bytes_used{%(matchers)s} > 0
+ ||| % u.matchers(),
'E',
'table',
1,
true
),
u.addTargetSchema(
- 'delta(ceph_pool_stored[5d])', 'F', 'table', 1, true
+ 'delta(ceph_pool_stored{%(matchers)s}[5d])' % u.matchers(), 'F', 'table', 1, true
),
u.addTargetSchema(
- 'rate(ceph_pool_rd[30s]) + rate(ceph_pool_wr[30s])',
+ |||
+ rate(ceph_pool_rd{%(matchers)s}[$__rate_interval])
+ + rate(ceph_pool_wr{%(matchers)s}[$__rate_interval])
+ ||| % u.matchers(),
'G',
'table',
1,
true
),
u.addTargetSchema(
- 'rate(ceph_pool_rd_bytes[30s]) + rate(ceph_pool_wr_bytes[30s])',
+ |||
+ rate(ceph_pool_rd_bytes{%(matchers)s}[$__rate_interval]) +
+ rate(ceph_pool_wr_bytes{%(matchers)s}[$__rate_interval])
+ ||| % u.matchers(),
'H',
'table',
1,
true
),
u.addTargetSchema(
- 'ceph_pool_metadata', 'I', 'table', 1, true
+ 'ceph_pool_metadata{%(matchers)s}' % u.matchers(), 'I', 'table', 1, true
),
u.addTargetSchema(
- 'ceph_pool_stored * on(pool_id) group_left ceph_pool_metadata',
+ 'ceph_pool_stored{%(matchers)s} * on(pool_id) group_left ceph_pool_metadata{%(matchers)s}' % u.matchers(),
'J',
'table',
1,
true
),
u.addTargetSchema(
- 'ceph_pool_metadata{compression_mode!="none"}', 'K', 'table', 1, true
+ 'ceph_pool_metadata{%(matchers)s, compression_mode!="none"}' % u.matchers(), 'K', 'table', 1, true
),
u.addTargetSchema('', 'L', '', '', null),
]
|||
topk($topk,
round(
- (rate(ceph_pool_rd[30s]) + rate(ceph_pool_wr[30s])),
- 1
- ) * on(pool_id) group_left(instance,name) ceph_pool_metadata)
- |||,
+ (
+ rate(ceph_pool_rd{%(matchers)s}[$__rate_interval]) +
+ rate(ceph_pool_wr{%(matchers)s}[$__rate_interval])
+ ), 1
+ ) * on(pool_id) group_left(instance,name) ceph_pool_metadata{%(matchers)s})
+ ||| % u.matchers(),
'{{name}} ',
0,
9,
u.addTargetSchema(
|||
topk($topk,
- rate(ceph_pool_wr[30s]) +
- on(pool_id) group_left(instance,name) ceph_pool_metadata
+ rate(ceph_pool_wr{%(matchers)s}[$__rate_interval]) +
+ on(pool_id) group_left(instance,name) ceph_pool_metadata{%(matchers)s}
)
- |||,
+ ||| % u.matchers(),
'{{name}} - write'
)
),
'Throughput',
|||
topk($topk,
- (rate(ceph_pool_rd_bytes[30s]) + rate(ceph_pool_wr_bytes[30s])) *
- on(pool_id) group_left(instance, name) ceph_pool_metadata
+ (
+ rate(ceph_pool_rd_bytes{%(matchers)s}[$__rate_interval]) +
+ rate(ceph_pool_wr_bytes{%(matchers)s}[$__rate_interval])
+ ) * on(pool_id) group_left(instance, name) ceph_pool_metadata{%(matchers)s}
)
- |||,
+ ||| % u.matchers(),
'{{name}}',
12,
9,
'Historical view of capacity usage, to help identify growth and trends in pool consumption',
'bytes',
'Capacity Used',
- 'ceph_pool_bytes_used * on(pool_id) group_right ceph_pool_metadata',
+ 'ceph_pool_bytes_used{%(matchers)s} * on(pool_id) group_right ceph_pool_metadata{%(matchers)s}' % u.matchers(),
'{{name}}',
0,
17,
'now-1h',
'15s',
22,
- [],
+ c.dashboardTags,
'',
{
refresh_intervals: ['5s', '10s', '15s', '30s', '1m', '5m', '15m', '30m', '1h', '2h', '1d'],
)
)
.addTemplate(
- g.template.datasource('datasource',
- 'prometheus',
- 'Prometheus admin.virt1.home.fajerski.name:9090',
- label='Data Source')
+ g.template.datasource('datasource', 'prometheus', 'default', label='Data Source')
+ )
+ .addTemplate(
+ u.addClusterTemplate()
+ )
+ .addTemplate(
+ u.addJobTemplate()
)
.addTemplate(
u.addTemplateSchema('pool_name',
'$datasource',
- 'label_values(ceph_pool_metadata,name)',
+ 'label_values(ceph_pool_metadata{%(matchers)s}, name)' % u.matchers(),
1,
false,
1,
true,
'.7,.8',
|||
- (ceph_pool_stored / (ceph_pool_stored + ceph_pool_max_avail)) *
- on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~"$pool_name"}
- |||,
+ (ceph_pool_stored{%(matchers)s} / (ceph_pool_stored{%(matchers)s} + ceph_pool_max_avail{%(matchers)s})) *
+ on(pool_id) group_left(instance, name) ceph_pool_metadata{%(matchers)s, name=~"$pool_name"}
+ ||| % u.matchers(),
'time_series',
0,
0,
PoolDetailSingleStatPanel(
's',
'Time till full',
'Time till pool is full assuming the average fill rate of the last 6 hours',
false,
100,
false,
'',
'current',
|||
- (ceph_pool_max_avail / deriv(ceph_pool_stored[6h])) *
- on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~"$pool_name"} > 0
- |||,
+ (ceph_pool_max_avail{%(matchers)s} / deriv(ceph_pool_stored{%(matchers)s}[6h])) *
+ on(pool_id) group_left(instance, name) ceph_pool_metadata{%(matchers)s, name=~"$pool_name"} > 0
+ ||| % u.matchers(),
'time_series',
7,
0,
'ops',
'Objects out(-) / in(+) ',
|||
- deriv(ceph_pool_objects[1m]) *
- on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~"$pool_name"}
- |||,
+ deriv(ceph_pool_objects{%(matchers)s}[1m]) *
+ on(pool_id) group_left(instance, name) ceph_pool_metadata{%(matchers)s, name=~"$pool_name"}
+ ||| % u.matchers(),
'Objects per second',
12,
0,
'iops',
'Read (-) / Write (+)',
|||
- irate(ceph_pool_rd[1m]) *
- on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~"$pool_name"}
- |||,
+ rate(ceph_pool_rd{%(matchers)s}[$__rate_interval]) *
+ on(pool_id) group_left(instance,name) ceph_pool_metadata{%(matchers)s, name=~"$pool_name"}
+ ||| % u.matchers(),
'reads',
0,
7,
.addTarget(
u.addTargetSchema(
|||
- irate(ceph_pool_wr[1m]) *
- on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~"$pool_name"}
- |||,
+ rate(ceph_pool_wr{%(matchers)s}[$__rate_interval]) *
+ on(pool_id) group_left(instance, name) ceph_pool_metadata{%(matchers)s, name=~"$pool_name"}
+ ||| % u.matchers(),
'writes'
)
),
'Bps',
'Read (-) / Write (+)',
|||
- irate(ceph_pool_rd_bytes[1m]) +
- on(pool_id) group_left(instance, name) ceph_pool_metadata{name=~"$pool_name"}
- |||,
+ rate(ceph_pool_rd_bytes{%(matchers)s}[$__rate_interval]) +
+ on(pool_id) group_left(instance, name) ceph_pool_metadata{%(matchers)s, name=~"$pool_name"}
+ ||| % u.matchers(),
'reads',
12,
7,
.addTarget(
u.addTargetSchema(
|||
- irate(ceph_pool_wr_bytes[1m]) +
- on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~"$pool_name"}
- |||,
+ rate(ceph_pool_wr_bytes{%(matchers)s}[$__rate_interval]) +
+ on(pool_id) group_left(instance,name) ceph_pool_metadata{%(matchers)s, name=~"$pool_name"}
+ ||| % u.matchers(),
'writes'
)
),
'short',
'Objects',
|||
- ceph_pool_objects *
- on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~"$pool_name"}
- |||,
+ ceph_pool_objects{%(matchers)s} *
+ on(pool_id) group_left(instance,name) ceph_pool_metadata{%(matchers)s, name=~"$pool_name"}
+ ||| % u.matchers(),
'Number of Objects',
0,
14,
local g = import 'grafonnet/grafana.libsonnet';
local u = import 'utils.libsonnet';
+local c = (import '../mixin.libsonnet')._config;
{
grafanaDashboards+:: {
null,
0,
1,
- '$Datasource')
+ '$datasource')
.addTargets(
[
u.addTargetSchema(expr1,
'now-1h',
false,
16,
- [],
+ c.dashboardTags,
'',
{
refresh_intervals: ['5s', '10s', '30s', '1m', '5m', '15m', '30m', '1h', '2h', '1d'],
type='panel', id='graph', name='Graph', version='5.0.0'
)
.addTemplate(
- g.template.datasource('Datasource', 'prometheus', 'default', label=null)
+ g.template.datasource('datasource', 'prometheus', 'default', label='Data Source')
+ )
+ .addTemplate(
+ u.addClusterTemplate()
+ )
+ .addTemplate(
+ u.addJobTemplate()
)
.addTemplate(
- u.addTemplateSchema('Pool',
- '$Datasource',
+ u.addTemplateSchema('pool',
+ '$datasource',
'label_values(pool)',
1,
false,
'')
)
.addTemplate(
- u.addTemplateSchema('Image',
- '$Datasource',
+ u.addTemplateSchema('image',
+ '$datasource',
'label_values(image)',
1,
false,
RbdDetailsPanel(
'IOPS',
'iops',
- 'irate(ceph_rbd_write_ops{pool="$Pool", image="$Image"}[30s])',
- 'irate(ceph_rbd_read_ops{pool="$Pool", image="$Image"}[30s])',
+ 'rate(ceph_rbd_write_ops{%(matchers)s, pool="$pool", image="$image"}[$__rate_interval])' % u.matchers(),
+ 'rate(ceph_rbd_read_ops{%(matchers)s, pool="$pool", image="$image"}[$__rate_interval])' % u.matchers(),
0,
0,
8,
RbdDetailsPanel(
'Throughput',
'Bps',
- 'irate(ceph_rbd_write_bytes{pool="$Pool", image="$Image"}[30s])',
- 'irate(ceph_rbd_read_bytes{pool="$Pool", image="$Image"}[30s])',
+ 'rate(ceph_rbd_write_bytes{%(matchers)s, pool="$pool", image="$image"}[$__rate_interval])' % u.matchers(),
+ 'rate(ceph_rbd_read_bytes{%(matchers)s, pool="$pool", image="$image"}[$__rate_interval])' % u.matchers(),
8,
0,
8,
'Average Latency',
'ns',
|||
- irate(ceph_rbd_write_latency_sum{pool="$Pool", image="$Image"}[30s]) /
- irate(ceph_rbd_write_latency_count{pool="$Pool", image="$Image"}[30s])
- |||,
+ rate(ceph_rbd_write_latency_sum{%(matchers)s, pool="$pool", image="$image"}[$__rate_interval]) /
+ rate(ceph_rbd_write_latency_count{%(matchers)s, pool="$pool", image="$image"}[$__rate_interval])
+ ||| % u.matchers(),
|||
- irate(ceph_rbd_read_latency_sum{pool="$Pool", image="$Image"}[30s]) /
- irate(ceph_rbd_read_latency_count{pool="$Pool", image="$Image"}[30s])
- |||,
+ rate(ceph_rbd_read_latency_sum{%(matchers)s, pool="$pool", image="$image"}[$__rate_interval]) /
+ rate(ceph_rbd_read_latency_count{%(matchers)s, pool="$pool", image="$image"}[$__rate_interval])
+ ||| % u.matchers(),
16,
0,
8,
'now-1h',
'30s',
16,
- ['overview'],
+ c.dashboardTags + ['overview'],
'',
{
refresh_intervals: ['5s', '10s', '15s', '30s', '1m', '5m', '15m', '30m', '1h', '2h', '1d'],
type='panel', id='table', name='Table', version='5.0.0'
)
.addTemplate(
- g.template.datasource('datasource',
- 'prometheus',
- 'default',
- label='Data Source')
+ g.template.datasource('datasource', 'prometheus', 'default', label='Data Source')
+ )
+ .addTemplate(
+ u.addClusterTemplate()
+ )
+ .addTemplate(
+ u.addJobTemplate()
)
.addPanels([
RbdOverviewPanel(
'IOPS',
'short',
- 'round(sum(irate(ceph_rbd_write_ops[30s])))',
- 'round(sum(irate(ceph_rbd_read_ops[30s])))',
+ 'round(sum(rate(ceph_rbd_write_ops{%(matchers)s}[$__rate_interval])))' % u.matchers(),
+ 'round(sum(rate(ceph_rbd_read_ops{%(matchers)s}[$__rate_interval])))' % u.matchers(),
'Writes',
'Reads',
0,
RbdOverviewPanel(
'Throughput',
'Bps',
- 'round(sum(irate(ceph_rbd_write_bytes[30s])))',
- 'round(sum(irate(ceph_rbd_read_bytes[30s])))',
+ 'round(sum(rate(ceph_rbd_write_bytes{%(matchers)s}[$__rate_interval])))' % u.matchers(),
+ 'round(sum(rate(ceph_rbd_read_bytes{%(matchers)s}[$__rate_interval])))' % u.matchers(),
'Write',
'Read',
8,
'ns',
|||
round(
- sum(irate(ceph_rbd_write_latency_sum[30s])) /
- sum(irate(ceph_rbd_write_latency_count[30s]))
+ sum(rate(ceph_rbd_write_latency_sum{%(matchers)s}[$__rate_interval])) /
+ sum(rate(ceph_rbd_write_latency_count{%(matchers)s}[$__rate_interval]))
)
- |||,
+ ||| % u.matchers(),
|||
round(
- sum(irate(ceph_rbd_read_latency_sum[30s])) /
- sum(irate(ceph_rbd_read_latency_count[30s]))
+ sum(rate(ceph_rbd_read_latency_sum{%(matchers)s}[$__rate_interval])) /
+ sum(rate(ceph_rbd_read_latency_count{%(matchers)s}[$__rate_interval]))
)
- |||,
+ ||| % u.matchers(),
'Write',
'Read',
16,
topk(10,
(
sort((
- irate(ceph_rbd_write_ops[30s]) +
- on (image, pool, namespace) irate(ceph_rbd_read_ops[30s])
+ rate(ceph_rbd_write_ops{%(matchers)s}[$__rate_interval]) +
+ on (image, pool, namespace) rate(ceph_rbd_read_ops{%(matchers)s}[$__rate_interval])
))
)
)
- |||,
+ ||| % u.matchers(),
'',
'table',
1,
topk(10,
sort(
sum(
- irate(ceph_rbd_read_bytes[30s]) + irate(ceph_rbd_write_bytes[30s])
+ rate(ceph_rbd_read_bytes{%(matchers)s}[$__rate_interval]) +
+ rate(ceph_rbd_write_bytes{%(matchers)s}[$__rate_interval])
) by (pool, image, namespace)
)
)
- |||,
+ ||| % u.matchers(),
'',
'table',
1,
|||
topk(10,
sum(
- irate(ceph_rbd_write_latency_sum[30s]) / clamp_min(irate(ceph_rbd_write_latency_count[30s]), 1) +
- irate(ceph_rbd_read_latency_sum[30s]) / clamp_min(irate(ceph_rbd_read_latency_count[30s]), 1)
+ rate(ceph_rbd_write_latency_sum{%(matchers)s}[$__rate_interval]) /
+ clamp_min(rate(ceph_rbd_write_latency_count{%(matchers)s}[$__rate_interval]), 1) +
+ rate(ceph_rbd_read_latency_sum{%(matchers)s}[$__rate_interval]) /
+ clamp_min(rate(ceph_rbd_read_latency_count{%(matchers)s}[$__rate_interval]), 1)
) by (pool, image, namespace)
)
- |||,
+ ||| % u.matchers(),
'',
'table',
1,
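
// Formatting sketch for the sync-rate targets below: u.matchers() is merged with an extra
// field so a single % interpolation fills both the metric name and the label matchers.
// The field values here are illustrative assumptions, not taken from utils.libsonnet:
local fmt = { matchers: 'job=~"$job", cluster=~"$cluster"', rgwMetric: 'ceph_data_sync_from_zone_fetch_bytes_sum' };
'sum by (source_zone) (rate(%(rgwMetric)s{%(matchers)s}[$__rate_interval]))' % fmt
// => 'sum by (source_zone) (rate(ceph_data_sync_from_zone_fetch_bytes_sum{job=~"$job", cluster=~"$cluster"}[$__rate_interval]))'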
local g = import 'grafonnet/grafana.libsonnet';
local u = import 'utils.libsonnet';
+local c = (import '../mixin.libsonnet')._config;
{
grafanaDashboards+:: {
1,
'$datasource')
.addTargets(
- [u.addTargetSchema('sum by (source_zone) (rate(%s[30s]))' % rgwMetric,
- '{{source_zone}}')]
+ [
+ u.addTargetSchema(
+ 'sum by (source_zone) (rate(%(rgwMetric)s{%(matchers)s}[$__rate_interval]))'
+ % (u.matchers() + { rgwMetric: rgwMetric }),
+ '{{source_zone}}'
+ ),
+ ]
) + { gridPos: { x: x, y: y, w: w, h: h } };
u.dashboardSchema(
'now-1h',
'15s',
16,
- ['overview'],
+ c.dashboardTags + ['overview'],
'',
{
refresh_intervals: ['5s', '10s', '15s', '30s', '1m', '5m', '15m', '30m', '1h', '2h', '1d'],
type='panel', id='graph', name='Graph', version='5.0.0'
)
.addTemplate(
- u.addTemplateSchema('rgw_servers', '$datasource', 'prometehus', 1, true, 1, '', '')
+ g.template.datasource('datasource', 'prometheus', 'default', label='Data Source')
)
.addTemplate(
- g.template.datasource('datasource', 'prometheus', 'default', label='Data Source')
+ u.addClusterTemplate()
+ )
+ .addTemplate(
+ u.addJobTemplate()
+ )
+ .addTemplate(
+ u.addTemplateSchema(
+ 'rgw_servers',
+ '$datasource',
+ 'label_values(ceph_rgw_metadata{%(matchers)s}, ceph_daemon)' % u.matchers(),
+ 1,
+ true,
+ 1,
+ '',
+ 'RGW Server'
+ )
)
.addPanels([
RgwSyncOverviewPanel(
'now-1h',
'15s',
16,
- ['overview'],
+ c.dashboardTags + ['overview'],
'',
{
refresh_intervals: ['5s', '10s', '15s', '30s', '1m', '5m', '15m', '30m', '1h', '2h', '1d'],
.addRequired(
type='panel', id='graph', name='Graph', version='5.0.0'
)
+ .addTemplate(
+ g.template.datasource('datasource',
+ 'prometheus',
+ 'default',
+ label='Data Source')
+ )
+ .addTemplate(
+ u.addClusterTemplate()
+ )
+ .addTemplate(
+ u.addJobTemplate()
+ )
.addTemplate(
u.addTemplateSchema(
'rgw_servers',
'$datasource',
- 'label_values(ceph_rgw_metadata, ceph_daemon)',
+ 'label_values(ceph_rgw_metadata{%(matchers)s}, ceph_daemon)' % u.matchers(),
1,
true,
1,
'',
- ''
+ 'RGW Server'
)
)
.addTemplate(
u.addTemplateSchema(
'code',
'$datasource',
- 'label_values(haproxy_server_http_responses_total{instance=~"$ingress_service"}, code)',
+ 'label_values(haproxy_server_http_responses_total{job=~"$job_haproxy", instance=~"$ingress_service"}, code)',
1,
true,
1,
''
)
)
+ .addTemplate(
+ u.addTemplateSchema(
+ 'job_haproxy',
+ '$datasource',
+ 'label_values(haproxy_server_status, job)',
+ 1,
+ true,
+ 1,
+ 'job haproxy',
+ '(.*)',
+ multi=true,
+ allValues='.+',
+ ),
+ )
.addTemplate(
u.addTemplateSchema(
'ingress_service',
'$datasource',
- 'label_values(haproxy_server_status, instance)',
+ 'label_values(haproxy_server_status{job=~"$job_haproxy"}, instance)',
1,
true,
1,
''
)
)
- .addTemplate(
- g.template.datasource('datasource',
- 'prometheus',
- 'default',
- label='Data Source')
- )
.addPanels([
u.addRowSchema(false,
true,
's',
'short',
|||
- rate(ceph_rgw_get_initial_lat_sum[30s]) /
- rate(ceph_rgw_get_initial_lat_count[30s]) *
- on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata
- |||,
+ rate(ceph_rgw_get_initial_lat_sum{%(matchers)s}[$__rate_interval]) /
+ rate(ceph_rgw_get_initial_lat_count{%(matchers)s}[$__rate_interval]) *
+ on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s}
+ ||| % u.matchers(),
'GET AVG',
0,
1,
[
u.addTargetSchema(
|||
- rate(ceph_rgw_put_initial_lat_sum[30s]) /
- rate(ceph_rgw_put_initial_lat_count[30s]) *
- on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata
- |||,
+ rate(ceph_rgw_put_initial_lat_sum{%(matchers)s}[$__rate_interval]) /
+ rate(ceph_rgw_put_initial_lat_count{%(matchers)s}[$__rate_interval]) *
+ on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s}
+ ||| % u.matchers(),
'PUT AVG'
),
]
|||
sum by (rgw_host) (
label_replace(
- rate(ceph_rgw_req[30s]) *
- on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata,
+ rate(ceph_rgw_req{%(matchers)s}[$__rate_interval]) *
+ on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s},
"rgw_host", "$1", "ceph_daemon", "rgw.(.*)"
)
)
- |||,
+ ||| % u.matchers(),
'{{rgw_host}}',
8,
1,
'short',
|||
label_replace(
- rate(ceph_rgw_get_initial_lat_sum[30s]) /
- rate(ceph_rgw_get_initial_lat_count[30s]) *
- on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata,
+ rate(ceph_rgw_get_initial_lat_sum{%(matchers)s}[$__rate_interval]) /
+ rate(ceph_rgw_get_initial_lat_count{%(matchers)s}[$__rate_interval]) *
+ on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s},
"rgw_host", "$1", "ceph_daemon", "rgw.(.*)"
)
- |||,
+ ||| % u.matchers(),
'{{rgw_host}}',
15,
1,
'Total bytes transferred in/out of all radosgw instances within the cluster',
'bytes',
'short',
- 'sum(rate(ceph_rgw_get_b[30s]))',
+ 'sum(rate(ceph_rgw_get_b{%(matchers)s}[$__rate_interval]))' % u.matchers(),
'GETs',
0,
8,
8,
6
).addTargets(
- [u.addTargetSchema('sum(rate(ceph_rgw_put_b[30s]))',
+ [u.addTargetSchema('sum(rate(ceph_rgw_put_b{%(matchers)s}[$__rate_interval]))' % u.matchers(),
'PUTs')]
),
RgwOverviewPanel(
'short',
|||
label_replace(sum by (instance_id) (
- rate(ceph_rgw_get_b[30s]) +
- rate(ceph_rgw_put_b[30s])) *
- on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata,
+ rate(ceph_rgw_get_b{%(matchers)s}[$__rate_interval]) +
+ rate(ceph_rgw_put_b{%(matchers)s}[$__rate_interval])) *
+ on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s},
"rgw_host", "$1", "ceph_daemon", "rgw.(.*)"
)
- |||,
+ ||| % u.matchers(),
'{{rgw_host}}',
8,
8,
'short',
|||
label_replace(
- rate(ceph_rgw_put_initial_lat_sum[30s]) /
- rate(ceph_rgw_put_initial_lat_count[30s]) *
- on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata,
+ rate(ceph_rgw_put_initial_lat_sum{%(matchers)s}[$__rate_interval]) /
+ rate(ceph_rgw_put_initial_lat_count{%(matchers)s}[$__rate_interval]) *
+ on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s},
"rgw_host", "$1", "ceph_daemon", "rgw.(.*)"
)
- |||,
+ ||| % u.matchers(),
'{{rgw_host}}',
15,
8,
'short',
|||
sum(
- irate(
- haproxy_frontend_http_responses_total{code=~"$code", instance=~"$ingress_service", proxy=~"frontend"}[5m]
+ rate(
+ haproxy_frontend_http_responses_total{code=~"$code", job=~"$job_haproxy", instance=~"$ingress_service", proxy=~"frontend"}[$__rate_interval]
)
) by (code)
|||,
u.addTargetSchema(
|||
sum(
- irate(
- haproxy_backend_http_responses_total{code=~"$code", instance=~"$ingress_service", proxy=~"backend"}[5m]
+ rate(
+ haproxy_backend_http_responses_total{code=~"$code", job=~"$job_haproxy", instance=~"$ingress_service", proxy=~"backend"}[$__rate_interval]
)
) by (code)
|||, 'Backend {{ code }}'
'short',
|||
sum(
- irate(
- haproxy_frontend_http_requests_total{proxy=~"frontend", instance=~"$ingress_service"}[5m]
+ rate(
+ haproxy_frontend_http_requests_total{proxy=~"frontend", job=~"$job_haproxy", instance=~"$ingress_service"}[$__rate_interval]
)
) by (instance)
|||,
u.addTargetSchema(
|||
sum(
- irate(
- haproxy_backend_response_errors_total{proxy=~"backend",instance=~"$ingress_service"}[5m]
+ rate(
+ haproxy_backend_response_errors_total{proxy=~"backend", job=~"$job_haproxy", instance=~"$ingress_service"}[$__rate_interval]
)
) by (instance)
|||, 'Response errors', 'time_series', 2
u.addTargetSchema(
|||
sum(
- irate(
- haproxy_frontend_request_errors_total{proxy=~"frontend", instance=~"$ingress_service"}[5m]
+ rate(
+ haproxy_frontend_request_errors_total{proxy=~"frontend", job=~"$job_haproxy", instance=~"$ingress_service"}[$__rate_interval]
)
) by (instance)
|||, 'Requests errors'
u.addTargetSchema(
|||
sum(
- irate(
- haproxy_backend_redispatch_warnings_total{proxy=~"backend", instance=~"$ingress_service"}[5m]
+ rate(
+ haproxy_backend_redispatch_warnings_total{proxy=~"backend", job=~"$job_haproxy", instance=~"$ingress_service"}[$__rate_interval]
)
) by (instance)
|||, 'Backend redispatch', 'time_series', 2
u.addTargetSchema(
|||
sum(
- irate(
- haproxy_backend_retry_warnings_total{proxy=~"backend", instance=~"$ingress_service"}[5m]
+ rate(
+ haproxy_backend_retry_warnings_total{proxy=~"backend", job=~"$job_haproxy", instance=~"$ingress_service"}[$__rate_interval]
)
) by (instance)
|||, 'Backend retry', 'time_series', 2
u.addTargetSchema(
|||
sum(
- irate(
- haproxy_frontend_requests_denied_total{proxy=~"frontend", instance=~"$ingress_service"}[5m]
+ rate(
+ haproxy_frontend_requests_denied_total{proxy=~"frontend", job=~"$job_haproxy", instance=~"$ingress_service"}[$__rate_interval]
)
) by (instance)
|||, 'Request denied', 'time_series', 2
u.addTargetSchema(
|||
sum(
- haproxy_backend_current_queue{proxy=~"backend", instance=~"$ingress_service"}
+ haproxy_backend_current_queue{proxy=~"backend", job=~"$job_haproxy", instance=~"$ingress_service"}
) by (instance)
|||, 'Backend Queued', 'time_series', 2
),
'short',
|||
sum(
- irate(
- haproxy_frontend_connections_total{proxy=~"frontend", instance=~"$ingress_service"}[5m]
+ rate(
+ haproxy_frontend_connections_total{proxy=~"frontend", job=~"$job_haproxy", instance=~"$ingress_service"}[$__rate_interval]
)
) by (instance)
|||,
u.addTargetSchema(
|||
sum(
- irate(
- haproxy_backend_connection_attempts_total{proxy=~"backend", instance=~"$ingress_service"}[5m]
+ rate(
+ haproxy_backend_connection_attempts_total{proxy=~"backend", job=~"$job_haproxy", instance=~"$ingress_service"}[$__rate_interval]
)
) by (instance)
|||, 'Back'
u.addTargetSchema(
|||
sum(
- irate(
- haproxy_backend_connection_errors_total{proxy=~"backend", instance=~"$ingress_service"}[5m]
+ rate(
+ haproxy_backend_connection_errors_total{proxy=~"backend", job=~"$job_haproxy", instance=~"$ingress_service"}[$__rate_interval]
)
) by (instance)
|||, 'Back errors'
'short',
|||
sum(
- irate(
- haproxy_frontend_bytes_in_total{proxy=~"frontend", instance=~"$ingress_service"}[5m]
+ rate(
+ haproxy_frontend_bytes_in_total{proxy=~"frontend", job=~"$job_haproxy", instance=~"$ingress_service"}[$__rate_interval]
) * 8
) by (instance)
|||,
u.addTargetSchema(
|||
sum(
- irate(
- haproxy_frontend_bytes_out_total{proxy=~"frontend", instance=~"$ingress_service"}[5m]
+ rate(
+ haproxy_frontend_bytes_out_total{proxy=~"frontend", job=~"$job_haproxy", instance=~"$ingress_service"}[$__rate_interval]
) * 8
) by (instance)
|||, 'OUT Front', 'time_series', 2
u.addTargetSchema(
|||
sum(
- irate(
- haproxy_backend_bytes_in_total{proxy=~"backend", instance=~"$ingress_service"}[5m]
+ rate(
+ haproxy_backend_bytes_in_total{proxy=~"backend", job=~"$job_haproxy", instance=~"$ingress_service"}[$__rate_interval]
) * 8
) by (instance)
|||, 'IN Back', 'time_series', 2
u.addTargetSchema(
|||
sum(
- irate(
- haproxy_backend_bytes_out_total{proxy=~"backend", instance=~"$ingress_service"}[5m]
+ rate(
+ haproxy_backend_bytes_out_total{proxy=~"backend", job=~"$job_haproxy", instance=~"$ingress_service"}[$__rate_interval]
) * 8
) by (instance)
|||, 'OUT Back', 'time_series', 2
'now-1h',
'15s',
16,
- ['overview'],
+ c.dashboardTags + ['overview'],
'',
{
refresh_intervals: ['5s', '10s', '15s', '30s', '1m', '5m', '15m', '30m', '1h', '2h', '1d'],
'default',
label='Data Source')
)
+ .addTemplate(
+ u.addClusterTemplate()
+ )
+ .addTemplate(
+ u.addJobTemplate()
+ )
.addTemplate(
u.addTemplateSchema('rgw_servers',
'$datasource',
- 'label_values(ceph_rgw_metadata, ceph_daemon)',
+ 'label_values(ceph_rgw_metadata{%(matchers)s}, ceph_daemon)' % u.matchers(),
1,
true,
1,
'short',
|||
sum by (instance_id) (
- rate(ceph_rgw_get_initial_lat_sum[30s]) / rate(ceph_rgw_get_initial_lat_count[30s])
- ) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}
- |||,
+ rate(ceph_rgw_get_initial_lat_sum{%(matchers)s}[$__rate_interval]) /
+ rate(ceph_rgw_get_initial_lat_count{%(matchers)s}[$__rate_interval])
+ ) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s, ceph_daemon=~"$rgw_servers"}
+ ||| % u.matchers(),
|||
sum by (instance_id) (
- rate(ceph_rgw_put_initial_lat_sum[30s]) / rate(ceph_rgw_put_initial_lat_count[30s])
- ) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}
- |||,
+ rate(ceph_rgw_put_initial_lat_sum{%(matchers)s}[$__rate_interval]) /
+ rate(ceph_rgw_put_initial_lat_count{%(matchers)s}[$__rate_interval])
+ ) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s, ceph_daemon=~"$rgw_servers"}
+ ||| % u.matchers(),
'GET {{ceph_daemon}}',
'PUT {{ceph_daemon}}',
0,
'bytes',
'short',
|||
- rate(ceph_rgw_get_b[30s]) *
- on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}
- |||,
+ rate(ceph_rgw_get_b{%(matchers)s}[$__rate_interval]) *
+ on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s, ceph_daemon=~"$rgw_servers"}
+ ||| % u.matchers(),
|||
- rate(ceph_rgw_put_b[30s]) *
- on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}
- |||,
+ rate(ceph_rgw_put_b{%(matchers)s}[$__rate_interval]) *
+ on (instance_id) group_left (ceph_daemon)
+ ceph_rgw_metadata{%(matchers)s, ceph_daemon=~"$rgw_servers"}
+ ||| % u.matchers(),
'GETs {{ceph_daemon}}',
'PUTs {{ceph_daemon}}',
6,
'short',
'short',
|||
- rate(ceph_rgw_failed_req[30s]) *
- on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}
- |||,
+ rate(ceph_rgw_failed_req{%(matchers)s}[$__rate_interval]) *
+ on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s, ceph_daemon=~"$rgw_servers"}
+ ||| % u.matchers(),
|||
- rate(ceph_rgw_get[30s]) *
- on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}
- |||,
+ rate(ceph_rgw_get{%(matchers)s}[$__rate_interval]) *
+ on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s, ceph_daemon=~"$rgw_servers"}
+ ||| % u.matchers(),
'Requests Failed {{ceph_daemon}}',
'GETs {{ceph_daemon}}',
13,
[
u.addTargetSchema(
|||
- rate(ceph_rgw_put[30s]) *
- on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}
- |||,
+ rate(ceph_rgw_put{%(matchers)s}[$__rate_interval]) *
+ on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s, ceph_daemon=~"$rgw_servers"}
+ ||| % u.matchers(),
'PUTs {{ceph_daemon}}'
),
u.addTargetSchema(
|||
(
- rate(ceph_rgw_req[30s]) -
- (rate(ceph_rgw_get[30s]) + rate(ceph_rgw_put[30s]))
- ) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}
- |||,
+ rate(ceph_rgw_req{%(matchers)s}[$__rate_interval]) -
+ (
+ rate(ceph_rgw_get{%(matchers)s}[$__rate_interval]) +
+ rate(ceph_rgw_put{%(matchers)s}[$__rate_interval])
+ )
+ ) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s, ceph_daemon=~"$rgw_servers"}
+ ||| % u.matchers(),
'Other {{ceph_daemon}}'
),
]
)
.addTarget(u.addTargetSchema(
|||
- rate(ceph_rgw_failed_req[30s]) *
- on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}
- |||,
+ rate(ceph_rgw_failed_req{%(matchers)s}[$__rate_interval]) *
+ on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s, ceph_daemon=~"$rgw_servers"}
+ ||| % u.matchers(),
'Failures {{ceph_daemon}}'
))
.addTarget(u.addTargetSchema(
|||
- rate(ceph_rgw_get[30s]) *
- on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}
- |||,
+ rate(ceph_rgw_get{%(matchers)s}[$__rate_interval]) *
+ on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s, ceph_daemon=~"$rgw_servers"}
+ ||| % u.matchers(),
'GETs {{ceph_daemon}}'
))
.addTarget(u.addTargetSchema(
|||
- rate(ceph_rgw_put[30s]) *
- on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}
- |||,
+ rate(ceph_rgw_put{%(matchers)s}[$__rate_interval]) *
+ on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s, ceph_daemon=~"$rgw_servers"}
+ ||| % u.matchers(),
'PUTs {{ceph_daemon}}'
))
.addTarget(u.addTargetSchema(
|||
(
- rate(ceph_rgw_req[30s]) -
- (rate(ceph_rgw_get[30s]) + rate(ceph_rgw_put[30s]))
+ rate(ceph_rgw_req{%(matchers)s}[$__rate_interval]) -
+ (
+ rate(ceph_rgw_get{%(matchers)s}[$__rate_interval]) +
+ rate(ceph_rgw_put{%(matchers)s}[$__rate_interval])
+ )
) * on (instance_id) group_left (ceph_daemon)
- ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}
- |||,
+ ceph_rgw_metadata{%(matchers)s, ceph_daemon=~"$rgw_servers"}
+ ||| % u.matchers(),
'Other (DELETE,LIST) {{ceph_daemon}}'
)) + { gridPos: { x: 20, y: 1, w: 4, h: 8 } },
]),
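For reference, the %(matchers)s placeholders in the queries above are ordinary Jsonnet string formatting against the object returned by the new u.matchers() helper added to utils.libsonnet below; a minimal sketch of the expansion follows (the field names and the chosen metrics are illustrative only):

  local u = import 'utils.libsonnet';
  {
    // expands to: rate(ceph_rgw_get_b{job=~"$job", cluster=~"$cluster"}[$__rate_interval])
    example_expr: 'rate(ceph_rgw_get_b{%(matchers)s}[$__rate_interval])' % u.matchers(),
    // jobMatcher and clusterMatcher are also exposed individually, as addJobTemplate() below does
    cluster_only: 'label_values(ceph_osd_metadata{%(clusterMatcher)s}, job)' % u.matchers(),
  }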
local g = import 'grafonnet/grafana.libsonnet';
+local c = (import '../mixin.libsonnet')._config;
{
dashboardSchema(title,
includeAll,
sort,
label,
- regex)::
+ regex,
+ hide='',
+ multi=false,
+ allValues=null)::
g.template.new(name=name,
datasource=datasource,
query=query,
includeAll=includeAll,
sort=sort,
label=label,
- regex=regex),
+ regex=regex,
+ hide=hide,
+ multi=multi,
+ allValues=allValues),
addAnnotationSchema(builtIn,
datasource,
unit: unit,
valueMaps: valueMaps,
},
+
+ matchers()::
+ local jobMatcher = 'job=~"$job"';
+ local clusterMatcher = '%s=~"$cluster"' % c.clusterLabel;
+ {
+ // Common labels
+ jobMatcher: jobMatcher,
+ clusterMatcher: clusterMatcher,
+ matchers: '%s, %s' % [jobMatcher, clusterMatcher],
+ },
+
+ addClusterTemplate()::
+ $.addTemplateSchema(
+ 'cluster',
+ '$datasource',
+ 'label_values(ceph_osd_metadata, cluster)',
+ 1,
+ true,
+ 1,
+ 'cluster',
+ '(.*)',
+ if !c.showMultiCluster then 'variable' else '',
+ multi=true,
+ allValues='.+',
+ ),
+
+ addJobTemplate()::
+ $.addTemplateSchema(
+ 'job',
+ '$datasource',
+ 'label_values(ceph_osd_metadata{%(clusterMatcher)s}, job)' % $.matchers(),
+ 1,
+ true,
+ 1,
+ 'job',
+ '(.*)',
+ multi=true,
+ allValues='.+',
+ ),
}
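As a usage note (a sketch, not part of the patch): addClusterTemplate() keeps the cluster variable hidden unless showMultiCluster is set, so a deployment that scrapes several Ceph clusters would be expected to expose it through the usual Jsonnet mixin config override, roughly:

  // hypothetical override file; assumes the standard mixin _config override pattern
  (import 'mixin.libsonnet') + {
    _config+:: {
      showMultiCluster: true,  // cluster template is rendered visible instead of hide='variable'
    },
  }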
--- /dev/null
+exclusions:
+ template-instance-rule:
+ reason: "Instance template not needed because of ceph-mgr leader election."
+ target-instance-rule:
+ reason: "Instance matcher not needed because of ceph-mgr leader election."
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(ceph_objecter_op_r{ceph_daemon=~\"($mds_servers).*\"}[1m]))",
+ "expr": "sum(rate(ceph_objecter_op_r{job=~\"$job\", cluster=~\"$cluster\", ceph_daemon=~\"($mds_servers).*\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Read Ops",
"refId": "A"
},
{
- "expr": "sum(rate(ceph_objecter_op_w{ceph_daemon=~\"($mds_servers).*\"}[1m]))",
+ "expr": "sum(rate(ceph_objecter_op_w{job=~\"$job\", cluster=~\"$cluster\", ceph_daemon=~\"($mds_servers).*\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Write Ops",
"steppedLine": false,
"targets": [
{
- "expr": "ceph_mds_server_handle_client_request{ceph_daemon=~\"($mds_servers).*\"}",
+ "expr": "ceph_mds_server_handle_client_request{job=~\"$job\", cluster=~\"$cluster\", ceph_daemon=~\"($mds_servers).*\"}",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{ceph_daemon}}",
"rows": [ ],
"schemaVersion": 16,
"style": "dark",
- "tags": [ ],
+ "tags": [
+ "ceph-mixin"
+ ],
"templating": {
"list": [
{
"regex": "",
"type": "datasource"
},
+ {
+ "allValue": ".+",
+ "current": { },
+ "datasource": "$datasource",
+ "hide": 2,
+ "includeAll": true,
+ "label": "cluster",
+ "multi": true,
+ "name": "cluster",
+ "options": [ ],
+ "query": "label_values(ceph_osd_metadata, cluster)",
+ "refresh": 1,
+ "regex": "(.*)",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": ".+",
+ "current": { },
+ "datasource": "$datasource",
+ "hide": 0,
+ "includeAll": true,
+ "label": "job",
+ "multi": true,
+ "name": "job",
+ "options": [ ],
+ "query": "label_values(ceph_osd_metadata{cluster=~\"$cluster\"}, job)",
+ "refresh": 1,
+ "regex": "(.*)",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
{
"allValue": null,
"current": { },
"multi": false,
"name": "mds_servers",
"options": [ ],
- "query": "label_values(ceph_mds_inodes, ceph_daemon)",
+ "query": "label_values(ceph_mds_inodes{job=~\"$job\", cluster=~\"$cluster\"}, ceph_daemon)",
"refresh": 1,
"regex": "",
"sort": 1,
"tableColumn": "",
"targets": [
{
- "expr": "count(sum by (ceph_daemon) (ceph_osd_metadata{hostname='$ceph_hosts'}))",
+ "expr": "count(sum by (ceph_daemon) (ceph_osd_metadata{job=~\"$job\", cluster=~\"$cluster\", hostname='$ceph_hosts'}))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "",
"steppedLine": false,
"targets": [
{
- "expr": "sum by (mode) (\n irate(node_cpu{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\", mode=~\"(irq|nice|softirq|steal|system|user|iowait)\"}[1m]) or\n irate(node_cpu_seconds_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\", mode=~\"(irq|nice|softirq|steal|system|user|iowait)\"}[1m])\n) / (\n scalar(\n sum(irate(node_cpu{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[1m]) or\n irate(node_cpu_seconds_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[1m]))\n ) * 100\n)\n",
+ "expr": "sum by (mode) (\n rate(node_cpu{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\", mode=~\"(irq|nice|softirq|steal|system|user|iowait)\"}[$__rate_interval]) or\n rate(node_cpu_seconds_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\", mode=~\"(irq|nice|softirq|steal|system|user|iowait)\"}[$__rate_interval])\n) / (\n scalar(\n sum(rate(node_cpu{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) or\n rate(node_cpu_seconds_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]))\n ) * 100\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{mode}}",
"steppedLine": false,
"targets": [
{
- "expr": "sum by (device) (\n irate(\n node_network_receive_bytes{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\",device!=\"lo\"}[1m]) or\n irate(node_network_receive_bytes_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\",device!=\"lo\"}[1m]\n )\n)\n",
+ "expr": "sum by (device) (\n rate(\n node_network_receive_bytes{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\",device!=\"lo\"}[$__rate_interval]) or\n rate(node_network_receive_bytes_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\",device!=\"lo\"}[$__rate_interval]\n )\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{device}}.rx",
"refId": "A"
},
{
- "expr": "sum by (device) (\n irate(node_network_transmit_bytes{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\",device!=\"lo\"}[1m]) or\n irate(node_network_transmit_bytes_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\",device!=\"lo\"}[1m])\n)\n",
+ "expr": "sum by (device) (\n rate(node_network_transmit_bytes{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\",device!=\"lo\"}[$__rate_interval]) or\n rate(node_network_transmit_bytes_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\",device!=\"lo\"}[$__rate_interval])\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{device}}.tx",
"steppedLine": false,
"targets": [
{
- "expr": "irate(node_network_receive_drop{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}[1m]) or\n irate(node_network_receive_drop_total{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}[1m])\n",
+ "expr": "rate(node_network_receive_drop{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}[$__rate_interval]) or\n rate(node_network_receive_drop_total{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}[$__rate_interval])\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{device}}.rx",
"refId": "A"
},
{
- "expr": "irate(node_network_transmit_drop{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}[1m]) or\n irate(node_network_transmit_drop_total{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}[1m])\n",
+ "expr": "rate(node_network_transmit_drop{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}[$__rate_interval]) or\n rate(node_network_transmit_drop_total{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}[$__rate_interval])\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{device}}.tx",
"tableColumn": "",
"targets": [
{
- "expr": "sum(\n ceph_osd_stat_bytes and\n on (ceph_daemon) ceph_disk_occupation{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}\n)\n",
+ "expr": "sum(\n ceph_osd_stat_bytes{job=~\"$job\", cluster=~\"$cluster\"} and\n on (ceph_daemon) ceph_disk_occupation{job=~\"$job\", cluster=~\"$cluster\", instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "",
"steppedLine": false,
"targets": [
{
- "expr": "irate(node_network_receive_errs{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}[1m]) or\n irate(node_network_receive_errs_total{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}[1m])\n",
+ "expr": "rate(node_network_receive_errs{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}[$__rate_interval]) or\n rate(node_network_receive_errs_total{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}[$__rate_interval])\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{device}}.rx",
"refId": "A"
},
{
- "expr": "irate(node_network_transmit_errs{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}[1m]) or\n irate(node_network_transmit_errs_total{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}[1m])\n",
+ "expr": "rate(node_network_transmit_errs{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}[$__rate_interval]) or\n rate(node_network_transmit_errs_total{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}[$__rate_interval])\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{device}}.tx",
"steppedLine": false,
"targets": [
{
- "expr": "label_replace(\n (\n irate(node_disk_writes_completed{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[5m]) or\n irate(node_disk_writes_completed_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[5m])\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) * on(instance, device) group_left(ceph_daemon) label_replace(\n label_replace(\n ceph_disk_occupation_human, \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n",
+ "expr": "label_replace(\n (\n rate(node_disk_writes_completed{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) or\n rate(node_disk_writes_completed_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval])\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) * on(instance, device) group_left(ceph_daemon) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", cluster=~\"$cluster\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{device}}({{ceph_daemon}}) writes",
"refId": "A"
},
{
- "expr": "label_replace(\n (\n irate(node_disk_reads_completed{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[5m]) or\n irate(node_disk_reads_completed_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[5m])\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) * on(instance, device) group_left(ceph_daemon) label_replace(\n label_replace(\n ceph_disk_occupation_human,\"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n",
+ "expr": "label_replace(\n (\n rate(node_disk_reads_completed{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) or\n rate(node_disk_reads_completed_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval])\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) * on(instance, device) group_left(ceph_daemon) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", cluster=~\"$cluster\"},\"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{device}}({{ceph_daemon}}) reads",
"steppedLine": false,
"targets": [
{
- "expr": "label_replace(\n (\n irate(node_disk_bytes_written{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[5m]) or\n irate(node_disk_written_bytes_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[5m])\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") * on(instance, device)\n group_left(ceph_daemon) label_replace(\n label_replace(ceph_disk_occupation_human, \"device\", \"$1\", \"device\", \"/dev/(.*)\"),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n )\n",
+ "expr": "label_replace(\n (\n rate(node_disk_bytes_written{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) or\n rate(node_disk_written_bytes_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval])\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") * on(instance, device)\n group_left(ceph_daemon) label_replace(\n label_replace(ceph_disk_occupation_human{job=~\"$job\", cluster=~\"$cluster\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n )\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{device}}({{ceph_daemon}}) write",
"refId": "A"
},
{
- "expr": "label_replace(\n (\n irate(node_disk_bytes_read{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[5m]) or\n irate(node_disk_read_bytes_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[5m])\n ),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") * on(instance, device)\n group_left(ceph_daemon) label_replace(\n label_replace(ceph_disk_occupation_human, \"device\", \"$1\", \"device\", \"/dev/(.*)\"),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n )\n",
+ "expr": "label_replace(\n (\n rate(node_disk_bytes_read{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) or\n rate(node_disk_read_bytes_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval])\n ),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") * on(instance, device)\n group_left(ceph_daemon) label_replace(\n label_replace(ceph_disk_occupation_human{job=~\"$job\", cluster=~\"$cluster\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n )\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{device}}({{ceph_daemon}}) read",
"steppedLine": false,
"targets": [
{
- "expr": "max by(instance, device) (label_replace(\n (irate(node_disk_write_time_seconds_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[5m])) /\n clamp_min(irate(node_disk_writes_completed_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[5m]), 0.001) or\n (irate(node_disk_read_time_seconds_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[5m])) /\n clamp_min(irate(node_disk_reads_completed_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[5m]), 0.001),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)) * on(instance, device) group_left(ceph_daemon) label_replace(\n label_replace(\n ceph_disk_occupation_human{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n",
+ "expr": "max by(instance, device) (label_replace(\n (rate(node_disk_write_time_seconds_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval])) /\n clamp_min(rate(node_disk_writes_completed_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]), 0.001) or\n (rate(node_disk_read_time_seconds_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval])) /\n clamp_min(rate(node_disk_reads_completed_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]), 0.001),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)) * on(instance, device) group_left(ceph_daemon) label_replace(\n label_replace(\n ceph_disk_occupation_human{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{device}}({{ceph_daemon}})",
"steppedLine": false,
"targets": [
{
- "expr": "label_replace(\n (\n (irate(node_disk_io_time_ms{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[5m]) / 10) or\n irate(node_disk_io_time_seconds_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[5m]) * 100\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) * on(instance, device) group_left(ceph_daemon) label_replace(\n label_replace(ceph_disk_occupation_human{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n",
+ "expr": "label_replace(\n (\n (rate(node_disk_io_time_ms{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) / 10) or\n rate(node_disk_io_time_seconds_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) * 100\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) * on(instance, device) group_left(ceph_daemon) label_replace(\n label_replace(ceph_disk_occupation_human{job=~\"$job\", cluster=~\"$cluster\", instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{device}}({{ceph_daemon}})",
"schemaVersion": 16,
"style": "dark",
"tags": [
+ "ceph-mixin",
"overview"
],
"templating": {
"regex": "",
"type": "datasource"
},
+ {
+ "allValue": ".+",
+ "current": { },
+ "datasource": "$datasource",
+ "hide": 2,
+ "includeAll": true,
+ "label": "cluster",
+ "multi": true,
+ "name": "cluster",
+ "options": [ ],
+ "query": "label_values(ceph_osd_metadata, cluster)",
+ "refresh": 1,
+ "regex": "(.*)",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": ".+",
+ "current": { },
+ "datasource": "$datasource",
+ "hide": 0,
+ "includeAll": true,
+ "label": "job",
+ "multi": true,
+ "name": "job",
+ "options": [ ],
+ "query": "label_values(ceph_osd_metadata{cluster=~\"$cluster\"}, job)",
+ "refresh": 1,
+ "regex": "(.*)",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
{
"allValue": null,
"current": { },
"multi": false,
"name": "ceph_hosts",
"options": [ ],
- "query": "label_values(node_scrape_collector_success, instance) ",
+ "query": "label_values({cluster=~\"$cluster\"}, instance)",
"refresh": 1,
"regex": "([^.:]*).*",
"sort": 3,
"tableColumn": "",
"targets": [
{
- "expr": "count(sum by (hostname) (ceph_osd_metadata))",
+ "expr": "count(sum by (hostname) (ceph_osd_metadata{job=~\"$job\", cluster=~\"$cluster\"}))",
"format": "time_series",
"instant": true,
"intervalFactor": 1,
"tableColumn": "",
"targets": [
{
- "expr": "avg(1 - (\n avg by(instance) (\n irate(node_cpu_seconds_total{mode=\\'idle\\',instance=~\\\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\\\"}[1m]) or\n irate(node_cpu{mode=\\'idle\\',instance=~\\\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\\\"}[1m])\n )\n))\n",
+ "expr": "avg(1 - (\n avg by(instance) (\n rate(node_cpu_seconds_total{mode='idle',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[$__rate_interval]) or\n rate(node_cpu{mode='idle',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[$__rate_interval])\n )\n))\n",
"format": "time_series",
"instant": true,
"intervalFactor": 1,
"tableColumn": "",
"targets": [
{
- "expr": "avg ((\n (\n node_memory_MemTotal{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or\n node_memory_MemTotal_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}\n ) - ((\n node_memory_MemFree{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or\n node_memory_MemFree_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}) +\n (\n node_memory_Cached{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or\n node_memory_Cached_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}\n ) + (\n node_memory_Buffers{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or\n node_memory_Buffers_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}\n ) + (\n node_memory_Slab{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or\n node_memory_Slab_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}\n )\n )\n)\n(\n node_memory_MemTotal{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or\n node_memory_MemTotal_bytes{instance=~\"($osd_hosts|$rgw_hosts|$mon_hosts|$mds_hosts).*\"}\n))\n",
+ "expr": "avg ((\n (\n node_memory_MemTotal{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or\n node_memory_MemTotal_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}\n ) - ((\n node_memory_MemFree{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or\n node_memory_MemFree_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}) +\n (\n node_memory_Cached{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or\n node_memory_Cached_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}\n ) + (\n node_memory_Buffers{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or\n node_memory_Buffers_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}\n ) + (\n node_memory_Slab{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or\n node_memory_Slab_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}\n )\n )\n) / (\n node_memory_MemTotal{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or\n node_memory_MemTotal_bytes{instance=~\"($osd_hosts|$rgw_hosts|$mon_hosts|$mds_hosts).*\"}\n))\n",
"format": "time_series",
"instant": true,
"intervalFactor": 1,
"tableColumn": "",
"targets": [
{
- "expr": "sum ((\n irate(node_disk_reads_completed{instance=~\"($osd_hosts).*\"}[5m]) or\n irate(node_disk_reads_completed_total{instance=~\"($osd_hosts).*\"}[5m])\n) + (\n irate(node_disk_writes_completed{instance=~\"($osd_hosts).*\"}[5m]) or\n irate(node_disk_writes_completed_total{instance=~\"($osd_hosts).*\"}[5m])\n))\n",
+ "expr": "sum ((\n rate(node_disk_reads_completed{instance=~\"($osd_hosts).*\"}[$__rate_interval]) or\n rate(node_disk_reads_completed_total{instance=~\"($osd_hosts).*\"}[$__rate_interval])\n) + (\n rate(node_disk_writes_completed{instance=~\"($osd_hosts).*\"}[$__rate_interval]) or\n rate(node_disk_writes_completed_total{instance=~\"($osd_hosts).*\"}[$__rate_interval])\n))\n",
"format": "time_series",
"instant": true,
"intervalFactor": 1,
"tableColumn": "",
"targets": [
{
- "expr": "avg (\n label_replace(\n (irate(node_disk_io_time_ms[5m]) / 10 ) or\n (irate(node_disk_io_time_seconds_total[5m]) * 100),\n \"instance\", \"$1\", \"instance\", \"([^.:]*).*\"\n ) * on(instance, device) group_left(ceph_daemon) label_replace(\n label_replace(\n ceph_disk_occupation_human{instance=~\"($osd_hosts).*\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^.:]*).*\"\n )\n)\n",
+ "expr": "avg (\n label_replace(\n (rate(node_disk_io_time_ms[$__rate_interval]) / 10 ) or\n (rate(node_disk_io_time_seconds_total[$__rate_interval]) * 100),\n \"instance\", \"$1\", \"instance\", \"([^.:]*).*\"\n ) * on(instance, device) group_left(ceph_daemon) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", cluster=~\"$cluster\", instance=~\"($osd_hosts).*\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^.:]*).*\"\n )\n)\n",
"format": "time_series",
"instant": true,
"intervalFactor": 1,
"tableColumn": "",
"targets": [
{
- "expr": "sum (\n (\n irate(node_network_receive_bytes{instance=~\"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*\",device!=\"lo\"}[1m]) or\n irate(node_network_receive_bytes_total{instance=~\"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*\",device!=\"lo\"}[1m])\n ) unless on (device, instance)\n label_replace((bonding_slaves > 0), \"device\", \"$1\", \"master\", \"(.+)\")\n) +\nsum (\n (\n irate(node_network_transmit_bytes{instance=~\"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*\",device!=\"lo\"}[1m]) or\n irate(node_network_transmit_bytes_total{instance=~\"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*\",device!=\"lo\"}[1m])\n ) unless on (device, instance)\n label_replace((bonding_slaves > 0), \"device\", \"$1\", \"master\", \"(.+)\")\n )\n",
+ "expr": "sum (\n (\n rate(node_network_receive_bytes{instance=~\"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*\",device!=\"lo\"}[$__rate_interval]) or\n rate(node_network_receive_bytes_total{instance=~\"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*\",device!=\"lo\"}[$__rate_interval])\n ) unless on (device, instance)\n label_replace((bonding_slaves > 0), \"device\", \"$1\", \"master\", \"(.+)\")\n) +\nsum (\n (\n rate(node_network_transmit_bytes{instance=~\"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*\",device!=\"lo\"}[$__rate_interval]) or\n rate(node_network_transmit_bytes_total{instance=~\"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*\",device!=\"lo\"}[$__rate_interval])\n ) unless on (device, instance)\n label_replace((bonding_slaves > 0), \"device\", \"$1\", \"master\", \"(.+)\")\n)\n",
"format": "time_series",
"instant": true,
"intervalFactor": 1,
"steppedLine": false,
"targets": [
{
- "expr": "topk(10,\n 100 * (\n 1 - (\n avg by(instance) (\n irate(node_cpu_seconds_total{mode=\\'idle\\',instance=~\\\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\\\"}[1m]) or\n irate(node_cpu{mode=\\'idle\\',instance=~\\\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\\\"}[1m])\n )\n )\n )\n)\n",
+ "expr": "topk(10,\n 100 * (\n 1 - (\n avg by(instance) (\n rate(node_cpu_seconds_total{mode='idle',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[$__rate_interval]) or\n rate(node_cpu{mode='idle',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[$__rate_interval])\n )\n )\n )\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{instance}}",
"steppedLine": false,
"targets": [
{
- "expr": "topk(10, (sum by(instance) (\n(\n irate(node_network_receive_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[1m]) or\n irate(node_network_receive_bytes_total{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[1m])\n) +\n(\n irate(node_network_transmit_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[1m]) or\n irate(node_network_transmit_bytes_total{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[1m])\n) unless on (device, instance)\n label_replace((bonding_slaves > 0), \"device\", \"$1\", \"master\", \"(.+)\"))\n))\n",
+ "expr": "topk(10, (sum by(instance) (\n(\n rate(node_network_receive_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval]) or\n rate(node_network_receive_bytes_total{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval])\n) +\n(\n rate(node_network_transmit_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval]) or\n rate(node_network_transmit_bytes_total{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval])\n) unless on (device, instance)\n label_replace((bonding_slaves > 0), \"device\", \"$1\", \"master\", \"(.+)\"))\n))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{instance}}",
"rows": [ ],
"schemaVersion": 16,
"style": "dark",
- "tags": [ ],
+ "tags": [
+ "ceph-mixin"
+ ],
"templating": {
"list": [
{
"regex": "",
"type": "datasource"
},
+ {
+ "allValue": ".+",
+ "current": { },
+ "datasource": "$datasource",
+ "hide": 2,
+ "includeAll": true,
+ "label": "cluster",
+ "multi": true,
+ "name": "cluster",
+ "options": [ ],
+ "query": "label_values(ceph_osd_metadata, cluster)",
+ "refresh": 1,
+ "regex": "(.*)",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": ".+",
+ "current": { },
+ "datasource": "$datasource",
+ "hide": 0,
+ "includeAll": true,
+ "label": "job",
+ "multi": true,
+ "name": "job",
+ "options": [ ],
+ "query": "label_values(ceph_osd_metadata{cluster=~\"$cluster\"}, job)",
+ "refresh": 1,
+ "regex": "(.*)",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
{
"allValue": null,
"current": { },
"multi": false,
"name": "osd_hosts",
"options": [ ],
- "query": "label_values(ceph_disk_occupation, exported_instance)",
+ "query": "label_values(ceph_disk_occupation{job=~\"$job\", cluster=~\"$cluster\"}, exported_instance)",
"refresh": 1,
"regex": "([^.]*).*",
"sort": 1,
"multi": false,
"name": "mon_hosts",
"options": [ ],
- "query": "label_values(ceph_mon_metadata, ceph_daemon)",
+ "query": "label_values(ceph_mon_metadata{job=~\"$job\", cluster=~\"$cluster\"}, ceph_daemon)",
"refresh": 1,
"regex": "mon.(.*)",
"sort": 1,
"multi": false,
"name": "mds_hosts",
"options": [ ],
- "query": "label_values(ceph_mds_inodes, ceph_daemon)",
+ "query": "label_values(ceph_mds_inodes{job=~\"$job\", cluster=~\"$cluster\"}, ceph_daemon)",
"refresh": 1,
"regex": "mds.(.*)",
"sort": 1,
"multi": false,
"name": "rgw_hosts",
"options": [ ],
- "query": "label_values(ceph_rgw_metadata, ceph_daemon)",
+ "query": "label_values(ceph_rgw_metadata{job=~\"$job\", cluster=~\"$cluster\"}, ceph_daemon)",
"refresh": 1,
"regex": "rgw.(.*)",
"sort": 1,
"steppedLine": false,
"targets": [
{
- "expr": "irate(ceph_osd_op_r_latency_sum{ceph_daemon=~\"$osd\"}[1m]) /\n on (ceph_daemon) irate(ceph_osd_op_r_latency_count[1m])\n",
+ "expr": "rate(ceph_osd_op_r_latency_sum{job=~\"$job\", cluster=~\"$cluster\", ceph_daemon=~\"$osd\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_r_latency_count{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval])\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "read",
"refId": "A"
},
{
- "expr": "irate(ceph_osd_op_w_latency_sum{ceph_daemon=~\"$osd\"}[1m]) /\n on (ceph_daemon) irate(ceph_osd_op_w_latency_count[1m])\n",
+ "expr": "rate(ceph_osd_op_w_latency_sum{job=~\"$job\", cluster=~\"$cluster\", ceph_daemon=~\"$osd\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_w_latency_count{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval])\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "write",
"steppedLine": false,
"targets": [
{
- "expr": "irate(ceph_osd_op_r{ceph_daemon=~\"$osd\"}[1m])",
+ "expr": "rate(ceph_osd_op_r{job=~\"$job\", cluster=~\"$cluster\", ceph_daemon=~\"$osd\"}[$__rate_interval])",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Reads",
"refId": "A"
},
{
- "expr": "irate(ceph_osd_op_w{ceph_daemon=~\"$osd\"}[1m])",
+ "expr": "rate(ceph_osd_op_w{job=~\"$job\", cluster=~\"$cluster\", ceph_daemon=~\"$osd\"}[$__rate_interval])",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Writes",
"steppedLine": false,
"targets": [
{
- "expr": "irate(ceph_osd_op_r_out_bytes{ceph_daemon=~\"$osd\"}[1m])",
+ "expr": "rate(ceph_osd_op_r_out_bytes{job=~\"$job\", cluster=~\"$cluster\", ceph_daemon=~\"$osd\"}[$__rate_interval])",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Read Bytes",
"refId": "A"
},
{
- "expr": "irate(ceph_osd_op_w_in_bytes{ceph_daemon=~\"$osd\"}[1m])",
+ "expr": "rate(ceph_osd_op_w_in_bytes{job=~\"$job\", cluster=~\"$cluster\", ceph_daemon=~\"$osd\"}[$__rate_interval])",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Write Bytes",
"steppedLine": false,
"targets": [
{
- "expr": "(\n label_replace(\n irate(node_disk_read_time_seconds_total[1m]) / irate(node_disk_reads_completed_total[1m]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n ) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{ceph_daemon=~\"$osd\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n )\n)\n",
+ "expr": "(\n label_replace(\n rate(node_disk_read_time_seconds_total{cluster=~\"$cluster\"}[$__rate_interval]) /\n rate(node_disk_reads_completed_total{cluster=~\"$cluster\"}[$__rate_interval]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n ) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", cluster=~\"$cluster\", ceph_daemon=~\"$osd\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n )\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{instance}}/{{device}} Reads",
"refId": "A"
},
{
- "expr": "(\n label_replace(\n irate(node_disk_write_time_seconds_total[1m]) / irate(node_disk_writes_completed_total[1m]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") and on (instance, device)\n label_replace(\n label_replace(\n ceph_disk_occupation_human{ceph_daemon=~\"$osd\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n )\n )\n",
+ "expr": "(\n label_replace(\n rate(node_disk_write_time_seconds_total{cluster=~\"$cluster\"}[$__rate_interval]) /\n rate(node_disk_writes_completed_total{cluster=~\"$cluster\"}[$__rate_interval]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") and on (instance, device)\n label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", cluster=~\"$cluster\", ceph_daemon=~\"$osd\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n )\n )\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{instance}}/{{device}} Writes",
"steppedLine": false,
"targets": [
{
- "expr": "label_replace(\n irate(node_disk_writes_completed_total[1m]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{ceph_daemon=~\"$osd\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n",
+ "expr": "label_replace(\n rate(node_disk_writes_completed_total{cluster=~\"$cluster\"}[$__rate_interval]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", cluster=~\"$cluster\", ceph_daemon=~\"$osd\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{device}} on {{instance}} Writes",
"refId": "A"
},
{
- "expr": "label_replace(\n irate(node_disk_reads_completed_total[1m]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{ceph_daemon=~\"$osd\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n",
+ "expr": "label_replace(\n rate(node_disk_reads_completed_total{cluster=~\"$cluster\"}[$__rate_interval]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", cluster=~\"$cluster\", ceph_daemon=~\"$osd\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{device}} on {{instance}} Reads",
"steppedLine": false,
"targets": [
{
- "expr": "label_replace(\n irate(node_disk_read_bytes_total[1m]), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{ceph_daemon=~\"$osd\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n",
+ "expr": "label_replace(\n rate(node_disk_read_bytes_total{cluster=~\"$cluster\"}[$__rate_interval]), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", cluster=~\"$cluster\", ceph_daemon=~\"$osd\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{instance}} {{device}} Reads",
"refId": "A"
},
{
- "expr": "label_replace(\n irate(node_disk_written_bytes_total[1m]), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{ceph_daemon=~\"$osd\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n",
+ "expr": "label_replace(\n rate(node_disk_written_bytes_total{cluster=~\"$cluster\"}[$__rate_interval]), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", cluster=~\"$cluster\", ceph_daemon=~\"$osd\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{instance}} {{device}} Writes",
"steppedLine": false,
"targets": [
{
- "expr": "label_replace(\n irate(node_disk_io_time_seconds_total[1m]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{ceph_daemon=~\"$osd\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n",
+ "expr": "label_replace(\n rate(node_disk_io_time_seconds_total{cluster=~\"$cluster\"}[$__rate_interval]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", cluster=~\"$cluster\", ceph_daemon=~\"$osd\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{device}} on {{instance}}",
"rows": [ ],
"schemaVersion": 16,
"style": "dark",
- "tags": [ ],
+ "tags": [
+ "ceph-mixin"
+ ],
"templating": {
"list": [
{
"regex": "",
"type": "datasource"
},
+ {
+ "allValue": ".+",
+ "current": { },
+ "datasource": "$datasource",
+ "hide": 2,
+ "includeAll": true,
+ "label": "cluster",
+ "multi": true,
+ "name": "cluster",
+ "options": [ ],
+ "query": "label_values(ceph_osd_metadata, cluster)",
+ "refresh": 1,
+ "regex": "(.*)",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": ".+",
+ "current": { },
+ "datasource": "$datasource",
+ "hide": 0,
+ "includeAll": true,
+ "label": "job",
+ "multi": true,
+ "name": "job",
+ "options": [ ],
+ "query": "label_values(ceph_osd_metadata{cluster=~\"$cluster\"}, job)",
+ "refresh": 1,
+ "regex": "(.*)",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
{
"allValue": null,
"current": { },
"multi": false,
"name": "osd",
"options": [ ],
- "query": "label_values(ceph_osd_metadata,ceph_daemon)",
+ "query": "label_values(ceph_osd_metadata{job=~\"$job\", cluster=~\"$cluster\"}, ceph_daemon)",
"refresh": 1,
"regex": "(.*)",
"sort": 1,
"steppedLine": false,
"targets": [
{
- "expr": "avg (irate(ceph_osd_op_r_latency_sum[1m]) / on (ceph_daemon) irate(ceph_osd_op_r_latency_count[1m]) * 1000)",
+ "expr": "avg (\n rate(ceph_osd_op_r_latency_sum{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_r_latency_count{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) * 1000\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "AVG read",
"refId": "A"
},
{
- "expr": "max(\n irate(ceph_osd_op_r_latency_sum[1m]) /\n on (ceph_daemon) irate(ceph_osd_op_r_latency_count[1m]) * 1000\n)\n",
+ "expr": "max(\n rate(ceph_osd_op_r_latency_sum{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_r_latency_count{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) * 1000\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "MAX read",
"refId": "B"
},
{
- "expr": "quantile(0.95,\n (\n irate(ceph_osd_op_r_latency_sum[1m]) /\n on (ceph_daemon) irate(ceph_osd_op_r_latency_count[1m])\n * 1000\n )\n)\n",
+ "expr": "quantile(0.95,\n (\n rate(ceph_osd_op_r_latency_sum{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_r_latency_count{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval])\n * 1000\n )\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "@95%ile",
],
"targets": [
{
- "expr": "topk(10,\n (sort(\n (\n irate(ceph_osd_op_r_latency_sum[1m]) /\n on (ceph_daemon) irate(ceph_osd_op_r_latency_count[1m]) *\n 1000\n )\n ))\n)\n",
+ "expr": "topk(10,\n (sort(\n (\n rate(ceph_osd_op_r_latency_sum{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_r_latency_count{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) *\n 1000\n )\n ))\n)\n",
"format": "table",
"instant": true,
"intervalFactor": 1,
"steppedLine": false,
"targets": [
{
- "expr": "avg(\n irate(ceph_osd_op_w_latency_sum[1m]) /\n on (ceph_daemon) irate(ceph_osd_op_w_latency_count[1m])\n * 1000\n)\n",
+ "expr": "avg(\n rate(ceph_osd_op_w_latency_sum{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_w_latency_count{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval])\n * 1000\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "AVG write",
"refId": "A"
},
{
- "expr": "max(\n irate(ceph_osd_op_w_latency_sum[1m]) /\n on (ceph_daemon) irate(ceph_osd_op_w_latency_count[1m]) *\n 1000\n)\n",
+ "expr": "max(\n rate(ceph_osd_op_w_latency_sum{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_w_latency_count{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) *\n 1000\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "MAX write",
"refId": "B"
},
{
- "expr": "quantile(0.95, (\n irate(ceph_osd_op_w_latency_sum[1m]) /\n on (ceph_daemon) irate(ceph_osd_op_w_latency_count[1m]) *\n 1000\n))\n",
+ "expr": "quantile(0.95, (\n rate(ceph_osd_op_w_latency_sum{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_w_latency_count{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) *\n 1000\n))\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "@95%ile write",
],
"targets": [
{
- "expr": "topk(10,\n (sort(\n (irate(ceph_osd_op_w_latency_sum[1m]) /\n on (ceph_daemon) irate(ceph_osd_op_w_latency_count[1m]) *\n 1000)\n ))\n)\n",
+ "expr": "topk(10,\n (sort(\n (rate(ceph_osd_op_w_latency_sum{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_w_latency_count{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) *\n 1000)\n ))\n)\n",
"format": "table",
"instant": true,
"intervalFactor": 1,
"pieType": "pie",
"targets": [
{
- "expr": "count by (device_class) (ceph_osd_metadata)",
+ "expr": "count by (device_class) (ceph_osd_metadata{job=~\"$job\", cluster=~\"$cluster\"})",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{device_class}}",
"pieType": "pie",
"targets": [
{
- "expr": "count(ceph_bluefs_wal_total_bytes)",
+ "expr": "count(ceph_bluefs_wal_total_bytes{job=~\"$job\", cluster=~\"$cluster\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "bluestore",
"refId": "A"
},
{
- "expr": "absent(ceph_bluefs_wal_total_bytes)*count(ceph_osd_metadata)",
+ "expr": "absent(ceph_bluefs_wal_total_bytesjob=~\"$job\", cluster=~\"$cluster\") * count(ceph_osd_metadata{job=~\"$job\", cluster=~\"$cluster\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "filestore",
"pieType": "pie",
"targets": [
{
- "expr": "count(ceph_osd_stat_bytes < 1099511627776)",
+ "expr": "count(ceph_osd_stat_bytes{job=~\"$job\", cluster=~\"$cluster\"} < 1099511627776)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "<1TB",
"refId": "A"
},
{
- "expr": "count(ceph_osd_stat_bytes >= 1099511627776 < 2199023255552)",
+ "expr": "count(ceph_osd_stat_bytes{job=~\"$job\", cluster=~\"$cluster\"} >= 1099511627776 < 2199023255552)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "<2TB",
"refId": "B"
},
{
- "expr": "count(ceph_osd_stat_bytes >= 2199023255552 < 3298534883328)",
+ "expr": "count(ceph_osd_stat_bytes{job=~\"$job\", cluster=~\"$cluster\"} >= 2199023255552 < 3298534883328)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "<3TB",
"refId": "C"
},
{
- "expr": "count(ceph_osd_stat_bytes >= 3298534883328 < 4398046511104)",
+ "expr": "count(ceph_osd_stat_bytes{job=~\"$job\", cluster=~\"$cluster\"} >= 3298534883328 < 4398046511104)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "<4TB",
"refId": "D"
},
{
- "expr": "count(ceph_osd_stat_bytes >= 4398046511104 < 6597069766656)",
+ "expr": "count(ceph_osd_stat_bytes{job=~\"$job\", cluster=~\"$cluster\"} >= 4398046511104 < 6597069766656)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "<6TB",
"refId": "E"
},
{
- "expr": "count(ceph_osd_stat_bytes >= 6597069766656 < 8796093022208)",
+ "expr": "count(ceph_osd_stat_bytes{job=~\"$job\", cluster=~\"$cluster\"} >= 6597069766656 < 8796093022208)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "<8TB",
"refId": "F"
},
{
- "expr": "count(ceph_osd_stat_bytes >= 8796093022208 < 10995116277760)",
+ "expr": "count(ceph_osd_stat_bytes{job=~\"$job\", cluster=~\"$cluster\"} >= 8796093022208 < 10995116277760)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "<10TB",
"refId": "G"
},
{
- "expr": "count(ceph_osd_stat_bytes >= 10995116277760 < 13194139533312)",
+ "expr": "count(ceph_osd_stat_bytes{job=~\"$job\", cluster=~\"$cluster\"} >= 10995116277760 < 13194139533312)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "<12TB",
"refId": "H"
},
{
- "expr": "count(ceph_osd_stat_bytes >= 13194139533312)",
+ "expr": "count(ceph_osd_stat_bytes{job=~\"$job\", cluster=~\"$cluster\"} >= 13194139533312)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "<12TB+",
"steppedLine": false,
"targets": [
{
- "expr": "ceph_osd_numpg",
+ "expr": "ceph_osd_numpg{job=~\"$job\", cluster=~\"$cluster\"}",
"format": "time_series",
"instant": true,
"intervalFactor": 1,
"tableColumn": "",
"targets": [
{
- "expr": "sum(ceph_bluestore_onode_hits) / (\n sum(ceph_bluestore_onode_hits) +\n sum(ceph_bluestore_onode_misses)\n)\n",
+ "expr": "sum(ceph_bluestore_onode_hits{job=~\"$job\", cluster=~\"$cluster\"}) / (\n sum(ceph_bluestore_onode_hits{job=~\"$job\", cluster=~\"$cluster\"}) +\n sum(ceph_bluestore_onode_misses{job=~\"$job\", cluster=~\"$cluster\"})\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "",
"steppedLine": false,
"targets": [
{
- "expr": "round(sum(irate(ceph_pool_rd[30s])))",
+ "expr": "round(sum(rate(ceph_pool_rd{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval])))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Reads",
"refId": "A"
},
{
- "expr": "round(sum(irate(ceph_pool_wr[30s])))",
+ "expr": "round(sum(rate(ceph_pool_wr{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval])))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Writes",
"rows": [ ],
"schemaVersion": 16,
"style": "dark",
- "tags": [ ],
+ "tags": [
+ "ceph-mixin"
+ ],
"templating": {
"list": [
{
"refresh": 1,
"regex": "",
"type": "datasource"
+ },
+ {
+ "allValue": ".+",
+ "current": { },
+ "datasource": "$datasource",
+ "hide": 2,
+ "includeAll": true,
+ "label": "cluster",
+ "multi": true,
+ "name": "cluster",
+ "options": [ ],
+ "query": "label_values(ceph_osd_metadata, cluster)",
+ "refresh": 1,
+ "regex": "(.*)",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": ".+",
+ "current": { },
+ "datasource": "$datasource",
+ "hide": 0,
+ "includeAll": true,
+ "label": "job",
+ "multi": true,
+ "name": "job",
+ "options": [ ],
+ "query": "label_values(ceph_osd_metadata{cluster=~\"$cluster\"}, job)",
+ "refresh": 1,
+ "regex": "(.*)",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
}
]
},
"tableColumn": "",
"targets": [
{
- "expr": "(ceph_pool_stored / (ceph_pool_stored + ceph_pool_max_avail)) *\n on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~\"$pool_name\"}\n",
+ "expr": "(ceph_pool_stored{job=~\"$job\", cluster=~\"$cluster\"} / (ceph_pool_stored{job=~\"$job\", cluster=~\"$cluster\"} + ceph_pool_max_avail{job=~\"$job\", cluster=~\"$cluster\"})) *\n on(pool_id) group_left(instance, name) ceph_pool_metadata{job=~\"$job\", cluster=~\"$cluster\", name=~\"$pool_name\"}\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "",
"#d44a3a"
],
"datasource": "$datasource",
- "description": "Time till pool is full assuming the average fill rate of the last 6 hours",
+ "description": "Time till pool is full assuming the average fill rate of the last 4 hours",
"format": "s",
"gauge": {
"maxValue": false,
"tableColumn": "",
"targets": [
{
- "expr": "(ceph_pool_max_avail / deriv(ceph_pool_stored[6h])) *\n on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~\"$pool_name\"} > 0\n",
+ "expr": "(ceph_pool_max_avail{job=~\"$job\", cluster=~\"$cluster\"} / deriv(ceph_pool_stored{job=~\"$job\", cluster=~\"$cluster\"}[6h])) *\n on(pool_id) group_left(instance, name) ceph_pool_metadata{job=~\"$job\", cluster=~\"$cluster\", name=~\"$pool_name\"} > 0\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "",
"steppedLine": false,
"targets": [
{
- "expr": "deriv(ceph_pool_objects[1m]) *\n on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~\"$pool_name\"}\n",
+ "expr": "deriv(ceph_pool_objects{job=~\"$job\", cluster=~\"$cluster\"}[1m]) *\n on(pool_id) group_left(instance, name) ceph_pool_metadata{job=~\"$job\", cluster=~\"$cluster\", name=~\"$pool_name\"}\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Objects per second",
"steppedLine": false,
"targets": [
{
- "expr": "irate(ceph_pool_rd[1m]) *\n on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~\"$pool_name\"}\n",
+ "expr": "rate(ceph_pool_rd{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) *\n on(pool_id) group_left(instance,name) ceph_pool_metadata{job=~\"$job\", cluster=~\"$cluster\", name=~\"$pool_name\"}\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "reads",
"refId": "A"
},
{
- "expr": "irate(ceph_pool_wr[1m]) *\n on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~\"$pool_name\"}\n",
+ "expr": "rate(ceph_pool_wr{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) *\n on(pool_id) group_left(instance, name) ceph_pool_metadata{job=~\"$job\", cluster=~\"$cluster\", name=~\"$pool_name\"}\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "writes",
"steppedLine": false,
"targets": [
{
- "expr": "irate(ceph_pool_rd_bytes[1m]) +\n on(pool_id) group_left(instance, name) ceph_pool_metadata{name=~\"$pool_name\"}\n",
+ "expr": "rate(ceph_pool_rd_bytes{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) +\n on(pool_id) group_left(instance, name) ceph_pool_metadata{job=~\"$job\", cluster=~\"$cluster\", name=~\"$pool_name\"}\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "reads",
"refId": "A"
},
{
- "expr": "irate(ceph_pool_wr_bytes[1m]) +\n on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~\"$pool_name\"}\n",
+ "expr": "rate(ceph_pool_wr_bytes{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) +\n on(pool_id) group_left(instance,name) ceph_pool_metadata{job=~\"$job\", cluster=~\"$cluster\", name=~\"$pool_name\"}\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "writes",
"steppedLine": false,
"targets": [
{
- "expr": "ceph_pool_objects *\n on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~\"$pool_name\"}\n",
+ "expr": "ceph_pool_objects{job=~\"$job\", cluster=~\"$cluster\"} *\n on(pool_id) group_left(instance,name) ceph_pool_metadata{job=~\"$job\", cluster=~\"$cluster\", name=~\"$pool_name\"}\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Number of Objects",
"rows": [ ],
"schemaVersion": 22,
"style": "dark",
- "tags": [ ],
+ "tags": [
+ "ceph-mixin"
+ ],
"templating": {
"list": [
{
"current": {
- "text": "Prometheus admin.virt1.home.fajerski.name:9090",
- "value": "Prometheus admin.virt1.home.fajerski.name:9090"
+ "text": "default",
+ "value": "default"
},
"hide": 0,
"label": "Data Source",
"regex": "",
"type": "datasource"
},
+ {
+ "allValue": ".+",
+ "current": { },
+ "datasource": "$datasource",
+ "hide": 2,
+ "includeAll": true,
+ "label": "cluster",
+ "multi": true,
+ "name": "cluster",
+ "options": [ ],
+ "query": "label_values(ceph_osd_metadata, cluster)",
+ "refresh": 1,
+ "regex": "(.*)",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": ".+",
+ "current": { },
+ "datasource": "$datasource",
+ "hide": 0,
+ "includeAll": true,
+ "label": "job",
+ "multi": true,
+ "name": "job",
+ "options": [ ],
+ "query": "label_values(ceph_osd_metadata{cluster=~\"$cluster\"}, job)",
+ "refresh": 1,
+ "regex": "(.*)",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
{
"allValue": null,
"current": { },
"multi": false,
"name": "pool_name",
"options": [ ],
- "query": "label_values(ceph_pool_metadata,name)",
+ "query": "label_values(ceph_pool_metadata{job=~\"$job\", cluster=~\"$cluster\"}, name)",
"refresh": 1,
"regex": "",
"sort": 1,
"tableColumn": "",
"targets": [
{
- "expr": "count(ceph_pool_metadata)",
+ "expr": "count(ceph_pool_metadata{job=~\"$job\", cluster=~\"$cluster\"})",
"format": "table",
"instant": true,
"intervalFactor": 1,
"tableColumn": "",
"targets": [
{
- "expr": "count(ceph_pool_metadata{compression_mode!=\"none\"})",
+ "expr": "count(ceph_pool_metadata{job=~\"$job\", cluster=~\"$cluster\", compression_mode!=\"none\"})",
"format": "",
"intervalFactor": 1,
"legendFormat": "",
"tableColumn": "",
"targets": [
{
- "expr": "sum(ceph_osd_stat_bytes)",
+ "expr": "sum(ceph_osd_stat_bytes{job=~\"$job\", cluster=~\"$cluster\"})",
"format": "",
"intervalFactor": 1,
"legendFormat": "",
"tableColumn": "",
"targets": [
{
- "expr": "sum(ceph_pool_bytes_used)",
+ "expr": "sum(ceph_pool_bytes_used{job=~\"$job\", cluster=~\"$cluster\"})",
"format": "",
"instant": true,
"intervalFactor": 1,
"tableColumn": "",
"targets": [
{
- "expr": "sum(ceph_pool_stored)",
+ "expr": "sum(ceph_pool_stored{job=~\"$job\", cluster=~\"$cluster\"})",
"format": "",
"instant": true,
"intervalFactor": 1,
"tableColumn": "",
"targets": [
{
- "expr": "sum(ceph_pool_compress_under_bytes - ceph_pool_compress_bytes_used)",
+ "expr": "sum(\n ceph_pool_compress_under_bytes{job=~\"$job\", cluster=~\"$cluster\"} -\n ceph_pool_compress_bytes_used{job=~\"$job\", cluster=~\"$cluster\"}\n)\n",
"format": "",
"intervalFactor": 1,
"legendFormat": "",
"tableColumn": "",
"targets": [
{
- "expr": "(\n sum(ceph_pool_compress_under_bytes > 0) /\n sum(ceph_pool_stored_raw and ceph_pool_compress_under_bytes > 0)\n) * 100\n",
+ "expr": "(\n sum(ceph_pool_compress_under_bytes{job=~\"$job\", cluster=~\"$cluster\"} > 0) /\n sum(ceph_pool_stored_raw{job=~\"$job\", cluster=~\"$cluster\"} and ceph_pool_compress_under_bytes{job=~\"$job\", cluster=~\"$cluster\"} > 0)\n) * 100\n",
"format": "table",
"intervalFactor": 1,
"legendFormat": "",
"tableColumn": "",
"targets": [
{
- "expr": "sum(ceph_pool_compress_under_bytes > 0) / sum(ceph_pool_compress_bytes_used > 0)",
+ "expr": "sum(\n ceph_pool_compress_under_bytes{job=~\"$job\", cluster=~\"$cluster\"} > 0)\n / sum(ceph_pool_compress_bytes_used{job=~\"$job\", cluster=~\"$cluster\"} > 0\n)\n",
"format": "",
"intervalFactor": 1,
"legendFormat": "",
],
"targets": [
{
- "expr": "(ceph_pool_compress_under_bytes / ceph_pool_compress_bytes_used > 0) and on(pool_id) (\n ((ceph_pool_compress_under_bytes > 0) / ceph_pool_stored_raw) * 100 > 0.5\n)\n",
+ "expr": "(\n ceph_pool_compress_under_bytes{job=~\"$job\", cluster=~\"$cluster\"} /\n ceph_pool_compress_bytes_used{job=~\"$job\", cluster=~\"$cluster\"} > 0\n) and on(pool_id) (\n (\n (ceph_pool_compress_under_bytes{job=~\"$job\", cluster=~\"$cluster\"} > 0) /\n ceph_pool_stored_raw{job=~\"$job\", cluster=~\"$cluster\"}\n ) * 100 > 0.5\n)\n",
"format": "table",
"instant": true,
"intervalFactor": 1,
"refId": "A"
},
{
- "expr": "ceph_pool_max_avail * on(pool_id) group_left(name) ceph_pool_metadata",
+ "expr": "ceph_pool_max_avail{job=~\"$job\", cluster=~\"$cluster\"} *\n on(pool_id) group_left(name) ceph_pool_metadata{job=~\"$job\", cluster=~\"$cluster\"}\n",
"format": "table",
"instant": true,
"intervalFactor": 1,
"refId": "B"
},
{
- "expr": "((ceph_pool_compress_under_bytes > 0) / ceph_pool_stored_raw) * 100",
+ "expr": "(\n (ceph_pool_compress_under_bytes{job=~\"$job\", cluster=~\"$cluster\"} > 0) /\n ceph_pool_stored_raw{job=~\"$job\", cluster=~\"$cluster\"}\n) * 100\n",
"format": "table",
"instant": true,
"intervalFactor": 1,
"refId": "C"
},
{
- "expr": "(ceph_pool_percent_used * on(pool_id) group_left(name) ceph_pool_metadata)",
+ "expr": "ceph_pool_percent_used{job=~\"$job\", cluster=~\"$cluster\"} *\n on(pool_id) group_left(name) ceph_pool_metadata{job=~\"$job\", cluster=~\"$cluster\"}\n",
"format": "table",
"instant": true,
"intervalFactor": 1,
"refId": "D"
},
{
- "expr": "(ceph_pool_compress_under_bytes - ceph_pool_compress_bytes_used > 0)",
+ "expr": "ceph_pool_compress_under_bytes{job=~\"$job\", cluster=~\"$cluster\"} -\n ceph_pool_compress_bytes_used{job=~\"$job\", cluster=~\"$cluster\"} > 0\n",
"format": "table",
"instant": true,
"intervalFactor": 1,
"refId": "E"
},
{
- "expr": "delta(ceph_pool_stored[5d])",
+ "expr": "delta(ceph_pool_stored{job=~\"$job\", cluster=~\"$cluster\"}[5d])",
"format": "table",
"instant": true,
"intervalFactor": 1,
"refId": "F"
},
{
- "expr": "rate(ceph_pool_rd[30s]) + rate(ceph_pool_wr[30s])",
+ "expr": "rate(ceph_pool_rd{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval])\n + rate(ceph_pool_wr{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval])\n",
"format": "table",
"instant": true,
"intervalFactor": 1,
"refId": "G"
},
{
- "expr": "rate(ceph_pool_rd_bytes[30s]) + rate(ceph_pool_wr_bytes[30s])",
+ "expr": "rate(ceph_pool_rd_bytes{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) +\n rate(ceph_pool_wr_bytes{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval])\n",
"format": "table",
"instant": true,
"intervalFactor": 1,
"refId": "H"
},
{
- "expr": "ceph_pool_metadata",
+ "expr": "ceph_pool_metadata{job=~\"$job\", cluster=~\"$cluster\"}",
"format": "table",
"instant": true,
"intervalFactor": 1,
"refId": "I"
},
{
- "expr": "ceph_pool_stored * on(pool_id) group_left ceph_pool_metadata",
+ "expr": "ceph_pool_stored{job=~\"$job\", cluster=~\"$cluster\"} * on(pool_id) group_left ceph_pool_metadata{job=~\"$job\", cluster=~\"$cluster\"}",
"format": "table",
"instant": true,
"intervalFactor": 1,
"refId": "J"
},
{
- "expr": "ceph_pool_metadata{compression_mode!=\"none\"}",
+ "expr": "ceph_pool_metadata{job=~\"$job\", cluster=~\"$cluster\", compression_mode!=\"none\"}",
"format": "table",
"instant": true,
"intervalFactor": 1,
"steppedLine": false,
"targets": [
{
- "expr": "topk($topk,\n round(\n (rate(ceph_pool_rd[30s]) + rate(ceph_pool_wr[30s])),\n 1\n ) * on(pool_id) group_left(instance,name) ceph_pool_metadata)\n",
+ "expr": "topk($topk,\n round(\n (\n rate(ceph_pool_rd{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) +\n rate(ceph_pool_wr{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval])\n ), 1\n ) * on(pool_id) group_left(instance,name) ceph_pool_metadata{job=~\"$job\", cluster=~\"$cluster\"})\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{name}} ",
"refId": "A"
},
{
- "expr": "topk($topk,\n rate(ceph_pool_wr[30s]) +\n on(pool_id) group_left(instance,name) ceph_pool_metadata\n)\n",
+ "expr": "topk($topk,\n rate(ceph_pool_wr{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) +\n on(pool_id) group_left(instance,name) ceph_pool_metadata{job=~\"$job\", cluster=~\"$cluster\"}\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{name}} - write",
"steppedLine": false,
"targets": [
{
- "expr": "topk($topk,\n (rate(ceph_pool_rd_bytes[30s]) + rate(ceph_pool_wr_bytes[30s])) *\n on(pool_id) group_left(instance, name) ceph_pool_metadata\n)\n",
+ "expr": "topk($topk,\n (\n rate(ceph_pool_rd_bytes{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) +\n rate(ceph_pool_wr_bytes{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval])\n ) * on(pool_id) group_left(instance, name) ceph_pool_metadata{job=~\"$job\", cluster=~\"$cluster\"}\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{name}}",
"steppedLine": false,
"targets": [
{
- "expr": "ceph_pool_bytes_used * on(pool_id) group_right ceph_pool_metadata",
+ "expr": "ceph_pool_bytes_used{job=~\"$job\", cluster=~\"$cluster\"} * on(pool_id) group_right ceph_pool_metadata{job=~\"$job\", cluster=~\"$cluster\"}",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{name}}",
"rows": [ ],
"schemaVersion": 22,
"style": "dark",
- "tags": [ ],
+ "tags": [
+ "ceph-mixin"
+ ],
"templating": {
"list": [
{
"current": {
- "text": "Dashboard1",
- "value": "Dashboard1"
+ "text": "default",
+ "value": "default"
},
"hide": 0,
"label": "Data Source",
"regex": "",
"type": "datasource"
},
+ {
+ "allValue": ".+",
+ "current": { },
+ "datasource": "$datasource",
+ "hide": 2,
+ "includeAll": true,
+ "label": "cluster",
+ "multi": true,
+ "name": "cluster",
+ "options": [ ],
+ "query": "label_values(ceph_osd_metadata, cluster)",
+ "refresh": 1,
+ "regex": "(.*)",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": ".+",
+ "current": { },
+ "datasource": "$datasource",
+ "hide": 0,
+ "includeAll": true,
+ "label": "job",
+ "multi": true,
+ "name": "job",
+ "options": [ ],
+ "query": "label_values(ceph_osd_metadata{cluster=~\"$cluster\"}, job)",
+ "refresh": 1,
+ "regex": "(.*)",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
{
"allValue": null,
"current": {
"steppedLine": false,
"targets": [
{
- "expr": "sum by (instance_id) (\n rate(ceph_rgw_get_initial_lat_sum[30s]) / rate(ceph_rgw_get_initial_lat_count[30s])\n) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}\n",
+ "expr": "sum by (instance_id) (\n rate(ceph_rgw_get_initial_lat_sum{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) /\n rate(ceph_rgw_get_initial_lat_count{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval])\n) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", cluster=~\"$cluster\", ceph_daemon=~\"$rgw_servers\"}\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "GET {{ceph_daemon}}",
"refId": "A"
},
{
- "expr": "sum by (instance_id) (\n rate(ceph_rgw_put_initial_lat_sum[30s]) / rate(ceph_rgw_put_initial_lat_count[30s])\n) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}\n",
+ "expr": "sum by (instance_id) (\n rate(ceph_rgw_put_initial_lat_sum{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) /\n rate(ceph_rgw_put_initial_lat_count{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval])\n) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", cluster=~\"$cluster\", ceph_daemon=~\"$rgw_servers\"}\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "PUT {{ceph_daemon}}",
"steppedLine": false,
"targets": [
{
- "expr": "rate(ceph_rgw_get_b[30s]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}\n",
+ "expr": "rate(ceph_rgw_get_b{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", cluster=~\"$cluster\", ceph_daemon=~\"$rgw_servers\"}\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "GETs {{ceph_daemon}}",
"refId": "A"
},
{
- "expr": "rate(ceph_rgw_put_b[30s]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}\n",
+ "expr": "rate(ceph_rgw_put_b{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon)\n ceph_rgw_metadata{job=~\"$job\", cluster=~\"$cluster\", ceph_daemon=~\"$rgw_servers\"}\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "PUTs {{ceph_daemon}}",
"steppedLine": false,
"targets": [
{
- "expr": "rate(ceph_rgw_failed_req[30s]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}\n",
+ "expr": "rate(ceph_rgw_failed_req{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", cluster=~\"$cluster\",ceph_daemon=~\"$rgw_servers\"}\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Requests Failed {{ceph_daemon}}",
"refId": "A"
},
{
- "expr": "rate(ceph_rgw_get[30s]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}\n",
+ "expr": "rate(ceph_rgw_get{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", cluster=~\"$cluster\", ceph_daemon=~\"$rgw_servers\"}\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "GETs {{ceph_daemon}}",
"refId": "B"
},
{
- "expr": "rate(ceph_rgw_put[30s]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}\n",
+ "expr": "rate(ceph_rgw_put{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", cluster=~\"$cluster\", ceph_daemon=~\"$rgw_servers\"}\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "PUTs {{ceph_daemon}}",
"refId": "C"
},
{
- "expr": "(\n rate(ceph_rgw_req[30s]) -\n (rate(ceph_rgw_get[30s]) + rate(ceph_rgw_put[30s]))\n) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}\n",
+ "expr": "(\n rate(ceph_rgw_req{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) -\n (\n rate(ceph_rgw_get{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) +\n rate(ceph_rgw_put{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval])\n )\n) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", cluster=~\"$cluster\", ceph_daemon=~\"$rgw_servers\"}\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Other {{ceph_daemon}}",
"pieType": "pie",
"targets": [
{
- "expr": "rate(ceph_rgw_failed_req[30s]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}\n",
+ "expr": "rate(ceph_rgw_failed_req{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", cluster=~\"$cluster\", ceph_daemon=~\"$rgw_servers\"}\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Failures {{ceph_daemon}}",
"refId": "A"
},
{
- "expr": "rate(ceph_rgw_get[30s]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}\n",
+ "expr": "rate(ceph_rgw_get{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", cluster=~\"$cluster\", ceph_daemon=~\"$rgw_servers\"}\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "GETs {{ceph_daemon}}",
"refId": "B"
},
{
- "expr": "rate(ceph_rgw_put[30s]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}\n",
+ "expr": "rate(ceph_rgw_put{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", cluster=~\"$cluster\", ceph_daemon=~\"$rgw_servers\"}\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "PUTs {{ceph_daemon}}",
"refId": "C"
},
{
- "expr": "(\n rate(ceph_rgw_req[30s]) -\n (rate(ceph_rgw_get[30s]) + rate(ceph_rgw_put[30s]))\n) * on (instance_id) group_left (ceph_daemon)\n ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}\n",
+ "expr": "(\n rate(ceph_rgw_req{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) -\n (\n rate(ceph_rgw_get{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) +\n rate(ceph_rgw_put{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval])\n )\n) * on (instance_id) group_left (ceph_daemon)\n ceph_rgw_metadata{job=~\"$job\", cluster=~\"$cluster\", ceph_daemon=~\"$rgw_servers\"}\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Other (DELETE,LIST) {{ceph_daemon}}",
"schemaVersion": 16,
"style": "dark",
"tags": [
+ "ceph-mixin",
"overview"
],
"templating": {
"regex": "",
"type": "datasource"
},
+ {
+ "allValue": ".+",
+ "current": { },
+ "datasource": "$datasource",
+ "hide": 2,
+ "includeAll": true,
+ "label": "cluster",
+ "multi": true,
+ "name": "cluster",
+ "options": [ ],
+ "query": "label_values(ceph_osd_metadata, cluster)",
+ "refresh": 1,
+ "regex": "(.*)",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": ".+",
+ "current": { },
+ "datasource": "$datasource",
+ "hide": 0,
+ "includeAll": true,
+ "label": "job",
+ "multi": true,
+ "name": "job",
+ "options": [ ],
+ "query": "label_values(ceph_osd_metadata{cluster=~\"$cluster\"}, job)",
+ "refresh": 1,
+ "regex": "(.*)",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
{
"allValue": null,
"current": { },
"multi": false,
"name": "rgw_servers",
"options": [ ],
- "query": "label_values(ceph_rgw_metadata, ceph_daemon)",
+ "query": "label_values(ceph_rgw_metadata{job=~\"$job\", cluster=~\"$cluster\"}, ceph_daemon)",
"refresh": 1,
"regex": "",
"sort": 1,
"steppedLine": false,
"targets": [
{
- "expr": "rate(ceph_rgw_get_initial_lat_sum[30s]) /\n rate(ceph_rgw_get_initial_lat_count[30s]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata\n",
+ "expr": "rate(ceph_rgw_get_initial_lat_sum{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) /\n rate(ceph_rgw_get_initial_lat_count{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", cluster=~\"$cluster\"}\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "GET AVG",
"refId": "A"
},
{
- "expr": "rate(ceph_rgw_put_initial_lat_sum[30s]) /\n rate(ceph_rgw_put_initial_lat_count[30s]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata\n",
+ "expr": "rate(ceph_rgw_put_initial_lat_sum{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) /\n rate(ceph_rgw_put_initial_lat_count{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", cluster=~\"$cluster\"}\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "PUT AVG",
"steppedLine": false,
"targets": [
{
- "expr": "sum by (rgw_host) (\n label_replace(\n rate(ceph_rgw_req[30s]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata,\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n )\n)\n",
+ "expr": "sum by (rgw_host) (\n label_replace(\n rate(ceph_rgw_req{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", cluster=~\"$cluster\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n )\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{rgw_host}}",
"steppedLine": false,
"targets": [
{
- "expr": "label_replace(\n rate(ceph_rgw_get_initial_lat_sum[30s]) /\n rate(ceph_rgw_get_initial_lat_count[30s]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata,\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
+ "expr": "label_replace(\n rate(ceph_rgw_get_initial_lat_sum{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) /\n rate(ceph_rgw_get_initial_lat_count{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", cluster=~\"$cluster\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{rgw_host}}",
"steppedLine": false,
"targets": [
{
- "expr": "sum(rate(ceph_rgw_get_b[30s]))",
+ "expr": "sum(rate(ceph_rgw_get_b{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "GETs",
"refId": "A"
},
{
- "expr": "sum(rate(ceph_rgw_put_b[30s]))",
+ "expr": "sum(rate(ceph_rgw_put_b{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "PUTs",
"steppedLine": false,
"targets": [
{
- "expr": "label_replace(sum by (instance_id) (\n rate(ceph_rgw_get_b[30s]) +\n rate(ceph_rgw_put_b[30s])) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata,\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
+ "expr": "label_replace(sum by (instance_id) (\n rate(ceph_rgw_get_b{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) +\n rate(ceph_rgw_put_b{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval])) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", cluster=~\"$cluster\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{rgw_host}}",
"steppedLine": false,
"targets": [
{
- "expr": "label_replace(\n rate(ceph_rgw_put_initial_lat_sum[30s]) /\n rate(ceph_rgw_put_initial_lat_count[30s]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata,\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
+ "expr": "label_replace(\n rate(ceph_rgw_put_initial_lat_sum{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) /\n rate(ceph_rgw_put_initial_lat_count{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", cluster=~\"$cluster\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{rgw_host}}",
"steppedLine": false,
"targets": [
{
- "expr": "sum(\n irate(\n haproxy_frontend_http_responses_total{code=~\"$code\", instance=~\"$ingress_service\", proxy=~\"frontend\"}[5m]\n )\n) by (code)\n",
+ "expr": "sum(\n rate(\n haproxy_frontend_http_responses_total{code=~\"$code\", job=~\"$job_haproxy\", instance=~\"$ingress_service\", proxy=~\"frontend\"}[$__rate_interval]\n )\n) by (code)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Frontend {{ code }}",
"refId": "A"
},
{
- "expr": "sum(\n irate(\n haproxy_backend_http_responses_total{code=~\"$code\", instance=~\"$ingress_service\", proxy=~\"backend\"}[5m]\n )\n) by (code)\n",
+ "expr": "sum(\n rate(\n haproxy_backend_http_responses_total{code=~\"$code\", job=~\"$job_haproxy\", instance=~\"$ingress_service\", proxy=~\"backend\"}[$__rate_interval]\n )\n) by (code)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Backend {{ code }}",
"steppedLine": false,
"targets": [
{
- "expr": "sum(\n irate(\n haproxy_frontend_http_requests_total{proxy=~\"frontend\", instance=~\"$ingress_service\"}[5m]\n )\n) by (instance)\n",
+ "expr": "sum(\n rate(\n haproxy_frontend_http_requests_total{proxy=~\"frontend\", job=~\"$job_haproxy\", instance=~\"$ingress_service\"}[$__rate_interval]\n )\n) by (instance)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Requests",
"refId": "A"
},
{
- "expr": "sum(\n irate(\n haproxy_backend_response_errors_total{proxy=~\"backend\",instance=~\"$ingress_service\"}[5m]\n )\n) by (instance)\n",
+ "expr": "sum(\n rate(\n haproxy_backend_response_errors_total{proxy=~\"backend\", job=~\"$job_haproxy\", instance=~\"$ingress_service\"}[$__rate_interval]\n )\n) by (instance)\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Response errors",
"refId": "B"
},
{
- "expr": "sum(\n irate(\n haproxy_frontend_request_errors_total{proxy=~\"frontend\", instance=~\"$ingress_service\"}[5m]\n )\n) by (instance)\n",
+ "expr": "sum(\n rate(\n haproxy_frontend_request_errors_total{proxy=~\"frontend\", job=~\"$job_haproxy\", instance=~\"$ingress_service\"}[$__rate_interval]\n )\n) by (instance)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Requests errors",
"refId": "C"
},
{
- "expr": "sum(\n irate(\n haproxy_backend_redispatch_warnings_total{proxy=~\"backend\", instance=~\"$ingress_service\"}[5m]\n )\n) by (instance)\n",
+ "expr": "sum(\n rate(\n haproxy_backend_redispatch_warnings_total{proxy=~\"backend\", job=~\"$job_haproxy\", instance=~\"$ingress_service\"}[$__rate_interval]\n )\n) by (instance)\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Backend redispatch",
"refId": "D"
},
{
- "expr": "sum(\n irate(\n haproxy_backend_retry_warnings_total{proxy=~\"backend\", instance=~\"$ingress_service\"}[5m]\n )\n) by (instance)\n",
+ "expr": "sum(\n rate(\n haproxy_backend_retry_warnings_total{proxy=~\"backend\", job=~\"$job_haproxy\", instance=~\"$ingress_service\"}[$__rate_interval]\n )\n) by (instance)\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Backend retry",
"refId": "E"
},
{
- "expr": "sum(\n irate(\n haproxy_frontend_requests_denied_total{proxy=~\"frontend\", instance=~\"$ingress_service\"}[5m]\n )\n) by (instance)\n",
+ "expr": "sum(\n rate(\n haproxy_frontend_requests_denied_total{proxy=~\"frontend\", job=~\"$job_haproxy\", instance=~\"$ingress_service\"}[$__rate_interval]\n )\n) by (instance)\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Request denied",
"refId": "F"
},
{
- "expr": "sum(\n haproxy_backend_current_queue{proxy=~\"backend\", instance=~\"$ingress_service\"}\n) by (instance)\n",
+ "expr": "sum(\n haproxy_backend_current_queue{proxy=~\"backend\", job=~\"$job_haproxy\", instance=~\"$ingress_service\"}\n) by (instance)\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Backend Queued",
"steppedLine": false,
"targets": [
{
- "expr": "sum(\n irate(\n haproxy_frontend_connections_total{proxy=~\"frontend\", instance=~\"$ingress_service\"}[5m]\n )\n) by (instance)\n",
+ "expr": "sum(\n rate(\n haproxy_frontend_connections_total{proxy=~\"frontend\", job=~\"$job_haproxy\", instance=~\"$ingress_service\"}[$__rate_interval]\n )\n) by (instance)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Front",
"refId": "A"
},
{
- "expr": "sum(\n irate(\n haproxy_backend_connection_attempts_total{proxy=~\"backend\", instance=~\"$ingress_service\"}[5m]\n )\n) by (instance)\n",
+ "expr": "sum(\n rate(\n haproxy_backend_connection_attempts_total{proxy=~\"backend\", job=~\"$job_haproxy\", instance=~\"$ingress_service\"}[$__rate_interval]\n )\n) by (instance)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Back",
"refId": "B"
},
{
- "expr": "sum(\n irate(\n haproxy_backend_connection_errors_total{proxy=~\"backend\", instance=~\"$ingress_service\"}[5m]\n )\n) by (instance)\n",
+ "expr": "sum(\n rate(\n haproxy_backend_connection_errors_total{proxy=~\"backend\", job=~\"$job_haproxy\", instance=~\"$ingress_service\"}[$__rate_interval]\n )\n) by (instance)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Back errors",
"steppedLine": false,
"targets": [
{
- "expr": "sum(\n irate(\n haproxy_frontend_bytes_in_total{proxy=~\"frontend\", instance=~\"$ingress_service\"}[5m]\n ) * 8\n) by (instance)\n",
+ "expr": "sum(\n rate(\n haproxy_frontend_bytes_in_total{proxy=~\"frontend\", job=~\"$job_haproxy\", instance=~\"$ingress_service\"}[$__rate_interval]\n ) * 8\n) by (instance)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "IN Front",
"refId": "A"
},
{
- "expr": "sum(\n irate(\n haproxy_frontend_bytes_out_total{proxy=~\"frontend\", instance=~\"$ingress_service\"}[5m]\n ) * 8\n) by (instance)\n",
+ "expr": "sum(\n rate(\n haproxy_frontend_bytes_out_total{proxy=~\"frontend\", job=~\"$job_haproxy\", instance=~\"$ingress_service\"}[$__rate_interval]\n ) * 8\n) by (instance)\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "OUT Front",
"refId": "B"
},
{
- "expr": "sum(\n irate(\n haproxy_backend_bytes_in_total{proxy=~\"backend\", instance=~\"$ingress_service\"}[5m]\n ) * 8\n) by (instance)\n",
+ "expr": "sum(\n rate(\n haproxy_backend_bytes_in_total{proxy=~\"backend\", job=~\"$job_haproxy\", instance=~\"$ingress_service\"}[$__rate_interval]\n ) * 8\n) by (instance)\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "IN Back",
"refId": "C"
},
{
- "expr": "sum(\n irate(\n haproxy_backend_bytes_out_total{proxy=~\"backend\", instance=~\"$ingress_service\"}[5m]\n ) * 8\n) by (instance)\n",
+ "expr": "sum(\n rate(\n haproxy_backend_bytes_out_total{proxy=~\"backend\", job=~\"$job_haproxy\", instance=~\"$ingress_service\"}[$__rate_interval]\n ) * 8\n) by (instance)\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "OUT Back",
"schemaVersion": 16,
"style": "dark",
"tags": [
+ "ceph-mixin",
"overview"
],
"templating": {
"list": [
+ {
+ "current": {
+ "text": "default",
+ "value": "default"
+ },
+ "hide": 0,
+ "label": "Data Source",
+ "name": "datasource",
+ "options": [ ],
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "type": "datasource"
+ },
+ {
+ "allValue": ".+",
+ "current": { },
+ "datasource": "$datasource",
+ "hide": 2,
+ "includeAll": true,
+ "label": "cluster",
+ "multi": true,
+ "name": "cluster",
+ "options": [ ],
+ "query": "label_values(ceph_osd_metadata, cluster)",
+ "refresh": 1,
+ "regex": "(.*)",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": ".+",
+ "current": { },
+ "datasource": "$datasource",
+ "hide": 0,
+ "includeAll": true,
+ "label": "job",
+ "multi": true,
+ "name": "job",
+ "options": [ ],
+ "query": "label_values(ceph_osd_metadata{cluster=~\"$cluster\"}, job)",
+ "refresh": 1,
+ "regex": "(.*)",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
{
"allValue": null,
"current": { },
"multi": false,
"name": "rgw_servers",
"options": [ ],
- "query": "label_values(ceph_rgw_metadata, ceph_daemon)",
+ "query": "label_values(ceph_rgw_metadata{job=~\"$job\", cluster=~\"$cluster\"}, ceph_daemon)",
"refresh": 1,
- "regex": "",
+ "regex": "RGW Server",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"multi": false,
"name": "code",
"options": [ ],
- "query": "label_values(haproxy_server_http_responses_total{instance=~\"$ingress_service\"}, code)",
+ "query": "label_values(haproxy_server_http_responses_total{job=~\"$job_haproxy\", instance=~\"$ingress_service\"}, code)",
"refresh": 1,
"regex": "",
"sort": 1,
"useTags": false
},
{
- "allValue": null,
+ "allValue": ".+",
"current": { },
"datasource": "$datasource",
"hide": 0,
"includeAll": true,
- "label": "Ingress Service",
- "multi": false,
- "name": "ingress_service",
+ "label": "job haproxy",
+ "multi": true,
+ "name": "job_haproxy",
"options": [ ],
- "query": "label_values(haproxy_server_status, instance)",
+ "query": "label_values(haproxy_server_status, job)",
"refresh": 1,
- "regex": "",
+ "regex": "(.*)",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"useTags": false
},
{
- "current": {
- "text": "default",
- "value": "default"
- },
+ "allValue": null,
+ "current": { },
+ "datasource": "$datasource",
"hide": 0,
- "label": "Data Source",
- "name": "datasource",
+ "includeAll": true,
+ "label": "Ingress Service",
+ "multi": false,
+ "name": "ingress_service",
"options": [ ],
- "query": "prometheus",
+ "query": "label_values(haproxy_server_status{job=~\"$job_haproxy\"}, instance)",
"refresh": 1,
"regex": "",
- "type": "datasource"
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
}
]
},
"steppedLine": false,
"targets": [
{
- "expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_fetch_bytes_sum[30s]))",
+ "expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_fetch_bytes_sum{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{source_zone}}",
"steppedLine": false,
"targets": [
{
- "expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_fetch_bytes_count[30s]))",
+ "expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_fetch_bytes_count{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{source_zone}}",
"steppedLine": false,
"targets": [
{
- "expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_poll_latency_sum[30s]))",
+ "expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_poll_latency_sum{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{source_zone}}",
"steppedLine": false,
"targets": [
{
- "expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_fetch_errors[30s]))",
+ "expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_fetch_errors{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{source_zone}}",
"schemaVersion": 16,
"style": "dark",
"tags": [
+ "ceph-mixin",
"overview"
],
"templating": {
"list": [
{
- "allValue": null,
+ "current": {
+ "text": "default",
+ "value": "default"
+ },
+ "hide": 0,
+ "label": "Data Source",
+ "name": "datasource",
+ "options": [ ],
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "type": "datasource"
+ },
+ {
+ "allValue": ".+",
+ "current": { },
+ "datasource": "$datasource",
+ "hide": 2,
+ "includeAll": true,
+ "label": "cluster",
+ "multi": true,
+ "name": "cluster",
+ "options": [ ],
+ "query": "label_values(ceph_osd_metadata, cluster)",
+ "refresh": 1,
+ "regex": "(.*)",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": ".+",
"current": { },
"datasource": "$datasource",
"hide": 0,
"includeAll": true,
- "label": "",
- "multi": false,
- "name": "rgw_servers",
+ "label": "job",
+ "multi": true,
+ "name": "job",
"options": [ ],
- "query": "prometehus",
+ "query": "label_values(ceph_osd_metadata{cluster=~\"$cluster\"}, job)",
"refresh": 1,
- "regex": "",
+ "regex": "(.*)",
"sort": 1,
"tagValuesQuery": "",
"tags": [ ],
"useTags": false
},
{
- "current": {
- "text": "default",
- "value": "default"
- },
+ "allValue": null,
+ "current": { },
+ "datasource": "$datasource",
"hide": 0,
- "label": "Data Source",
- "name": "datasource",
+ "includeAll": true,
+ "label": "",
+ "multi": false,
+ "name": "rgw_servers",
"options": [ ],
- "query": "prometheus",
+ "query": "label_values(ceph_rgw_metadata{job=~\"$job\", cluster=~\"$cluster\"}, ceph_daemon)",
"refresh": 1,
- "regex": "",
- "type": "datasource"
+ "regex": "RGW Server",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
}
]
},
"bars": false,
"dashLength": 10,
"dashes": false,
- "datasource": "$Datasource",
+ "datasource": "$datasource",
"description": "",
"fill": 1,
"fillGradient": 0,
"steppedLine": false,
"targets": [
{
- "expr": "irate(ceph_rbd_write_ops{pool=\"$Pool\", image=\"$Image\"}[30s])",
+ "expr": "rate(ceph_rbd_write_ops{job=~\"$job\", cluster=~\"$cluster\", pool=\"$pool\", image=\"$image\"}[$__rate_interval])",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{pool}} Write",
"refId": "A"
},
{
- "expr": "irate(ceph_rbd_read_ops{pool=\"$Pool\", image=\"$Image\"}[30s])",
+ "expr": "rate(ceph_rbd_read_ops{job=~\"$job\", cluster=~\"$cluster\", pool=\"$pool\", image=\"$image\"}[$__rate_interval])",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{pool}} Read",
"bars": false,
"dashLength": 10,
"dashes": false,
- "datasource": "$Datasource",
+ "datasource": "$datasource",
"description": "",
"fill": 1,
"fillGradient": 0,
"steppedLine": false,
"targets": [
{
- "expr": "irate(ceph_rbd_write_bytes{pool=\"$Pool\", image=\"$Image\"}[30s])",
+ "expr": "rate(ceph_rbd_write_bytes{job=~\"$job\", cluster=~\"$cluster\", pool=\"$pool\", image=\"$image\"}[$__rate_interval])",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{pool}} Write",
"refId": "A"
},
{
- "expr": "irate(ceph_rbd_read_bytes{pool=\"$Pool\", image=\"$Image\"}[30s])",
+ "expr": "rate(ceph_rbd_read_bytes{job=~\"$job\", cluster=~\"$cluster\", pool=\"$pool\", image=\"$image\"}[$__rate_interval])",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{pool}} Read",
"bars": false,
"dashLength": 10,
"dashes": false,
- "datasource": "$Datasource",
+ "datasource": "$datasource",
"description": "",
"fill": 1,
"fillGradient": 0,
"steppedLine": false,
"targets": [
{
- "expr": "irate(ceph_rbd_write_latency_sum{pool=\"$Pool\", image=\"$Image\"}[30s]) /\n irate(ceph_rbd_write_latency_count{pool=\"$Pool\", image=\"$Image\"}[30s])\n",
+ "expr": "rate(ceph_rbd_write_latency_sum{job=~\"$job\", cluster=~\"$cluster\", pool=\"$pool\", image=\"$image\"}[$__rate_interval]) /\n rate(ceph_rbd_write_latency_count{job=~\"$job\", cluster=~\"$cluster\", pool=\"$pool\", image=\"$image\"}[$__rate_interval])\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{pool}} Write",
"refId": "A"
},
{
- "expr": "irate(ceph_rbd_read_latency_sum{pool=\"$Pool\", image=\"$Image\"}[30s]) /\n irate(ceph_rbd_read_latency_count{pool=\"$Pool\", image=\"$Image\"}[30s])\n",
+ "expr": "rate(ceph_rbd_read_latency_sum{job=~\"$job\", cluster=~\"$cluster\", pool=\"$pool\", image=\"$image\"}[$__rate_interval]) /\n rate(ceph_rbd_read_latency_count{job=~\"$job\", cluster=~\"$cluster\", pool=\"$pool\", image=\"$image\"}[$__rate_interval])\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{pool}} Read",
"rows": [ ],
"schemaVersion": 16,
"style": "dark",
- "tags": [ ],
+ "tags": [
+ "ceph-mixin"
+ ],
"templating": {
"list": [
{
"value": "default"
},
"hide": 0,
- "label": null,
- "name": "Datasource",
+ "label": "Data Source",
+ "name": "datasource",
"options": [ ],
"query": "prometheus",
"refresh": 1,
"regex": "",
"type": "datasource"
},
+ {
+ "allValue": ".+",
+ "current": { },
+ "datasource": "$datasource",
+ "hide": 2,
+ "includeAll": true,
+ "label": "cluster",
+ "multi": true,
+ "name": "cluster",
+ "options": [ ],
+ "query": "label_values(ceph_osd_metadata, cluster)",
+ "refresh": 1,
+ "regex": "(.*)",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": ".+",
+ "current": { },
+ "datasource": "$datasource",
+ "hide": 0,
+ "includeAll": true,
+ "label": "job",
+ "multi": true,
+ "name": "job",
+ "options": [ ],
+ "query": "label_values(ceph_osd_metadata{cluster=~\"$cluster\"}, job)",
+ "refresh": 1,
+ "regex": "(.*)",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
{
"allValue": null,
"current": { },
- "datasource": "$Datasource",
+ "datasource": "$datasource",
"hide": 0,
"includeAll": false,
"label": "",
"multi": false,
- "name": "Pool",
+ "name": "pool",
"options": [ ],
"query": "label_values(pool)",
"refresh": 1,
{
"allValue": null,
"current": { },
- "datasource": "$Datasource",
+ "datasource": "$datasource",
"hide": 0,
"includeAll": false,
"label": "",
"multi": false,
- "name": "Image",
+ "name": "image",
"options": [ ],
"query": "label_values(image)",
"refresh": 1,
"steppedLine": false,
"targets": [
{
- "expr": "round(sum(irate(ceph_rbd_write_ops[30s])))",
+ "expr": "round(sum(rate(ceph_rbd_write_ops{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval])))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Writes",
"refId": "A"
},
{
- "expr": "round(sum(irate(ceph_rbd_read_ops[30s])))",
+ "expr": "round(sum(rate(ceph_rbd_read_ops{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval])))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Reads",
"steppedLine": false,
"targets": [
{
- "expr": "round(sum(irate(ceph_rbd_write_bytes[30s])))",
+ "expr": "round(sum(rate(ceph_rbd_write_bytes{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval])))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Write",
"refId": "A"
},
{
- "expr": "round(sum(irate(ceph_rbd_read_bytes[30s])))",
+ "expr": "round(sum(rate(ceph_rbd_read_bytes{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval])))",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Read",
"steppedLine": false,
"targets": [
{
- "expr": "round(\n sum(irate(ceph_rbd_write_latency_sum[30s])) /\n sum(irate(ceph_rbd_write_latency_count[30s]))\n)\n",
+ "expr": "round(\n sum(rate(ceph_rbd_write_latency_sum{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval])) /\n sum(rate(ceph_rbd_write_latency_count{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]))\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Write",
"refId": "A"
},
{
- "expr": "round(\n sum(irate(ceph_rbd_read_latency_sum[30s])) /\n sum(irate(ceph_rbd_read_latency_count[30s]))\n)\n",
+ "expr": "round(\n sum(rate(ceph_rbd_read_latency_sum{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval])) /\n sum(rate(ceph_rbd_read_latency_count{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]))\n)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "Read",
],
"targets": [
{
- "expr": "topk(10,\n (\n sort((\n irate(ceph_rbd_write_ops[30s]) +\n on (image, pool, namespace) irate(ceph_rbd_read_ops[30s])\n ))\n )\n)\n",
+ "expr": "topk(10,\n (\n sort((\n rate(ceph_rbd_write_ops{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) +\n on (image, pool, namespace) rate(ceph_rbd_read_ops{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval])\n ))\n )\n)\n",
"format": "table",
"instant": true,
"intervalFactor": 1,
],
"targets": [
{
- "expr": "topk(10,\n sort(\n sum(\n irate(ceph_rbd_read_bytes[30s]) + irate(ceph_rbd_write_bytes[30s])\n ) by (pool, image, namespace)\n )\n)\n",
+ "expr": "topk(10,\n sort(\n sum(\n rate(ceph_rbd_read_bytes{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) +\n rate(ceph_rbd_write_bytes{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval])\n ) by (pool, image, namespace)\n )\n)\n",
"format": "table",
"instant": true,
"intervalFactor": 1,
],
"targets": [
{
- "expr": "topk(10,\n sum(\n irate(ceph_rbd_write_latency_sum[30s]) / clamp_min(irate(ceph_rbd_write_latency_count[30s]), 1) +\n irate(ceph_rbd_read_latency_sum[30s]) / clamp_min(irate(ceph_rbd_read_latency_count[30s]), 1)\n ) by (pool, image, namespace)\n)\n",
+ "expr": "topk(10,\n sum(\n rate(ceph_rbd_write_latency_sum{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) /\n clamp_min(rate(ceph_rbd_write_latency_count{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]), 1) +\n rate(ceph_rbd_read_latency_sum{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) /\n clamp_min(rate(ceph_rbd_read_latency_count{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]), 1)\n ) by (pool, image, namespace)\n)\n",
"format": "table",
"instant": true,
"intervalFactor": 1,
"schemaVersion": 16,
"style": "dark",
"tags": [
+ "ceph-mixin",
"overview"
],
"templating": {
"refresh": 1,
"regex": "",
"type": "datasource"
+ },
+ {
+ "allValue": ".+",
+ "current": { },
+ "datasource": "$datasource",
+ "hide": 2,
+ "includeAll": true,
+ "label": "cluster",
+ "multi": true,
+ "name": "cluster",
+ "options": [ ],
+ "query": "label_values(ceph_osd_metadata, cluster)",
+ "refresh": 1,
+ "regex": "(.*)",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": ".+",
+ "current": { },
+ "datasource": "$datasource",
+ "hide": 0,
+ "includeAll": true,
+ "label": "job",
+ "multi": true,
+ "name": "job",
+ "options": [ ],
+ "query": "label_values(ceph_osd_metadata{cluster=~\"$cluster\"}, job)",
+ "refresh": 1,
+ "regex": "(.*)",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [ ],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
}
]
},