From: Arthur Outhenin-Chalandre Date: Wed, 18 May 2022 10:16:13 +0000 (+0200) Subject: ceph-mixin: rationalize local helper functions to utils X-Git-Tag: v18.0.0~780^2~1 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=5db37300fde5c6cc2ec9f3ead34ea1b93126f5bf;p=ceph.git ceph-mixin: rationalize local helper functions to utils Signed-off-by: Arthur Outhenin-Chalandre --- diff --git a/monitoring/ceph-mixin/dashboards/cephfs.libsonnet b/monitoring/ceph-mixin/dashboards/cephfs.libsonnet index e18e295cd69..aa267dda78c 100644 --- a/monitoring/ceph-mixin/dashboards/cephfs.libsonnet +++ b/monitoring/ceph-mixin/dashboards/cephfs.libsonnet @@ -2,23 +2,6 @@ local g = import 'grafonnet/grafana.libsonnet'; (import 'utils.libsonnet') { 'cephfs-overview.json': - local CephfsOverviewGraphPanel(title, formatY1, labelY1, expr, legendFormat, x, y, w, h) = - $.graphPanelSchema({}, - title, - '', - 'null', - false, - formatY1, - 'short', - labelY1, - null, - 0, - 1, - '$datasource') - .addTargets( - [$.addTargetSchema(expr, legendFormat)] - ) + { gridPos: { x: x, y: y, w: w, h: h } }; - $.dashboardSchema( 'MDS Performance', '', @@ -71,10 +54,13 @@ local g = import 'grafonnet/grafana.libsonnet'; ) .addPanels([ $.addRowSchema(false, true, 'MDS Performance') + { gridPos: { x: 0, y: 0, w: 24, h: 1 } }, - CephfsOverviewGraphPanel( + $.simpleGraphPanel( + {}, 'MDS Workload - $mds_servers', + '', 'none', 'Reads(-) / Writes (+)', + 0, 'sum(rate(ceph_objecter_op_r{%(matchers)s, ceph_daemon=~"($mds_servers).*"}[$__rate_interval]))' % $.matchers(), 'Read Ops', 0, @@ -89,10 +75,13 @@ local g = import 'grafonnet/grafana.libsonnet'; .addSeriesOverride( { alias: '/.*Reads/', transform: 'negative-Y' } ), - CephfsOverviewGraphPanel( + $.simpleGraphPanel( + {}, 'Client Request Load - $mds_servers', + '', 'none', 'Client Requests', + 0, 'ceph_mds_server_handle_client_request{%(matchers)s, ceph_daemon=~"($mds_servers).*"}' % $.matchers(), '{{ceph_daemon}}', 12, diff --git a/monitoring/ceph-mixin/dashboards/host.libsonnet b/monitoring/ceph-mixin/dashboards/host.libsonnet index d4724fc7d13..aea072852a6 100644 --- a/monitoring/ceph-mixin/dashboards/host.libsonnet +++ b/monitoring/ceph-mixin/dashboards/host.libsonnet @@ -2,41 +2,6 @@ local g = import 'grafonnet/grafana.libsonnet'; (import 'utils.libsonnet') { 'hosts-overview.json': - local HostsOverviewSingleStatPanel(format, - title, - description, - valueName, - expr, - instant, - x, - y, - w, - h) = - $.addSingleStatSchema(['#299c46', 'rgba(237, 129, 40, 0.89)', '#d44a3a'], - '$datasource', - format, - title, - description, - valueName, - false, - 100, - false, - false, - '') - .addTarget( - $.addTargetSchema(expr, '', 'time_series', 1, instant) - ) + { gridPos: { x: x, y: y, w: w, h: h } }; - - local HostsOverviewGraphPanel(title, description, formatY1, expr, legendFormat, x, y, w, h) = - $.graphPanelSchema( - {}, title, description, 'null', false, formatY1, 'short', null, null, 0, 1, '$datasource' - ) - .addTargets( - [$.addTargetSchema( - expr, legendFormat - )] - ) + { gridPos: { x: x, y: y, w: w, h: h } }; - $.dashboardSchema( 'Host Overview', '', @@ -124,19 +89,20 @@ local g = import 'grafonnet/grafana.libsonnet'; 'rgw.(.*)') ) .addPanels([ - HostsOverviewSingleStatPanel( + $.simpleSingleStatPanel( 'none', 'OSD Hosts', '', 'current', 'count(sum by (hostname) (ceph_osd_metadata{%(matchers)s}))' % $.matchers(), true, + 'time_series', 0, 0, 4, 5 ), - HostsOverviewSingleStatPanel( + $.simpleSingleStatPanel( 'percentunit', 'AVG CPU Busy', 'Average CPU busy across all hosts (OSD, RGW, MON etc) within the cluster', @@ -150,12 +116,13 @@ local g = import 'grafonnet/grafana.libsonnet'; )) |||, true, + 'time_series', 4, 0, 4, 5 ), - HostsOverviewSingleStatPanel( + $.simpleSingleStatPanel( 'percentunit', 'AVG RAM Utilization', 'Average Memory Usage across all hosts in the cluster (excludes buffer/cache usage)', @@ -185,12 +152,13 @@ local g = import 'grafonnet/grafana.libsonnet'; )) |||, true, + 'time_series', 8, 0, 4, 5 ), - HostsOverviewSingleStatPanel( + $.simpleSingleStatPanel( 'none', 'Physical IOPS', 'IOPS Load at the device as reported by the OS on all OSD hosts', @@ -205,12 +173,13 @@ local g = import 'grafonnet/grafana.libsonnet'; )) |||, true, + 'time_series', 12, 0, 4, 5 ), - HostsOverviewSingleStatPanel( + $.simpleSingleStatPanel( 'percent', 'AVG Disk Utilization', 'Average Disk utilization for all OSD data devices (i.e. excludes journal/WAL)', @@ -230,12 +199,13 @@ local g = import 'grafonnet/grafana.libsonnet'; ) ||| % $.matchers(), true, + 'time_series', 16, 0, 4, 5 ), - HostsOverviewSingleStatPanel( + $.simpleSingleStatPanel( 'bytes', 'Network Load', 'Total send/receive network load across all hosts in the ceph cluster', @@ -255,18 +225,21 @@ local g = import 'grafonnet/grafana.libsonnet'; ) unless on (device, instance) label_replace((bonding_slaves > 0), "device", "$1", "master", "(.+)") ) - ||| - , + |||, true, + 'time_series', 20, 0, 4, 5 ), - HostsOverviewGraphPanel( + $.simpleGraphPanel( + {}, 'CPU Busy - Top 10 Hosts', 'Show the top 10 busiest hosts by cpu', 'percent', + null, + 0, ||| topk(10, 100 * ( @@ -285,8 +258,14 @@ local g = import 'grafonnet/grafana.libsonnet'; 12, 9 ), - HostsOverviewGraphPanel( - 'Network Load - Top 10 Hosts', 'Top 10 hosts by network load', 'Bps', ||| + $.simpleGraphPanel( + {}, + 'Network Load - Top 10 Hosts', + 'Top 10 hosts by network load', + 'Bps', + null, + 0, + ||| topk(10, (sum by(instance) ( ( rate(node_network_receive_bytes{instance=~"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*",device!="lo"}[$__rate_interval]) or @@ -298,61 +277,15 @@ local g = import 'grafonnet/grafana.libsonnet'; ) unless on (device, instance) label_replace((bonding_slaves > 0), "device", "$1", "master", "(.+)")) )) - ||| - , '{{instance}}', 12, 5, 12, 9 + |||, + '{{instance}}', + 12, + 5, + 12, + 9 ), ]), 'host-details.json': - local HostDetailsSingleStatPanel(format, - title, - description, - valueName, - expr, - x, - y, - w, - h) = - $.addSingleStatSchema(['#299c46', 'rgba(237, 129, 40, 0.89)', '#d44a3a'], - '$datasource', - format, - title, - description, - valueName, - false, - 100, - false, - false, - '') - .addTarget($.addTargetSchema(expr)) + { gridPos: { x: x, y: y, w: w, h: h } }; - - local HostDetailsGraphPanel(alias, - title, - description, - nullPointMode, - formatY1, - labelY1, - expr, - legendFormat, - x, - y, - w, - h) = - $.graphPanelSchema(alias, - title, - description, - nullPointMode, - false, - formatY1, - 'short', - labelY1, - null, - null, - 1, - '$datasource') - .addTargets( - [$.addTargetSchema(expr, legendFormat)] - ) + { gridPos: { x: x, y: y, w: w, h: h } }; - $.dashboardSchema( 'Host Details', '', @@ -402,18 +335,20 @@ local g = import 'grafonnet/grafana.libsonnet'; ) .addPanels([ $.addRowSchema(false, true, '$ceph_hosts System Overview') + { gridPos: { x: 0, y: 0, w: 24, h: 1 } }, - HostDetailsSingleStatPanel( + $.simpleSingleStatPanel( 'none', 'OSDs', '', 'current', "count(sum by (ceph_daemon) (ceph_osd_metadata{%(matchers)s, hostname='$ceph_hosts'}))" % $.matchers(), + null, + 'time_series', 0, 1, 3, 5 ), - HostDetailsGraphPanel( + $.simpleGraphPanel( { interrupt: '#447EBC', steal: '#6D1F62', @@ -423,9 +358,9 @@ local g = import 'grafonnet/grafana.libsonnet'; }, 'CPU Utilization', "Shows the CPU breakdown. When multiple servers are selected, only the first host's cpu data is shown", - 'null', 'percent', '% Utilization', + null, ||| sum by (mode) ( rate(node_cpu{instance=~"($ceph_hosts)([\\\\.:].*)?", mode=~"(irq|nice|softirq|steal|system|user|iowait)"}[$__rate_interval]) or @@ -443,7 +378,7 @@ local g = import 'grafonnet/grafana.libsonnet'; 6, 10 ), - HostDetailsGraphPanel( + $.simpleGraphPanel( { Available: '#508642', Free: '#508642', @@ -454,9 +389,9 @@ local g = import 'grafonnet/grafana.libsonnet'; }, 'RAM Usage', '', - 'null', 'bytes', 'RAM used', + null, ||| node_memory_MemFree{instance=~"$ceph_hosts([\\\\.:].*)?"} or node_memory_MemFree_bytes{instance=~"$ceph_hosts([\\\\.:].*)?"} @@ -526,13 +461,13 @@ local g = import 'grafonnet/grafana.libsonnet'; stack: false, } ), - HostDetailsGraphPanel( + $.simpleGraphPanel( {}, 'Network Load', "Show the network load (rx,tx) across all interfaces (excluding loopback 'lo')", - 'null', 'decbytes', 'Send (-) / Receive (+)', + null, ||| sum by (device) ( rate( @@ -563,13 +498,13 @@ local g = import 'grafonnet/grafana.libsonnet'; .addSeriesOverride( { alias: '/.*tx/', transform: 'negative-Y' } ), - HostDetailsGraphPanel( + $.simpleGraphPanel( {}, 'Network drop rate', '', - 'null', 'pps', 'Send (-) / Receive (+)', + null, ||| rate(node_network_receive_drop{instance=~"$ceph_hosts([\\\\.:].*)?"}[$__rate_interval]) or rate(node_network_receive_drop_total{instance=~"$ceph_hosts([\\\\.:].*)?"}[$__rate_interval]) @@ -597,7 +532,7 @@ local g = import 'grafonnet/grafana.libsonnet'; transform: 'negative-Y', } ), - HostDetailsSingleStatPanel( + $.simpleSingleStatPanel( 'bytes', 'Raw Capacity', 'Each OSD consists of a Journal/WAL partition and a data partition. The RAW Capacity shown is the sum of the data partitions across all OSDs on the selected OSD hosts.', @@ -608,18 +543,20 @@ local g = import 'grafonnet/grafana.libsonnet'; on (ceph_daemon) ceph_disk_occupation{%(matchers)s, instance=~"($ceph_hosts)([\\\\.:].*)?"} ) ||| % $.matchers(), + null, + 'time_series', 0, 6, 3, 5 ), - HostDetailsGraphPanel( + $.simpleGraphPanel( {}, 'Network error rate', '', - 'null', 'pps', 'Send (-) / Receive (+)', + null, ||| rate(node_network_receive_errs{instance=~"$ceph_hosts([\\\\.:].*)?"}[$__rate_interval]) or rate(node_network_receive_errs_total{instance=~"$ceph_hosts([\\\\.:].*)?"}[$__rate_interval]) @@ -648,13 +585,13 @@ local g = import 'grafonnet/grafana.libsonnet'; $.addRowSchema(false, true, 'OSD Disk Performance Statistics') + { gridPos: { x: 0, y: 11, w: 24, h: 1 } }, - HostDetailsGraphPanel( + $.simpleGraphPanel( {}, '$ceph_hosts Disk IOPS', "For any OSD devices on the host, this chart shows the iops per physical device. Each device is shown by it's name and corresponding OSD id value", - 'connected', 'ops', 'Read (-) / Write (+)', + null, ||| label_replace( ( @@ -695,13 +632,13 @@ local g = import 'grafonnet/grafana.libsonnet'; .addSeriesOverride( { alias: '/.*reads/', transform: 'negative-Y' } ), - HostDetailsGraphPanel( + $.simpleGraphPanel( {}, '$ceph_hosts Throughput by Disk', 'For OSD hosts, this chart shows the disk bandwidth (read bytes/sec + write bytes/sec) of the physical OSD device. Each device is shown by device name, and corresponding OSD id', - 'connected', 'Bps', 'Read (-) / Write (+)', + null, ||| label_replace( ( @@ -739,13 +676,13 @@ local g = import 'grafonnet/grafana.libsonnet'; .addSeriesOverride( { alias: '/.*read/', transform: 'negative-Y' } ), - HostDetailsGraphPanel( + $.simpleGraphPanel( {}, '$ceph_hosts Disk Latency', "For OSD hosts, this chart shows the latency at the physical drive. Each drive is shown by device name, with it's corresponding OSD id", - 'null as zero', 's', '', + null, ||| max by(instance, device) (label_replace( (rate(node_disk_write_time_seconds_total{instance=~"($ceph_hosts)([\\\\.:].*)?"}[$__rate_interval])) / @@ -766,13 +703,13 @@ local g = import 'grafonnet/grafana.libsonnet'; 11, 9 ), - HostDetailsGraphPanel( + $.simpleGraphPanel( {}, '$ceph_hosts Disk utilization', 'Show disk utilization % (util) of any OSD devices on the host by the physical device name and associated OSD id.', - 'connected', 'percent', '%Util', + null, ||| label_replace( ( diff --git a/monitoring/ceph-mixin/dashboards/osd.libsonnet b/monitoring/ceph-mixin/dashboards/osd.libsonnet index 60890108174..a4830ee42e5 100644 --- a/monitoring/ceph-mixin/dashboards/osd.libsonnet +++ b/monitoring/ceph-mixin/dashboards/osd.libsonnet @@ -2,79 +2,6 @@ local g = import 'grafonnet/grafana.libsonnet'; (import 'utils.libsonnet') { 'osds-overview.json': - local OsdOverviewStyle(alias, pattern, type, unit) = - $.addStyle(alias, null, [ - 'rgba(245, 54, 54, 0.9)', - 'rgba(237, 129, 40, 0.89)', - 'rgba(50, 172, 45, 0.97)', - ], 'YYYY-MM-DD HH:mm:ss', 2, 1, pattern, [], type, unit, []); - local OsdOverviewGraphPanel(alias, - title, - description, - formatY1, - labelY1, - min, - expr, - legendFormat1, - x, - y, - w, - h) = - $.graphPanelSchema(alias, - title, - description, - 'null', - false, - formatY1, - 'short', - labelY1, - null, - min, - 1, - '$datasource') - .addTargets( - [$.addTargetSchema(expr, legendFormat1)] - ) + { gridPos: { x: x, y: y, w: w, h: h } }; - local OsdOverviewPieChartPanel(alias, description, title) = - $.addPieChartSchema(alias, - '$datasource', - description, - 'Under graph', - 'pie', - title, - 'current'); - local OsdOverviewSingleStatPanel(colors, - format, - title, - description, - valueName, - colorValue, - gaugeMaxValue, - gaugeShow, - sparkLineShow, - thresholds, - expr, - x, - y, - w, - h) = - $.addSingleStatSchema( - colors, - '$datasource', - format, - title, - description, - valueName, - colorValue, - gaugeMaxValue, - gaugeShow, - sparkLineShow, - thresholds - ) - .addTarget( - $.addTargetSchema(expr) - ) + { gridPos: { x: x, y: y, w: w, h: h } }; - $.dashboardSchema( 'OSD Overview', '', @@ -122,7 +49,7 @@ local g = import 'grafonnet/grafana.libsonnet'; $.addJobTemplate() ) .addPanels([ - OsdOverviewGraphPanel( + $.simpleGraphPanel( { '@95%ile': '#e0752d' }, 'OSD Read Latencies', '', @@ -171,9 +98,9 @@ local g = import 'grafonnet/grafana.libsonnet'; "This table shows the osd's that are delivering the 10 highest read latencies within the cluster", { col: 2, desc: true }, [ - OsdOverviewStyle('OSD ID', 'ceph_daemon', 'string', 'short'), - OsdOverviewStyle('Latency (ms)', 'Value', 'number', 'none'), - OsdOverviewStyle('', '/.*/', 'hidden', 'short'), + $.overviewStyle('OSD ID', 'ceph_daemon', 'string', 'short'), + $.overviewStyle('Latency (ms)', 'Value', 'number', 'none'), + $.overviewStyle('', '/.*/', 'hidden', 'short'), ], 'Highest READ Latencies', 'table' @@ -197,7 +124,7 @@ local g = import 'grafonnet/grafana.libsonnet'; true ) ) + { gridPos: { x: 8, y: 0, w: 4, h: 8 } }, - OsdOverviewGraphPanel( + $.simpleGraphPanel( { '@95%ile write': '#e0752d', }, @@ -246,11 +173,11 @@ local g = import 'grafonnet/grafana.libsonnet'; "This table shows the osd's that are delivering the 10 highest write latencies within the cluster", { col: 2, desc: true }, [ - OsdOverviewStyle( + $.overviewStyle( 'OSD ID', 'ceph_daemon', 'string', 'short' ), - OsdOverviewStyle('Latency (ms)', 'Value', 'number', 'none'), - OsdOverviewStyle('', '/.*/', 'hidden', 'short'), + $.overviewStyle('Latency (ms)', 'Value', 'number', 'none'), + $.overviewStyle('', '/.*/', 'hidden', 'short'), ], 'Highest WRITE Latencies', 'table' @@ -272,13 +199,13 @@ local g = import 'grafonnet/grafana.libsonnet'; true ) ) + { gridPos: { x: 20, y: 0, w: 4, h: 8 } }, - OsdOverviewPieChartPanel( + $.simplePieChart( {}, '', 'OSD Types Summary' ) .addTarget( $.addTargetSchema('count by (device_class) (ceph_osd_metadata{%(matchers)s})' % $.matchers(), '{{device_class}}') ) + { gridPos: { x: 0, y: 8, w: 4, h: 8 } }, - OsdOverviewPieChartPanel( + $.simplePieChart( { 'Non-Encrypted': '#E5AC0E' }, '', 'OSD Objectstore Types' ) .addTarget( @@ -291,7 +218,7 @@ local g = import 'grafonnet/grafana.libsonnet'; 'absent(ceph_bluefs_wal_total_bytes{%(matchers)s)} * count(ceph_osd_metadata{%(matchers)s})' % $.matchers(), 'filestore', 'time_series', 2 ) ) + { gridPos: { x: 4, y: 8, w: 4, h: 8 } }, - OsdOverviewPieChartPanel( + $.simplePieChart( {}, 'The pie chart shows the various OSD sizes used within the cluster', 'OSD Size Summary' ) .addTarget($.addTargetSchema( @@ -335,8 +262,7 @@ local g = import 'grafonnet/grafana.libsonnet'; .addTarget($.addTargetSchema( 'ceph_osd_numpg{%(matchers)s}' % $.matchers(), 'PGs per OSD', 'time_series', 1, true )) + { gridPos: { x: 12, y: 8, w: 8, h: 8 } }, - OsdOverviewSingleStatPanel( - ['#d44a3a', '#299c46'], + $.gaugeSingleStatPanel( 'percentunit', 'OSD onode Hits Ratio', 'This gauge panel shows onode Hits ratio to help determine if increasing RAM per OSD could help improve the performance of the cluster', @@ -352,6 +278,7 @@ local g = import 'grafonnet/grafana.libsonnet'; sum(ceph_bluestore_onode_misses{%(matchers)s}) ) ||| % $.matchers(), + 'time_series', 20, 8, 4, @@ -360,7 +287,7 @@ local g = import 'grafonnet/grafana.libsonnet'; $.addRowSchema(false, true, 'R/W Profile') + { gridPos: { x: 0, y: 16, w: 24, h: 1 } }, - OsdOverviewGraphPanel( + $.simpleGraphPanel( {}, 'Read/Write Profile', 'Show the read/write workload profile overtime', diff --git a/monitoring/ceph-mixin/dashboards/pool.libsonnet b/monitoring/ceph-mixin/dashboards/pool.libsonnet index 8c6d426a299..0f23920cb38 100644 --- a/monitoring/ceph-mixin/dashboards/pool.libsonnet +++ b/monitoring/ceph-mixin/dashboards/pool.libsonnet @@ -2,80 +2,6 @@ local g = import 'grafonnet/grafana.libsonnet'; (import 'utils.libsonnet') { 'pool-overview.json': - local PoolOverviewSingleStatPanel(format, - title, - description, - valueName, - expr, - instant, - targetFormat, - x, - y, - w, - h) = - $.addSingleStatSchema(['#299c46', 'rgba(237, 129, 40, 0.89)', '#d44a3a'], - '$datasource', - format, - title, - description, - valueName, - false, - 100, - false, - false, - '') - .addTarget($.addTargetSchema(expr, '', targetFormat, 1, instant)) + { gridPos: { x: x, y: y, w: w, h: h } }; - - local PoolOverviewStyle(alias, - pattern, - type, - unit, - colorMode, - thresholds, - valueMaps) = - $.addStyle(alias, - colorMode, - [ - 'rgba(245, 54, 54, 0.9)', - 'rgba(237, 129, 40, 0.89)', - 'rgba(50, 172, 45, 0.97)', - ], - 'YYYY-MM-DD HH:mm:ss', - 2, - 1, - pattern, - thresholds, - type, - unit, - valueMaps); - - local PoolOverviewGraphPanel(title, - description, - formatY1, - labelY1, - expr, - legendFormat, - x, - y, - w, - h) = - $.graphPanelSchema({}, - title, - description, - 'null as zero', - false, - formatY1, - 'short', - labelY1, - null, - 0, - 1, - '$datasource') - .addTargets( - [$.addTargetSchema(expr, - legendFormat)] - ) + { gridPos: { x: x, y: y, w: w, h: h } }; - $.dashboardSchema( 'Ceph Pools Overview', '', @@ -117,7 +43,7 @@ local g = import 'grafonnet/grafana.libsonnet'; query='15') ) .addPanels([ - PoolOverviewSingleStatPanel( + $.simpleSingleStatPanel( 'none', 'Pools', '', @@ -130,7 +56,7 @@ local g = import 'grafonnet/grafana.libsonnet'; 3, 3 ), - PoolOverviewSingleStatPanel( + $.simpleSingleStatPanel( 'none', 'Pools with Compression', 'Count of the pools that have compression enabled', @@ -143,7 +69,7 @@ local g = import 'grafonnet/grafana.libsonnet'; 3, 3 ), - PoolOverviewSingleStatPanel( + $.simpleSingleStatPanel( 'bytes', 'Total Raw Capacity', 'Total raw capacity available to the cluster', @@ -156,7 +82,7 @@ local g = import 'grafonnet/grafana.libsonnet'; 3, 3 ), - PoolOverviewSingleStatPanel( + $.simpleSingleStatPanel( 'bytes', 'Raw Capacity Consumed', 'Total raw capacity consumed by user data and associated overheads (metadata + redundancy)', @@ -169,7 +95,7 @@ local g = import 'grafonnet/grafana.libsonnet'; 3, 3 ), - PoolOverviewSingleStatPanel( + $.simpleSingleStatPanel( 'bytes', 'Logical Stored ', 'Total of client data stored in the cluster', @@ -182,7 +108,7 @@ local g = import 'grafonnet/grafana.libsonnet'; 3, 3 ), - PoolOverviewSingleStatPanel( + $.simpleSingleStatPanel( 'bytes', 'Compression Savings', 'A compression saving is determined as the data eligible to be compressed minus the capacity used to store the data after compression', @@ -200,7 +126,7 @@ local g = import 'grafonnet/grafana.libsonnet'; 3, 3 ), - PoolOverviewSingleStatPanel( + $.simpleSingleStatPanel( 'percent', 'Compression Eligibility', 'Indicates how suitable the data is within the pools that are/have been enabled for compression - averaged across all pools holding compressed data', @@ -218,7 +144,7 @@ local g = import 'grafonnet/grafana.libsonnet'; 3, 3 ), - PoolOverviewSingleStatPanel( + $.simpleSingleStatPanel( 'none', 'Compression Factor', 'This factor describes the average ratio of data eligible to be compressed divided by the data actually stored. It does not account for data written that was ineligible for compression (too small, or compression yield too low)', @@ -241,26 +167,26 @@ local g = import 'grafonnet/grafana.libsonnet'; '', { col: 5, desc: true }, [ - PoolOverviewStyle('', 'Time', 'hidden', 'short', null, [], []), - PoolOverviewStyle('', 'instance', 'hidden', 'short', null, [], []), - PoolOverviewStyle('', 'job', 'hidden', 'short', null, [], []), - PoolOverviewStyle('Pool Name', 'name', 'string', 'short', null, [], []), - PoolOverviewStyle('Pool ID', 'pool_id', 'hidden', 'none', null, [], []), - PoolOverviewStyle('Compression Factor', 'Value #A', 'number', 'none', null, [], []), - PoolOverviewStyle('% Used', 'Value #D', 'number', 'percentunit', 'value', ['70', '85'], []), - PoolOverviewStyle('Usable Free', 'Value #B', 'number', 'bytes', null, [], []), - PoolOverviewStyle('Compression Eligibility', 'Value #C', 'number', 'percent', null, [], []), - PoolOverviewStyle('Compression Savings', 'Value #E', 'number', 'bytes', null, [], []), - PoolOverviewStyle('Growth (5d)', 'Value #F', 'number', 'bytes', 'value', ['0', '0'], []), - PoolOverviewStyle('IOPS', 'Value #G', 'number', 'none', null, [], []), - PoolOverviewStyle('Bandwidth', 'Value #H', 'number', 'Bps', null, [], []), - PoolOverviewStyle('', '__name__', 'hidden', 'short', null, [], []), - PoolOverviewStyle('', 'type', 'hidden', 'short', null, [], []), - PoolOverviewStyle('', 'compression_mode', 'hidden', 'short', null, [], []), - PoolOverviewStyle('Type', 'description', 'string', 'short', null, [], []), - PoolOverviewStyle('Stored', 'Value #J', 'number', 'bytes', null, [], []), - PoolOverviewStyle('', 'Value #I', 'hidden', 'short', null, [], []), - PoolOverviewStyle('Compression', 'Value #K', 'string', 'short', null, [], [{ text: 'ON', value: '1' }]), + $.overviewStyle('', 'Time', 'hidden', 'short'), + $.overviewStyle('', 'instance', 'hidden', 'short'), + $.overviewStyle('', 'job', 'hidden', 'short'), + $.overviewStyle('Pool Name', 'name', 'string', 'short'), + $.overviewStyle('Pool ID', 'pool_id', 'hidden', 'none'), + $.overviewStyle('Compression Factor', 'Value #A', 'number', 'none'), + $.overviewStyle('% Used', 'Value #D', 'number', 'percentunit', 'value', ['70', '85']), + $.overviewStyle('Usable Free', 'Value #B', 'number', 'bytes'), + $.overviewStyle('Compression Eligibility', 'Value #C', 'number', 'percent'), + $.overviewStyle('Compression Savings', 'Value #E', 'number', 'bytes'), + $.overviewStyle('Growth (5d)', 'Value #F', 'number', 'bytes', 'value', ['0', '0']), + $.overviewStyle('IOPS', 'Value #G', 'number', 'none'), + $.overviewStyle('Bandwidth', 'Value #H', 'number', 'Bps'), + $.overviewStyle('', '__name__', 'hidden', 'short'), + $.overviewStyle('', 'type', 'hidden', 'short'), + $.overviewStyle('', 'compression_mode', 'hidden', 'short'), + $.overviewStyle('Type', 'description', 'string', 'short'), + $.overviewStyle('Stored', 'Value #J', 'number', 'bytes'), + $.overviewStyle('', 'Value #I', 'hidden', 'short'), + $.overviewStyle('Compression', 'Value #K', 'string', 'short', null, [], [{ text: 'ON', value: '1' }]), ], 'Pool Overview', 'table' @@ -365,11 +291,13 @@ local g = import 'grafonnet/grafana.libsonnet'; $.addTargetSchema('', 'L', '', '', null), ] ) + { gridPos: { x: 0, y: 3, w: 24, h: 6 } }, - PoolOverviewGraphPanel( + $.simpleGraphPanel( + {}, 'Top $topk Client IOPS by Pool', 'This chart shows the sum of read and write IOPS from all clients by pool', 'short', 'IOPS', + 0, ||| topk($topk, round( @@ -396,11 +324,13 @@ local g = import 'grafonnet/grafana.libsonnet'; '{{name}} - write' ) ), - PoolOverviewGraphPanel( + $.simpleGraphPanel( + {}, 'Top $topk Client Bandwidth by Pool', 'The chart shows the sum of read and write bytes from all clients, by pool', 'Bps', 'Throughput', + 0, ||| topk($topk, ( @@ -415,11 +345,13 @@ local g = import 'grafonnet/grafana.libsonnet'; 12, 8 ), - PoolOverviewGraphPanel( + $.simpleGraphPanel( + {}, 'Pool Capacity Usage (RAW)', 'Historical view of capacity usage, to help identify growth and trends in pool consumption', 'bytes', 'Capacity Used', + 0, 'ceph_pool_bytes_used{%(matchers)s} * on(pool_id) group_right ceph_pool_metadata{%(matchers)s}' % $.matchers(), '{{name}}', 0, @@ -429,61 +361,6 @@ local g = import 'grafonnet/grafana.libsonnet'; ), ]), 'pool-detail.json': - local PoolDetailSingleStatPanel(format, - title, - description, - valueName, - colorValue, - gaugeMaxValue, - gaugeShow, - sparkLineShow, - thresholds, - expr, - targetFormat, - x, - y, - w, - h) = - $.addSingleStatSchema(['#299c46', 'rgba(237, 129, 40, 0.89)', '#d44a3a'], - '$datasource', - format, - title, - description, - valueName, - colorValue, - gaugeMaxValue, - gaugeShow, - sparkLineShow, - thresholds) - .addTarget($.addTargetSchema(expr, '', targetFormat)) + { gridPos: { x: x, y: y, w: w, h: h } }; - - local PoolDetailGraphPanel(alias, - title, - description, - formatY1, - labelY1, - expr, - legendFormat, - x, - y, - w, - h) = - $.graphPanelSchema(alias, - title, - description, - 'null as zero', - false, - formatY1, - 'short', - labelY1, - null, - null, - 1, - '$datasource') - .addTargets( - [$.addTargetSchema(expr, legendFormat)] - ) + { gridPos: { x: x, y: y, w: w, h: h } }; - $.dashboardSchema( 'Ceph Pool Details', '', @@ -538,7 +415,7 @@ local g = import 'grafonnet/grafana.libsonnet'; '') ) .addPanels([ - PoolDetailSingleStatPanel( + $.gaugeSingleStatPanel( 'percentunit', 'Capacity used', '', @@ -558,7 +435,7 @@ local g = import 'grafonnet/grafana.libsonnet'; 7, 7 ), - PoolDetailSingleStatPanel( + $.gaugeSingleStatPanel( 's', 'Time till full', 'Time till pool is full assuming the average fill rate of the last 6 hours', @@ -578,7 +455,7 @@ local g = import 'grafonnet/grafana.libsonnet'; 5, 7 ), - PoolDetailGraphPanel( + $.simpleGraphPanel( { read_op_per_sec: '#3F6833', @@ -588,6 +465,7 @@ local g = import 'grafonnet/grafana.libsonnet'; '', 'ops', 'Objects out(-) / in(+) ', + null, ||| deriv(ceph_pool_objects{%(matchers)s}[1m]) * on(pool_id) group_left(instance, name) ceph_pool_metadata{%(matchers)s, name=~"$pool_name"} @@ -598,7 +476,7 @@ local g = import 'grafonnet/grafana.libsonnet'; 12, 7 ), - PoolDetailGraphPanel( + $.simpleGraphPanel( { read_op_per_sec: '#3F6833', write_op_per_sec: '#E5AC0E', @@ -607,6 +485,7 @@ local g = import 'grafonnet/grafana.libsonnet'; '', 'iops', 'Read (-) / Write (+)', + null, ||| rate(ceph_pool_rd{%(matchers)s}[$__rate_interval]) * on(pool_id) group_left(instance,name) ceph_pool_metadata{%(matchers)s, name=~"$pool_name"} @@ -627,7 +506,7 @@ local g = import 'grafonnet/grafana.libsonnet'; 'writes' ) ), - PoolDetailGraphPanel( + $.simpleGraphPanel( { read_op_per_sec: '#3F6833', write_op_per_sec: '#E5AC0E', @@ -636,6 +515,7 @@ local g = import 'grafonnet/grafana.libsonnet'; '', 'Bps', 'Read (-) / Write (+)', + null, ||| rate(ceph_pool_rd_bytes{%(matchers)s}[$__rate_interval]) + on(pool_id) group_left(instance, name) ceph_pool_metadata{%(matchers)s, name=~"$pool_name"} @@ -656,7 +536,7 @@ local g = import 'grafonnet/grafana.libsonnet'; 'writes' ) ), - PoolDetailGraphPanel( + $.simpleGraphPanel( { read_op_per_sec: '#3F6833', write_op_per_sec: '#E5AC0E', @@ -665,6 +545,7 @@ local g = import 'grafonnet/grafana.libsonnet'; '', 'short', 'Objects', + null, ||| ceph_pool_objects{%(matchers)s} * on(pool_id) group_left(instance,name) ceph_pool_metadata{%(matchers)s, name=~"$pool_name"} diff --git a/monitoring/ceph-mixin/dashboards/rbd.libsonnet b/monitoring/ceph-mixin/dashboards/rbd.libsonnet index 0e273ef3d6e..b16a86dd55d 100644 --- a/monitoring/ceph-mixin/dashboards/rbd.libsonnet +++ b/monitoring/ceph-mixin/dashboards/rbd.libsonnet @@ -1,6 +1,5 @@ local g = import 'grafonnet/grafana.libsonnet'; local u = import 'utils.libsonnet'; -local c = (import '../mixin.libsonnet')._config; (import 'utils.libsonnet') { 'rbd-details.json': @@ -125,18 +124,6 @@ local c = (import '../mixin.libsonnet')._config; ), ]), 'rbd-overview.json': - local RgwOverviewStyle(alias, pattern, type, unit) = - $.addStyle(alias, - null, - ['rgba(245, 54, 54, 0.9)', 'rgba(237, 129, 40, 0.89)', 'rgba(50, 172, 45, 0.97)'], - 'YYYY-MM-DD HH:mm:ss', - 2, - 1, - pattern, - [], - type, - unit, - []); local RbdOverviewPanel(title, formatY1, expr1, @@ -266,10 +253,10 @@ local c = (import '../mixin.libsonnet')._config; '', { col: 3, desc: true }, [ - RgwOverviewStyle('Pool', 'pool', 'string', 'short'), - RgwOverviewStyle('Image', 'image', 'string', 'short'), - RgwOverviewStyle('IOPS', 'Value', 'number', 'iops'), - RgwOverviewStyle('', '/.*/', 'hidden', 'short'), + $.overviewStyle('Pool', 'pool', 'string', 'short'), + $.overviewStyle('Image', 'image', 'string', 'short'), + $.overviewStyle('IOPS', 'Value', 'number', 'iops'), + $.overviewStyle('', '/.*/', 'hidden', 'short'), ], 'Highest IOPS', 'table' @@ -297,10 +284,10 @@ local c = (import '../mixin.libsonnet')._config; '', { col: 3, desc: true }, [ - RgwOverviewStyle('Pool', 'pool', 'string', 'short'), - RgwOverviewStyle('Image', 'image', 'string', 'short'), - RgwOverviewStyle('Throughput', 'Value', 'number', 'Bps'), - RgwOverviewStyle('', '/.*/', 'hidden', 'short'), + $.overviewStyle('Pool', 'pool', 'string', 'short'), + $.overviewStyle('Image', 'image', 'string', 'short'), + $.overviewStyle('Throughput', 'Value', 'number', 'Bps'), + $.overviewStyle('', '/.*/', 'hidden', 'short'), ], 'Highest Throughput', 'table' @@ -328,10 +315,10 @@ local c = (import '../mixin.libsonnet')._config; '', { col: 3, desc: true }, [ - RgwOverviewStyle('Pool', 'pool', 'string', 'short'), - RgwOverviewStyle('Image', 'image', 'string', 'short'), - RgwOverviewStyle('Latency', 'Value', 'number', 'ns'), - RgwOverviewStyle('', '/.*/', 'hidden', 'short'), + $.overviewStyle('Pool', 'pool', 'string', 'short'), + $.overviewStyle('Image', 'image', 'string', 'short'), + $.overviewStyle('Latency', 'Value', 'number', 'ns'), + $.overviewStyle('', '/.*/', 'hidden', 'short'), ], 'Highest Latency', 'table' diff --git a/monitoring/ceph-mixin/dashboards/rgw.libsonnet b/monitoring/ceph-mixin/dashboards/rgw.libsonnet index 4b5bc6fea87..ef7095c04f4 100644 --- a/monitoring/ceph-mixin/dashboards/rgw.libsonnet +++ b/monitoring/ceph-mixin/dashboards/rgw.libsonnet @@ -1,6 +1,5 @@ local g = import 'grafonnet/grafana.libsonnet'; local u = import 'utils.libsonnet'; -local c = (import '../mixin.libsonnet')._config; (import 'utils.libsonnet') { 'radosgw-sync-overview.json': @@ -832,14 +831,14 @@ local c = (import '../mixin.libsonnet')._config; ), ] ), - $.addPieChartSchema( + $.simplePieChart( { GETs: '#7eb26d', 'Other (HEAD,POST,DELETE)': '#447ebc', PUTs: '#eab839', Requests: '#3f2b5b', Failures: '#bf1b00', - }, '$datasource', '', 'Under graph', 'pie', 'Workload Breakdown', 'current' + }, '', 'Workload Breakdown' ) .addTarget($.addTargetSchema( ||| diff --git a/monitoring/ceph-mixin/dashboards/utils.libsonnet b/monitoring/ceph-mixin/dashboards/utils.libsonnet index f9468cce745..be9d4ce2b21 100644 --- a/monitoring/ceph-mixin/dashboards/utils.libsonnet +++ b/monitoring/ceph-mixin/dashboards/utils.libsonnet @@ -218,4 +218,118 @@ local g = import 'grafonnet/grafana.libsonnet'; multi=true, allValues='.+', ), + + overviewStyle(alias, + pattern, + type, + unit, + colorMode=null, + thresholds=[], + valueMaps=[]):: + $.addStyle(alias, + colorMode, + [ + 'rgba(245, 54, 54, 0.9)', + 'rgba(237, 129, 40, 0.89)', + 'rgba(50, 172, 45, 0.97)', + ], + 'YYYY-MM-DD HH:mm:ss', + 2, + 1, + pattern, + thresholds, + type, + unit, + valueMaps), + + simpleGraphPanel(alias, + title, + description, + formatY1, + labelY1, + min, + expr, + legendFormat, + x, + y, + w, + h):: + $.graphPanelSchema(alias, + title, + description, + 'null', + false, + formatY1, + 'short', + labelY1, + null, + min, + 1, + '$datasource') + .addTargets( + [$.addTargetSchema(expr, legendFormat)] + ) + { gridPos: { x: x, y: y, w: w, h: h } }, + + simpleSingleStatPanel(format, + title, + description, + valueName, + expr, + instant, + targetFormat, + x, + y, + w, + h):: + $.addSingleStatSchema(['#299c46', 'rgba(237, 129, 40, 0.89)', '#d44a3a'], + '$datasource', + format, + title, + description, + valueName, + false, + 100, + false, + false, + '') + .addTarget($.addTargetSchema(expr, '', targetFormat, 1, instant)) + { + gridPos: { x: x, y: y, w: w, h: h }, + }, + gaugeSingleStatPanel(format, + title, + description, + valueName, + colorValue, + gaugeMaxValue, + gaugeShow, + sparkLineShow, + thresholds, + expr, + targetFormat, + x, + y, + w, + h):: + $.addSingleStatSchema(['#299c46', 'rgba(237, 129, 40, 0.89)', '#d44a3a'], + '$datasource', + format, + title, + description, + valueName, + colorValue, + gaugeMaxValue, + gaugeShow, + sparkLineShow, + thresholds) + .addTarget($.addTargetSchema(expr, '', targetFormat)) + { gridPos: { x: + x, y: y, w: w, h: h } }, + + simplePieChart(alias, description, title):: + $.addPieChartSchema(alias, + '$datasource', + description, + 'Under graph', + 'pie', + title, + 'current'), } diff --git a/monitoring/ceph-mixin/dashboards_out/host-details.json b/monitoring/ceph-mixin/dashboards_out/host-details.json index 4a972e772b6..cff3ffcd650 100644 --- a/monitoring/ceph-mixin/dashboards_out/host-details.json +++ b/monitoring/ceph-mixin/dashboards_out/host-details.json @@ -783,7 +783,7 @@ "lines": true, "linewidth": 1, "links": [ ], - "nullPointMode": "connected", + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, @@ -881,7 +881,7 @@ "lines": true, "linewidth": 1, "links": [ ], - "nullPointMode": "connected", + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, @@ -979,7 +979,7 @@ "lines": true, "linewidth": 1, "links": [ ], - "nullPointMode": "null as zero", + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, @@ -1065,7 +1065,7 @@ "lines": true, "linewidth": 1, "links": [ ], - "nullPointMode": "connected", + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, diff --git a/monitoring/ceph-mixin/dashboards_out/osds-overview.json b/monitoring/ceph-mixin/dashboards_out/osds-overview.json index 80d955de998..8ec8c36cce8 100644 --- a/monitoring/ceph-mixin/dashboards_out/osds-overview.json +++ b/monitoring/ceph-mixin/dashboards_out/osds-overview.json @@ -672,8 +672,9 @@ "colorBackground": false, "colorValue": true, "colors": [ - "#d44a3a", - "#299c46" + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" ], "datasource": "$datasource", "description": "This gauge panel shows onode Hits ratio to help determine if increasing RAM per OSD could help improve the performance of the cluster", diff --git a/monitoring/ceph-mixin/dashboards_out/pool-detail.json b/monitoring/ceph-mixin/dashboards_out/pool-detail.json index 131604ec867..4810e4c35d6 100644 --- a/monitoring/ceph-mixin/dashboards_out/pool-detail.json +++ b/monitoring/ceph-mixin/dashboards_out/pool-detail.json @@ -240,7 +240,7 @@ "lines": true, "linewidth": 1, "links": [ ], - "nullPointMode": "null as zero", + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, @@ -329,7 +329,7 @@ "lines": true, "linewidth": 1, "links": [ ], - "nullPointMode": "null as zero", + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, @@ -430,7 +430,7 @@ "lines": true, "linewidth": 1, "links": [ ], - "nullPointMode": "null as zero", + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, @@ -531,7 +531,7 @@ "lines": true, "linewidth": 1, "links": [ ], - "nullPointMode": "null as zero", + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, diff --git a/monitoring/ceph-mixin/dashboards_out/pool-overview.json b/monitoring/ceph-mixin/dashboards_out/pool-overview.json index 952f7d0bd4d..b4a11645632 100644 --- a/monitoring/ceph-mixin/dashboards_out/pool-overview.json +++ b/monitoring/ceph-mixin/dashboards_out/pool-overview.json @@ -1185,7 +1185,7 @@ "lines": true, "linewidth": 1, "links": [ ], - "nullPointMode": "null as zero", + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, @@ -1278,7 +1278,7 @@ "lines": true, "linewidth": 1, "links": [ ], - "nullPointMode": "null as zero", + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, @@ -1364,7 +1364,7 @@ "lines": true, "linewidth": 1, "links": [ ], - "nullPointMode": "null as zero", + "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false,