+ "expr": "sum by (instance) (\n ceph_disk_occupation{instance=~\"($osd_servers)\"}*0 + on (ceph_daemon) group_right(instance) ceph_osd_stat_bytes\n)",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 1,
+ "refId": "A"
+ },
+ {
+ "expr": "count by (instance) (\n ceph_disk_occupation{instance=~\"($osd_servers)\"}\n)",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 1,
+ "refId": "B"
}
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "Disk/OSD Host Summary",
- "titleSize": "h5"
- },
+ ],
+ "title": "OSD Host Capacity Summary",
+ "transform": "table",
+ "type": "table"
+ },
{
- "collapse": false,
- "height": "300",
+ "collapsed": true,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 8
+ },
+ "id": 14,
"panels": [
{
"cards": {
- "cardPadding": null,
+ "cardPadding": null,
"cardRound": null
- },
+ },
"color": {
- "cardColor": "#b4ff00",
- "colorScale": "sqrt",
- "colorScheme": "interpolateRdYlGn",
- "exponent": 0.5,
+ "cardColor": "#b4ff00",
+ "colorScale": "sqrt",
+ "colorScheme": "interpolateRdYlGn",
+ "exponent": 0.5,
"mode": "spectrum"
- },
- "dataFormat": "timeseries",
- "datasource": null,
- "description": "The heatmap categorizes disk utilization into discrete buckets (e.g util 0-5) and shows the frequency of the number of disks that fall within that range as a color. The color chosen depends on the number of disks in the 'bucket', ranging from green (low) to red (high). Hover over a colored block to show the count of disk utilization observations at that point.",
- "heatmap": {},
- "highlightCards": true,
- "id": 5,
- "links": [],
- "minSpan": 6,
- "span": 6,
+ },
+ "dataFormat": "timeseries",
+ "datasource": null,
+ "description": "The heatmap categorizes disk utilization into discrete buckets (e.g. util 0-5) and shows the frequency of the number of disks that fall within that range as a color. The color chosen depends on the number of disks in the 'bucket', ranging from green (low) to red (high). Hover over a colored block to show the number of disks at a given util% for that time interval (20 seconds).",
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 0,
+ "y": 9
+ },
+ "heatmap": {},
+ "highlightCards": true,
+ "id": 5,
+ "legend": {
+ "show": false
+ },
+ "links": [],
+ "minSpan": 12,
"targets": [
{
- "expr": "irate(node_disk_io_time_ms[5m]) and on (instance, device) ceph_disk_occupation{instance=~\"($osd_servers)\"} / 10",
- "format": "time_series",
- "intervalFactor": 2,
- "refId": "A",
- "step": 10,
+ "expr": "irate(node_disk_io_time_ms{instance=~\"[[osd_servers]]\"}[1m]) / 10 and on (instance, device) ceph_disk_occupation",
- "expr": "quantile($percentile/100, (\n max by (device) (\n irate(node_disk_io_time_ms[5m]) and on (instance, device) ceph_disk_occupation{instance=~\"($osd_servers)\"}\n ) / 10\n))",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "disk busy %",
- "refId": "A",
- "step": 10,
+ "expr": "quantile($percentile/100, (\n max by (device) (\n irate(node_disk_io_time_ms[5m]) and on (instance, device) ceph_disk_occupation{instance=~\"($osd_servers)\"}\n ) / 10\n))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "disk busy %",
+ "refId": "A",
+ "step": 10,
"textEditor": true
}
- ],
+ ],
"thresholds": [
{
- "colorMode": "custom",
- "fill": false,
- "line": true,
- "lineColor": "rgba(178, 0, 0, 0.29)",
- "op": "gt",
+ "colorMode": "custom",
+ "fill": false,
+ "line": true,
+ "lineColor": "rgba(178, 0, 0, 0.29)",
+ "op": "gt",
"value": 80
}
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Disk Utilization - $osd_servers OSDs at $percentile%ile",
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Disk Utilization - $osd_servers OSDs at $percentile%ile",
"tooltip": {
- "shared": true,
- "sort": 0,
+ "shared": true,
+ "sort": 0,
"value_type": "individual"
- },
- "type": "graph",
+ },
+ "type": "graph",
"xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
"values": []
- },
+ },
"yaxes": [
{
- "format": "short",
- "label": "",
- "logBase": 1,
- "max": "100",
- "min": "0",
+ "format": "short",
+ "label": "",
+ "logBase": 1,
+ "max": "100",
+ "min": "0",
"show": true
- },
+ },
{
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
"show": false
}
]
- },
+ },
{
"aliasColors": {
"IOPS/spindle": "#3F6833"
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": null,
- "fill": 1,
- "id": 9,
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": null,
+ "fill": 1,
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 0,
+ "y": 25
+ },
+ "id": 9,
"legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": false,
- "total": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
"values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [],
- "minSpan": 6,
- "nullPointMode": "null",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [],
- "spaceLength": 10,
- "span": 6,
- "stack": false,
- "steppedLine": false,
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "minSpan": 12,
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
"targets": [
{
- "expr": "quantile($percentile/100.0, (\n avg by (device) (\n irate(node_disk_reads_completed[5m]) and on (instance, device) ceph_disk_occupation{instance=~\"($osd_servers)\"}\n ) +\n avg by (device) (\n irate(node_disk_writes_completed[5m]) and on (instance, device) ceph_disk_occupation{instance=~\"($osd_servers)\"}\n )\n))",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "IOPS/spindle",
- "refId": "A",
- "step": 10,
+ "expr": "quantile($percentile/100.0, (\n avg by (device) (\n irate(node_disk_reads_completed[5m]) and on (instance, device) ceph_disk_occupation{instance=~\"($osd_servers)\"}\n ) +\n avg by (device) (\n irate(node_disk_writes_completed[5m]) and on (instance, device) ceph_disk_occupation{instance=~\"($osd_servers)\"}\n )\n))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "IOPS/spindle",
+ "refId": "A",
+ "step": 10,
"textEditor": true
}
- ],
- "thresholds": [],
- "timeFrom": null,
- "timeShift": null,
- "title": "IOPS per Disk @ $percentile%ile - $osd_servers OSDs",
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "IOPS per Disk @ $percentile%ile - $osd_servers OSDs",
"tooltip": {
- "shared": true,
- "sort": 0,
+ "shared": true,
+ "sort": 0,
"value_type": "individual"
- },
- "type": "graph",
+ },
+ "type": "graph",
"xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
"values": []
- },
+ },
"yaxes": [
{
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": "0",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
"show": true
- },
+ },
{
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
"show": true
}
]
- },
+ },
{
"aliasColors": {
"IOPS": "#3F6833"
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": null,
- "fill": 1,
- "id": 10,
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": null,
+ "fill": 1,
+ "gridPos": {
+ "h": 8,
+ "w": 12,
+ "x": 12,
+ "y": 25
+ },
+ "id": 10,
"legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": false,
- "total": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
"values": false
- },
- "lines": true,
- "linewidth": 1,
+ },
+ "lines": true,
+ "linewidth": 1,
"links": [
{
- "dashUri": "db/iops-by-server",
- "dashboard": "IOPS by Server",
- "includeVars": true,
- "keepTime": true,
- "targetBlank": true,
- "title": "IOPS by Server",
+ "dashUri": "db/iops-by-server",
+ "dashboard": "IOPS by Server",
+ "includeVars": true,
+ "keepTime": true,
+ "targetBlank": true,
+ "title": "IOPS by Server",
"type": "dashboard"
}
- ],
- "minSpan": 6,
- "nullPointMode": "null",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [],
- "spaceLength": 10,
- "span": 6,
- "stack": false,
- "steppedLine": false,
+ ],
+ "minSpan": 12,
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
"targets": [
{
- "expr": "quantile($percentile/100.0, (\n sum(\n irate(node_disk_reads_completed[5m]) and on (instance, device) ceph_disk_occupation{instance=~\"($osd_servers)\"}\n ) +\n sum(\n irate(node_disk_writes_completed[5m]) and on (instance, device) ceph_disk_occupation{instance=~\"($osd_servers)\"}\n )\n))",
- "format": "time_series",
- "intervalFactor": 2,
- "refId": "A",
- "step": 10,
+ "expr": "quantile($percentile/100.0, (\n sum(\n irate(node_disk_reads_completed[5m]) and on (instance, device) ceph_disk_occupation{instance=~\"($osd_servers)\"}\n ) +\n sum(\n irate(node_disk_writes_completed[5m]) and on (instance, device) ceph_disk_occupation{instance=~\"($osd_servers)\"}\n )\n))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 10,
"textEditor": true
}
- ],
- "thresholds": [],
- "timeFrom": null,
- "timeShift": null,
- "title": "Total Disk IOPS - $osd_servers OSDs",
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Total Disk IOPS - $osd_servers OSDs",
"tooltip": {
- "shared": true,
- "sort": 0,
+ "shared": true,
+ "sort": 0,
"value_type": "individual"
- },
- "type": "graph",
+ },
+ "type": "graph",
"xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
"values": []
- },
+ },
"yaxes": [
{
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": "0",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
"show": true
- },
+ },
{
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
"show": false
}
]
}
- ],
- "repeat": null,
- "repeatIteration": null,
- "repeatRowId": null,
- "showTitle": true,
- "title": "Disk/OSD Load Summary",
- "titleSize": "h5"
- },
+ ],
+ "repeat": null,
+ "title": "Disk/OSD Load Summary",
+ "type": "row"
+ },
{
- "collapse": false,
- "height": 250,
+ "collapsed": true,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 9
+ },
+ "id": 15,
"panels": [
{
"aliasColors": {
- "CPU Busy": "#447EBC",
- "CPU Busy @ 95%ile": "#890F02",
- "Cluster-wide CPU Busy @ 95%ile": "#890F02",
- "Max CPU Busy": "#BF1B00",
+ "CPU Busy": "#447EBC",
+ "CPU Busy @ 95%ile": "#890F02",
+ "Cluster-wide CPU Busy @ 95%ile": "#890F02",
+ "Max CPU Busy": "#BF1B00",
"Max CPU Busy - all OSD Hosts": "#BF1B00"
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": null,
- "fill": 3,
- "id": 11,
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": null,
+ "fill": 3,
+ "gridPos": {
+ "h": 7,
+ "w": 12,
+ "x": 0,
+ "y": 10
+ },
+ "id": 11,
"legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
"values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [],
- "minSpan": 6,
- "nullPointMode": "null as zero",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "minSpan": 12,
+ "nullPointMode": "null as zero",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
"seriesOverrides": [
{
- "alias": "Cluster-wide CPU Busy @ 95%ile",
+ "alias": "Cluster-wide CPU Busy @ 95%ile",
"fill": 0
}
- ],
- "spaceLength": 10,
- "span": 6,
- "stack": false,
- "steppedLine": false,
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
"targets": [
{
- "expr": "quantile($percentile / 100.0, (\n sum by (instance) (\n irate(node_cpu{mode=~\"(irq|nice|system|user|iowait)\"}[5m])\n ) / sum by (instance) (\n irate(node_cpu[5m])\n )\n) * 100)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "Cluster-wide CPU Busy @ $percentile%ile",
- "refId": "A",
- "step": 10,
+ "expr": "quantile($percentile / 100.0, (\n sum by (instance) (\n irate(node_cpu{mode=~\"(irq|nice|system|user)\"}[5m])\n ) / sum by (instance) (\n irate(node_cpu[5m])\n )\n) * 100)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Cluster-wide CPU Busy @ $percentile%ile",
+ "refId": "A",
+ "step": 10,
"textEditor": true
- },
+ },
{
- "expr": "avg(\n sum by (instance) (\n irate(node_cpu{instance=~\"($osd_servers)\", mode=~\"(irq|nice|system|user|iowait)\"}[5m])\n ) / sum by (instance) (\n irate(node_cpu{instance=~\"($osd_servers)\"}[5m])\n )\n) * 100",
- "format": "time_series",
- "hide": false,
- "intervalFactor": 2,
- "legendFormat": "Average OSD Host(s) CPU Busy",
- "refId": "B",
- "step": 10,
+ "expr": "avg(\n sum by (instance) (\n irate(node_cpu{instance=~\"($osd_servers)\", mode=~\"(irq|nice|system|user)\"}[5m])\n ) / sum by (instance) (\n irate(node_cpu{instance=~\"($osd_servers)\"}[5m])\n )\n) * 100",
- "description": "Each OSD consists of a Journal/WAL partition and a data partition. The RAW Capacity shown is the sum of the data partitions across all OSDs on the selected OSD hosts.",
- "format": "decbytes",
- "gauge": {
- "maxValue": 100,
- "minValue": 0,
- "show": false,
- "thresholdLabels": false,
- "thresholdMarkers": true
- },
- "height": "160",
- "id": 2,
- "interval": null,
- "links": [],
- "mappingType": 1,
- "mappingTypes": [
- {
- "name": "value to text",
- "value": 1
- },
- {
- "name": "range to text",
- "value": 2
- }
- ],
- "maxDataPoints": "",
- "minSpan": 2,
- "nullPointMode": "connected",
- "nullText": null,
- "postfix": "",
- "postfixFontSize": "50%",
- "prefix": "",
- "prefixFontSize": "50%",
- "rangeMaps": [
- {
- "from": "null",
- "text": "N/A",
- "to": "null"
- }
- ],
- "span": 2,
- "sparkline": {
- "fillColor": "rgba(31, 118, 189, 0.18)",
- "full": false,
- "lineColor": "rgb(31, 120, 193)",
- "show": false
- },
- "tableColumn": "",
- "targets": [
- {
- "expr": "sum(ceph_osd_stat_bytes and on (ceph_daemon) ceph_disk_occupation{device=~\"($device_id)\", instance=~\"($osd_servers)\"})",
- "format": "time_series",
- "intervalFactor": 2,
- "refId": "A",
- "step": 40,
- "textEditor": true
- }
- ],
- "thresholds": "",
- "title": "Raw Capacity",
- "type": "singlestat",
- "valueFontSize": "80%",
- "valueMaps": [
- {
- "op": "=",
- "text": "N/A",
- "value": "null"
- }
- ],
- "valueName": "current"
- },
- {
- "columns": [],
- "datasource": null,
- "description": "",
- "fontSize": "100%",
- "height": "160",
- "hideTimeOverride": false,
- "id": 3,
- "links": [],
- "minSpan": 3,
- "pageSize": 1000,
- "scroll": true,
- "showHeader": true,
- "sort": {
- "col": 0,
- "desc": false
- },
- "span": 3,
- "styles": [
- {
- "alias": "Host | Device | OSD ID",
- "colorMode": null,
- "colors": [
- "rgba(245, 54, 54, 0.9)",
- "rgba(237, 129, 40, 0.89)",
- "rgba(50, 172, 45, 0.97)"
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "pattern": "Metric",
- "sanitize": false,
- "thresholds": [],
- "type": "string",
- "unit": "short"
- },
- {
- "alias": "",
- "colorMode": null,
- "colors": [
- "rgba(245, 54, 54, 0.9)",
- "rgba(237, 129, 40, 0.89)",
- "rgba(50, 172, 45, 0.97)"
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "pattern": "/.*/",
- "thresholds": [],
- "type": "hidden",
- "unit": "short"
- }
- ],
- "targets": [
- {
- "expr": "max(ceph_disk_occupation{instance=~\"($osd_servers)\"}) by (instance, device, ceph_daemon)",
- "expr": "max by (device) (\n irate(node_disk_io_time_ms[5m]) and on (instance, device) ceph_disk_occupation{instance=~\"($osd_servers)\", device=~\"($device_id)\"}\n) / 10",
- "expr": "sum by (device) (irate(node_network_receive_bytes{instance=~\"($osd_servers)\", device=~\"(eth|en|bond).*\"}[5m]))",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{device}}.rx",
- "refId": "A",
- "step": 10,
- "textEditor": true
- },
- {
- "expr": "sum by (device) (irate(node_network_transmit_bytes{instance=~\"($osd_servers)\", device=~\"(eth|en|bond).*\"}[5m]))",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "{{device}}.tx",
- "refId": "B",
- "step": 10
- }
- ],
- "thresholds": [],
- "timeFrom": null,
- "timeShift": null,
- "title": "$osd_servers Network Load",
- "tooltip": {
- "shared": true,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": []
- },
- "yaxes": [
- {
- "format": "bytes",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": "0",
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": false
- }
- ]
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "datasource": null,
+ "decimals": 0,
+ "description": "Each OSD consists of a Journal/WAL partition and a data partition. The RAW Capacity shown is the sum of the data partitions across all OSDs on the selected OSD hosts.",
+ "expr": "max by (device) (\n irate(node_disk_io_time_ms[5m]) and on (instance, device) ceph_disk_occupation{instance=~\"($osd_servers).*\", device=~\"($device_id)\"}\n) / 10",