The presence of a uid entry in the dashboard definition causes
compatibility issues when moving to Grafana v5.1. With the uid entry
removed, the dashboards still work and can be migrated to v5.1.
- "Ceph Health (0:OK, 4:Warning,8:Error)": "#DEDAF7",
+ "Ceph Health": "#890F02",
+ "Ceph Health (0:OK, 4:Warning,8:Error)": "#DEDAF7",
"ceph health": "#890F02"
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": null,
- "description": "The chart plots the clusters health, over time. Health is depicted as a integer; 0, 1 or 2 where 0 is OK, 1 is WARN and 2 represents an ERROR state.",
- "fill": 1,
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": null,
+ "description": "The chart plots the cluster's health over time. Health is depicted as an integer: 0, 1 or 2, where 0 is OK, 1 is WARN and 2 represents an ERROR state.",
- "description": "This panel indicate whether scrub/deep scrub is running within the cluster. NB. If either of these features are turned off, the cluster will enter a WARN state. Click on the panel or the link below to look at cluster information in more detail",
- "format": "none",
+ ],
+ "datasource": null,
+ "description": "This panel indicates whether scrub/deep scrub is running within the cluster. NB: if either of these features is turned off, the cluster will enter a WARN state. Click on the panel or the link below to look at cluster information in more detail.",
- "expr": "sum by (instance) (\n ceph_disk_occupation{instance=~\"($osd_servers)\"}*0 + on (ceph_daemon) group_right(instance) ceph_osd_stat_bytes\n)",
- "format": "table",
- "instant": true,
- "intervalFactor": 1,
+ "expr": "sum by (instance) (\n ceph_disk_occupation{instance=~\"($osd_servers)\"}*0 + on (ceph_daemon) group_right(instance) ceph_osd_stat_bytes\n)",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 1,
"refId": "A"
- },
+ },
{
- "expr": "count by (instance) (\n ceph_disk_occupation{instance=~\"($osd_servers)\"}\n)",
- "format": "table",
- "instant": true,
- "intervalFactor": 1,
+ "expr": "count by (instance) (\n ceph_disk_occupation{instance=~\"($osd_servers)\"}\n)",
+ "format": "table",
+ "instant": true,
+ "intervalFactor": 1,
"refId": "B"
}
- ],
- "title": "OSD Host Capacity Summary",
- "transform": "table",
+ ],
+ "title": "OSD Host Capacity Summary",
+ "transform": "table",
"type": "table"
- },
+ },
{
- "collapsed": true,
+ "collapsed": true,
"gridPos": {
- "h": 1,
- "w": 24,
- "x": 0,
+ "h": 1,
+ "w": 24,
+ "x": 0,
"y": 8
- },
- "id": 14,
+ },
+ "id": 14,
"panels": [
{
"cards": {
- "cardPadding": null,
+ "cardPadding": null,
"cardRound": null
- },
+ },
"color": {
- "cardColor": "#b4ff00",
- "colorScale": "sqrt",
- "colorScheme": "interpolateRdYlGn",
- "exponent": 0.5,
+ "cardColor": "#b4ff00",
+ "colorScale": "sqrt",
+ "colorScheme": "interpolateRdYlGn",
+ "exponent": 0.5,
"mode": "spectrum"
- },
- "dataFormat": "timeseries",
- "datasource": null,
- "description": "The heatmap categorizes disk utilization into discrete buckets (e.g util 0-5) and shows the frequency of the number of disks that fall within that range as a color. The color chosen depends on the number of disks in the 'bucket', ranging from green (low) to red (high). Hover over a colored block to show the number of disks at a given util% for that time interval (20secs).",
+ },
+ "dataFormat": "timeseries",
+ "datasource": null,
+ "description": "The heatmap categorizes disk utilization into discrete buckets (e.g util 0-5) and shows the frequency of the number of disks that fall within that range as a color. The color chosen depends on the number of disks in the 'bucket', ranging from green (low) to red (high). Hover over a colored block to show the number of disks at a given util% for that time interval (20secs).",
"gridPos": {
- "h": 8,
- "w": 12,
- "x": 0,
+ "h": 8,
+ "w": 12,
+ "x": 0,
"y": 9
- },
- "heatmap": {},
- "highlightCards": true,
- "id": 5,
+ },
+ "heatmap": {},
+ "highlightCards": true,
+ "id": 5,
"legend": {
"show": false
- },
- "links": [],
- "minSpan": 12,
+ },
+ "links": [],
+ "minSpan": 12,
"targets": [
{
- "expr": "irate(node_disk_io_time_ms{instance=~\"[[osd_servers]]\"}[1m]) / 10 and on (instance, device) ceph_disk_occupation",
- "format": "time_series",
- "intervalFactor": 2,
- "refId": "A",
- "step": 10,
+ "expr": "irate(node_disk_io_time_ms{instance=~\"[[osd_servers]]\"}[1m]) / 10 and on (instance, device) ceph_disk_occupation",
- "expr": "quantile($percentile/100, (\n max by (device) (\n irate(node_disk_io_time_ms[5m]) and on (instance, device) ceph_disk_occupation{instance=~\"($osd_servers)\"}\n ) / 10\n))",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "disk busy %",
- "refId": "A",
- "step": 10,
+ "expr": "quantile($percentile/100, (\n max by (device) (\n irate(node_disk_io_time_ms[5m]) and on (instance, device) ceph_disk_occupation{instance=~\"($osd_servers)\"}\n ) / 10\n))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "disk busy %",
+ "refId": "A",
+ "step": 10,
"textEditor": true
}
- ],
+ ],
"thresholds": [
{
- "colorMode": "custom",
- "fill": false,
- "line": true,
- "lineColor": "rgba(178, 0, 0, 0.29)",
- "op": "gt",
+ "colorMode": "custom",
+ "fill": false,
+ "line": true,
+ "lineColor": "rgba(178, 0, 0, 0.29)",
+ "op": "gt",
"value": 80
}
- ],
- "timeFrom": null,
- "timeShift": null,
- "title": "Disk Utilization - $osd_servers OSDs at $percentile%ile",
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Disk Utilization - $osd_servers OSDs at $percentile%ile",
"tooltip": {
- "shared": true,
- "sort": 0,
+ "shared": true,
+ "sort": 0,
"value_type": "individual"
- },
- "type": "graph",
+ },
+ "type": "graph",
"xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
"values": []
- },
+ },
"yaxes": [
{
- "format": "short",
- "label": "",
- "logBase": 1,
- "max": "100",
- "min": "0",
+ "format": "short",
+ "label": "",
+ "logBase": 1,
+ "max": "100",
+ "min": "0",
"show": true
- },
+ },
{
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
"show": false
}
]
- },
+ },
{
"aliasColors": {
"IOPS/spindle": "#3F6833"
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": null,
- "fill": 1,
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": null,
+ "fill": 1,
"gridPos": {
- "h": 8,
- "w": 12,
- "x": 0,
+ "h": 8,
+ "w": 12,
+ "x": 0,
"y": 25
- },
- "id": 9,
+ },
+ "id": 9,
"legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": false,
- "total": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
"values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [],
- "minSpan": 12,
- "nullPointMode": "null",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "minSpan": 12,
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
"targets": [
{
- "expr": "quantile($percentile/100.0, (\n avg by (device) (\n irate(node_disk_reads_completed[5m]) and on (instance, device) ceph_disk_occupation{instance=~\"($osd_servers)\"}\n ) +\n avg by (device) (\n irate(node_disk_writes_completed[5m]) and on (instance, device) ceph_disk_occupation{instance=~\"($osd_servers)\"}\n )\n))",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "IOPS/spindle",
- "refId": "A",
- "step": 10,
+ "expr": "quantile($percentile/100.0, (\n avg by (device) (\n irate(node_disk_reads_completed[5m]) and on (instance, device) ceph_disk_occupation{instance=~\"($osd_servers)\"}\n ) +\n avg by (device) (\n irate(node_disk_writes_completed[5m]) and on (instance, device) ceph_disk_occupation{instance=~\"($osd_servers)\"}\n )\n))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "IOPS/spindle",
+ "refId": "A",
+ "step": 10,
"textEditor": true
}
- ],
- "thresholds": [],
- "timeFrom": null,
- "timeShift": null,
- "title": "IOPS per Disk @ $percentile%ile - $osd_servers OSDs",
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "IOPS per Disk @ $percentile%ile - $osd_servers OSDs",
"tooltip": {
- "shared": true,
- "sort": 0,
+ "shared": true,
+ "sort": 0,
"value_type": "individual"
- },
- "type": "graph",
+ },
+ "type": "graph",
"xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
"values": []
- },
+ },
"yaxes": [
{
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": "0",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
"show": true
- },
+ },
{
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
"show": true
}
]
- },
+ },
{
"aliasColors": {
"IOPS": "#3F6833"
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": null,
- "fill": 1,
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": null,
+ "fill": 1,
"gridPos": {
- "h": 8,
- "w": 12,
- "x": 12,
+ "h": 8,
+ "w": 12,
+ "x": 12,
"y": 25
- },
- "id": 10,
+ },
+ "id": 10,
"legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": false,
- "total": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
"values": false
- },
- "lines": true,
- "linewidth": 1,
+ },
+ "lines": true,
+ "linewidth": 1,
"links": [
{
- "dashUri": "db/iops-by-server",
- "dashboard": "IOPS by Server",
- "includeVars": true,
- "keepTime": true,
- "targetBlank": true,
- "title": "IOPS by Server",
+ "dashUri": "db/iops-by-server",
+ "dashboard": "IOPS by Server",
+ "includeVars": true,
+ "keepTime": true,
+ "targetBlank": true,
+ "title": "IOPS by Server",
"type": "dashboard"
}
- ],
- "minSpan": 12,
- "nullPointMode": "null",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
+ ],
+ "minSpan": 12,
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
"targets": [
{
- "expr": "quantile($percentile/100.0, (\n sum(\n irate(node_disk_reads_completed[5m]) and on (instance, device) ceph_disk_occupation{instance=~\"($osd_servers)\"}\n ) +\n sum(\n irate(node_disk_writes_completed[5m]) and on (instance, device) ceph_disk_occupation{instance=~\"($osd_servers)\"}\n )\n))",
- "format": "time_series",
- "intervalFactor": 2,
- "refId": "A",
- "step": 10,
+ "expr": "quantile($percentile/100.0, (\n sum(\n irate(node_disk_reads_completed[5m]) and on (instance, device) ceph_disk_occupation{instance=~\"($osd_servers)\"}\n ) +\n sum(\n irate(node_disk_writes_completed[5m]) and on (instance, device) ceph_disk_occupation{instance=~\"($osd_servers)\"}\n )\n))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 10,
"textEditor": true
}
- ],
- "thresholds": [],
- "timeFrom": null,
- "timeShift": null,
- "title": "Total Disk IOPS - $osd_servers OSDs",
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Total Disk IOPS - $osd_servers OSDs",
"tooltip": {
- "shared": true,
- "sort": 0,
+ "shared": true,
+ "sort": 0,
"value_type": "individual"
- },
- "type": "graph",
+ },
+ "type": "graph",
"xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
"values": []
- },
+ },
"yaxes": [
{
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": "0",
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
"show": true
- },
+ },
{
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
"show": false
}
]
}
- ],
- "repeat": null,
- "title": "Disk/OSD Load Summary",
+ ],
+ "repeat": null,
+ "title": "Disk/OSD Load Summary",
"type": "row"
- },
+ },
{
- "collapsed": true,
+ "collapsed": true,
"gridPos": {
- "h": 1,
- "w": 24,
- "x": 0,
+ "h": 1,
+ "w": 24,
+ "x": 0,
"y": 9
- },
- "id": 15,
+ },
+ "id": 15,
"panels": [
{
"aliasColors": {
- "CPU Busy": "#447EBC",
- "CPU Busy @ 95%ile": "#890F02",
- "Cluster-wide CPU Busy @ 95%ile": "#890F02",
- "Max CPU Busy": "#BF1B00",
+ "CPU Busy": "#447EBC",
+ "CPU Busy @ 95%ile": "#890F02",
+ "Cluster-wide CPU Busy @ 95%ile": "#890F02",
+ "Max CPU Busy": "#BF1B00",
"Max CPU Busy - all OSD Hosts": "#BF1B00"
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": null,
- "fill": 3,
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": null,
+ "fill": 3,
"gridPos": {
- "h": 7,
- "w": 12,
- "x": 0,
+ "h": 7,
+ "w": 12,
+ "x": 0,
"y": 10
- },
- "id": 11,
+ },
+ "id": 11,
"legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
"values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [],
- "minSpan": 12,
- "nullPointMode": "null as zero",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "minSpan": 12,
+ "nullPointMode": "null as zero",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
"seriesOverrides": [
{
- "alias": "Cluster-wide CPU Busy @ 95%ile",
+ "alias": "Cluster-wide CPU Busy @ 95%ile",
"fill": 0
}
- ],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
+ ],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
"targets": [
{
- "expr": "quantile($percentile / 100.0, (\n sum by (instance) (\n irate(node_cpu{mode=~\"(irq|nice|system|user)\"}[5m])\n ) / sum by (instance) (\n irate(node_cpu[5m])\n )\n) * 100)",
- "format": "time_series",
- "intervalFactor": 2,
- "legendFormat": "Cluster-wide CPU Busy @ $percentile%ile",
- "refId": "A",
- "step": 10,
+ "expr": "quantile($percentile / 100.0, (\n sum by (instance) (\n irate(node_cpu{mode=~\"(irq|nice|system|user)\"}[5m])\n ) / sum by (instance) (\n irate(node_cpu[5m])\n )\n) * 100)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Cluster-wide CPU Busy @ $percentile%ile",
+ "refId": "A",
+ "step": 10,
"textEditor": true
- },
+ },
{
- "expr": "avg(\n sum by (instance) (\n irate(node_cpu{instance=~\"($osd_servers)\", mode=~\"(irq|nice|system|user)\"}[5m])\n ) / sum by (instance) (\n irate(node_cpu{instance=~\"($osd_servers)\"}[5m])\n )\n) * 100",
- "format": "time_series",
- "hide": false,
- "intervalFactor": 2,
- "legendFormat": "Average OSD Host(s) CPU Busy",
- "refId": "B",
- "step": 10,
+ "expr": "avg(\n sum by (instance) (\n irate(node_cpu{instance=~\"($osd_servers)\", mode=~\"(irq|nice|system|user)\"}[5m])\n ) / sum by (instance) (\n irate(node_cpu{instance=~\"($osd_servers)\"}[5m])\n )\n) * 100",
- "description": "Scrub activity takes place daily basis across the OSD's and performs object size and attribute checks. Scrub activity can be controlled with the \"ceph osd scrub\" command.",
- "format": "none",
+ ],
+ "datasource": null,
+ "description": "Scrub activity takes place on a daily basis across the OSDs and performs object size and attribute checks. Scrub activity can be controlled with the \"ceph osd scrub\" command.",
- "description": "Deep scrub acts on the objects within placement groups (PGs). Objects are read, and checksum's compared to protect against silent bit-rot events. Although a weekly activity, in large clusters is normal to see deep-scrub active on a daily basis. Scrub activity can be controlled with the \"ceph osd scrub\" command.",
- "format": "none",
+ ],
+ "datasource": null,
+ "description": "Deep scrub acts on the objects within placement groups (PGs). Objects are read and checksums compared to protect against silent bit-rot events. Although a weekly activity, in large clusters it is normal to see deep-scrub active on a daily basis. Scrub activity can be controlled with the \"ceph osd scrub\" command.",
- "description": "With the recovery flag enabled, disruption in the cluster will result in data being recreated to freespace within the cluster to honor the pool's protection schema. Under some circumstances, you may use the norecover flag to prevent automatic recovery.",
- "format": "none",
+ ],
+ "datasource": null,
+ "description": "With the recovery flag enabled, disruption in the cluster will result in data being recreated to freespace within the cluster to honor the pool's protection schema. Under some circumstances, you may use the norecover flag to prevent automatic recovery.",
- "description": "The OUT flag setting allows the mon's to mark OSD's as out of the configuration when they stop sending hearbeats to the mon's. By marking them OUT, recovery takes place. However, for planned maintenance you can set the cluster to noout to disable this behavior.",
- "format": "none",
+ ],
+ "datasource": null,
+ "description": "The OUT flag setting allows the mons to mark OSDs as out of the configuration when they stop sending heartbeats to the mons. By marking them OUT, recovery takes place. However, for planned maintenance you can set the cluster to noout to disable this behavior.",
+ "format": "none",
"gauge": {
- "maxValue": 100,
- "minValue": 0,
- "show": false,
- "thresholdLabels": false,
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
"thresholdMarkers": true
- },
+ },
"gridPos": {
- "h": 3,
- "w": 2,
- "x": 16,
+ "h": 3,
+ "w": 2,
+ "x": 16,
"y": 5
- },
- "height": "95",
- "hideTimeOverride": true,
- "id": 17,
- "interval": null,
- "links": [],
- "mappingType": 1,
+ },
+ "height": "95",
+ "hideTimeOverride": true,
+ "id": 17,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
"mappingTypes": [
{
- "name": "value to text",
+ "name": "value to text",
"value": 1
- },
+ },
{
- "name": "range to text",
+ "name": "range to text",
"value": 2
}
- ],
- "maxDataPoints": "",
- "minSpan": 2,
- "nullPointMode": "connected",
- "nullText": null,
- "postfix": "",
- "postfixFontSize": "50%",
- "prefix": "",
- "prefixFontSize": "50%",
+ ],
+ "maxDataPoints": "",
+ "minSpan": 2,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
"rangeMaps": [
{
- "from": "null",
- "text": "N/A",
+ "from": "null",
+ "text": "N/A",
"to": "null"
}
- ],
+ ],
"sparkline": {
- "fillColor": "rgba(31, 118, 189, 0.18)",
- "full": false,
- "lineColor": "rgb(31, 120, 193)",
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
"show": false
- },
- "tableColumn": "",
+ },
+ "tableColumn": "",
"targets": [
{
- "expr": "2*scalar(ceph_osd_flag_noout)",
- "format": "time_series",
- "intervalFactor": 2,
- "refId": "A",
- "step": 2,
+ "expr": "2*scalar(ceph_osd_flag_noout)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 2,
"textEditor": true
}
- ],
- "thresholds": "1,2",
- "timeFrom": "1m",
- "title": "OUT",
- "type": "singlestat",
- "valueFontSize": "40%",
+ ],
+ "thresholds": "1,2",
+ "timeFrom": "1m",
+ "title": "OUT",
+ "type": "singlestat",
+ "valueFontSize": "40%",
"valueMaps": [
{
- "op": "=",
- "text": "N/A",
+ "op": "=",
+ "text": "N/A",
"value": "null"
- },
+ },
{
- "op": "=",
- "text": "ENABLED",
+ "op": "=",
+ "text": "ENABLED",
"value": "0"
- },
+ },
{
- "op": "=",
- "text": "ACTIVE",
+ "op": "=",
+ "text": "ACTIVE",
"value": "1"
- },
+ },
{
- "op": "=",
- "text": "DISABLED",
+ "op": "=",
+ "text": "DISABLED",
"value": "2"
- },
+ },
{
- "op": "=",
- "text": "DISABLED",
+ "op": "=",
+ "text": "DISABLED",
"value": "3"
}
- ],
+ ],
"valueName": "current"
- },
+ },
{
- "cacheTimeout": null,
- "colorBackground": true,
- "colorValue": false,
+ "cacheTimeout": null,
+ "colorBackground": true,
+ "colorValue": false,
"colors": [
- "rgba(50, 172, 45, 0.97)",
- "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)",
+ "rgba(237, 129, 40, 0.89)",
"rgba(245, 54, 54, 0.9)"
- ],
- "datasource": null,
- "description": "The down feature allows OSD's to mark their peers as DOWN when they are not reachable. However, if there is a poor network or planned outages, you may want to set this flag to nodown to prevent OSD's flapping between up/down states",
- "format": "none",
+ ],
+ "datasource": null,
+ "description": "The down feature allows OSD's to mark their peers as DOWN when they are not reachable. However, if there is a poor network or planned outages, you may want to set this flag to nodown to prevent OSD's flapping between up/down states",
- "description": "Shows the overall health of the ceph cluster.",
- "format": "none",
+ ],
+ "datasource": null,
+ "description": "Shows the overall health of the ceph cluster.",
+ "format": "none",
"gauge": {
- "maxValue": 100,
- "minValue": 0,
- "show": false,
- "thresholdLabels": false,
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
"thresholdMarkers": true
- },
+ },
"gridPos": {
- "h": 3,
- "w": 24,
- "x": 0,
+ "h": 3,
+ "w": 24,
+ "x": 0,
"y": 0
- },
- "height": "70",
- "hideTimeOverride": true,
- "id": 1,
- "interval": null,
- "links": [],
- "mappingType": 1,
+ },
+ "height": "70",
+ "hideTimeOverride": true,
+ "id": 1,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
"mappingTypes": [
{
- "name": "value to text",
+ "name": "value to text",
"value": 1
- },
+ },
{
- "name": "range to text",
+ "name": "range to text",
"value": 2
}
- ],
- "maxDataPoints": "",
- "minSpan": 2,
- "nullPointMode": "connected",
- "nullText": null,
- "postfix": "",
- "postfixFontSize": "50%",
- "prefix": "",
- "prefixFontSize": "50%",
+ ],
+ "maxDataPoints": "",
+ "minSpan": 2,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
"rangeMaps": [
{
- "from": "0",
- "text": "HEALTH OK",
+ "from": "0",
+ "text": "HEALTH OK",
"to": "1"
- },
+ },
{
- "from": "1",
- "text": "HEALTH WARNING",
+ "from": "1",
+ "text": "HEALTH WARNING",
"to": "4"
- },
+ },
{
- "from": "5",
- "text": "HEALTH ERROR",
+ "from": "5",
+ "text": "HEALTH ERROR",
"to": "99"
- },
+ },
{
- "from": "-10",
- "text": "NODATA",
+ "from": "-10",
+ "text": "NODATA",
"to": "0"
}
- ],
+ ],
"sparkline": {
- "fillColor": "rgba(31, 118, 189, 0.18)",
- "full": false,
- "lineColor": "rgb(31, 120, 193)",
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
"show": false
- },
- "tableColumn": "",
+ },
+ "tableColumn": "",
"targets": [
{
- "expr": "ceph_health_status",
- "format": "time_series",
- "intervalFactor": 2,
- "refId": "A",
- "step": 2,
+ "expr": "ceph_health_status",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 2,
"textEditor": true
}
- ],
- "thresholds": "1,5",
- "timeFrom": "1m",
- "timeShift": null,
- "title": "",
- "type": "singlestat",
- "valueFontSize": "50%",
+ ],
+ "thresholds": "1,5",
+ "timeFrom": "1m",
+ "timeShift": null,
+ "title": "",
+ "type": "singlestat",
+ "valueFontSize": "50%",
"valueMaps": [
{
- "op": "=",
- "text": "HEALTH OK",
+ "op": "=",
+ "text": "HEALTH OK",
"value": "0"
- },
+ },
{
- "op": "=",
- "text": "HEALTH WARN",
+ "op": "=",
+ "text": "HEALTH WARN",
"value": "1"
- },
+ },
{
- "op": "=",
- "text": "HEALTH ERROR",
+ "op": "=",
+ "text": "HEALTH ERROR",
"value": "2"
}
- ],
+ ],
"valueName": "current"
- },
+ },
{
- "collapsed": true,
+ "collapsed": true,
"gridPos": {
- "h": 1,
- "w": 24,
- "x": 0,
+ "h": 1,
+ "w": 24,
+ "x": 0,
"y": 3
- },
- "id": 22,
+ },
+ "id": 22,
"panels": [
{
"aliasColors": {
"Ceph Health": "#0a50a1"
- },
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": null,
- "description": "The chart plots the clusters health, over time. The colored bands show 3 distinct areas; green (OK), yellow(WARN) and red(ERROR). The plot line in blue is this clusters current health, so you can see over time how long the cluster spends in an OK, WARN or ERROR state",
- "fill": 0,
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": null,
+ "description": "The chart plots the clusters health, over time. The colored bands show 3 distinct areas; green (OK), yellow(WARN) and red(ERROR). The plot line in blue is this clusters current health, so you can see over time how long the cluster spends in an OK, WARN or ERROR state",
+ "description": "Show cluster flags that determine automatic maintenance and recovery operations",
"gridPos": {
- "h": 3,
- "w": 2,
- "x": 8,
+ "h": 3,
+ "w": 2,
+ "x": 8,
"y": 5
- },
- "id": 31,
- "links": [],
- "minSpan": 2,
- "mode": "html",
- "title": "",
- "transparent": true,
+ },
+ "id": 31,
+ "links": [],
+ "minSpan": 2,
+ "mode": "html",
+ "title": "",
+ "transparent": true,
"type": "text"
- },
+ },
{
- "cacheTimeout": null,
- "colorBackground": true,
- "colorValue": false,
+ "cacheTimeout": null,
+ "colorBackground": true,
+ "colorValue": false,
"colors": [
- "rgba(50, 172, 45, 0.97)",
- "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)",
+ "rgba(237, 129, 40, 0.89)",
"rgba(245, 54, 54, 0.9)"
- ],
- "datasource": null,
- "description": "Scrub activity takes place daily basis across the OSD's and performs object size and attribute checks. Scrub activity can be controlled with the \"ceph osd scrub\" command.",
- "format": "none",
+ ],
+ "datasource": null,
+ "description": "Scrub activity takes place on a daily basis across the OSDs and performs object size and attribute checks. Scrub activity can be controlled with the \"ceph osd scrub\" command.",
- "description": "Deep scrub acts on the objects within placement groups (PGs). Objects are read, and checksum's compared to protect against silent bit-rot events. Although a weekly activity, in large clusters is normal to see deep-scrub active on a daily basis. Scrub activity can be controlled with the \"ceph osd scrub\" command.",
- "format": "none",
+ ],
+ "datasource": null,
+ "description": "Deep scrub acts on the objects within placement groups (PGs). Objects are read and checksums compared to protect against silent bit-rot events. Although a weekly activity, in large clusters it is normal to see deep-scrub active on a daily basis. Scrub activity can be controlled with the \"ceph osd scrub\" command.",
- "description": "The OUT flag setting allows the mon's to mark OSD's as out of the configuration when they stop sending hearbeats to the mon's. By marking them OUT, recovery takes place. However, for planned maintenance you can set the cluster to noout to disable this behavior.",
- "format": "none",
+ ],
+ "datasource": null,
+ "description": "The OUT flag setting allows the mons to mark OSDs as out of the configuration when they stop sending heartbeats to the mons. By marking them OUT, recovery takes place. However, for planned maintenance you can set the cluster to noout to disable this behavior.",
+ "format": "none",
"gauge": {
- "maxValue": 100,
- "minValue": 0,
- "show": false,
- "thresholdLabels": false,
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
"thresholdMarkers": true
- },
+ },
"gridPos": {
- "h": 3,
- "w": 2,
- "x": 14,
+ "h": 3,
+ "w": 2,
+ "x": 14,
"y": 5
- },
- "height": "95",
- "hideTimeOverride": true,
- "id": 12,
- "interval": null,
- "links": [],
- "mappingType": 1,
+ },
+ "height": "95",
+ "hideTimeOverride": true,
+ "id": 12,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
"mappingTypes": [
{
- "name": "value to text",
+ "name": "value to text",
"value": 1
- },
+ },
{
- "name": "range to text",
+ "name": "range to text",
"value": 2
}
- ],
- "maxDataPoints": "",
- "minSpan": 2,
- "nullPointMode": "connected",
- "nullText": null,
- "postfix": "",
- "postfixFontSize": "50%",
- "prefix": "",
- "prefixFontSize": "50%",
+ ],
+ "maxDataPoints": "",
+ "minSpan": 2,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
"rangeMaps": [
{
- "from": "null",
- "text": "N/A",
+ "from": "null",
+ "text": "N/A",
"to": "null"
}
- ],
+ ],
"sparkline": {
- "fillColor": "rgba(31, 118, 189, 0.18)",
- "full": false,
- "lineColor": "rgb(31, 120, 193)",
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
"show": false
- },
- "tableColumn": "",
+ },
+ "tableColumn": "",
"targets": [
{
- "expr": "2*scalar(ceph_osd_flag_noout)",
- "format": "time_series",
- "intervalFactor": 2,
- "refId": "A",
- "step": 2,
+ "expr": "2*scalar(ceph_osd_flag_noout)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 2,
"textEditor": true
}
- ],
- "thresholds": "1,2",
- "timeFrom": "1m",
- "timeShift": null,
- "title": "OUT",
- "type": "singlestat",
- "valueFontSize": "40%",
+ ],
+ "thresholds": "1,2",
+ "timeFrom": "1m",
+ "timeShift": null,
+ "title": "OUT",
+ "type": "singlestat",
+ "valueFontSize": "40%",
"valueMaps": [
{
- "op": "=",
- "text": "N/A",
+ "op": "=",
+ "text": "N/A",
"value": "null"
- },
+ },
{
- "op": "=",
- "text": "ENABLED",
+ "op": "=",
+ "text": "ENABLED",
"value": "0"
- },
+ },
{
- "op": "=",
- "text": "ACTIVE",
+ "op": "=",
+ "text": "ACTIVE",
"value": "1"
- },
+ },
{
- "op": "=",
- "text": "DISABLED",
+ "op": "=",
+ "text": "DISABLED",
"value": "2"
- },
+ },
{
- "op": "=",
- "text": "DISABLED",
+ "op": "=",
+ "text": "DISABLED",
"value": "3"
}
- ],
+ ],
"valueName": "current"
- },
+ },
{
- "cacheTimeout": null,
- "colorBackground": true,
- "colorValue": false,
+ "cacheTimeout": null,
+ "colorBackground": true,
+ "colorValue": false,
"colors": [
- "rgba(50, 172, 45, 0.97)",
- "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)",
+ "rgba(237, 129, 40, 0.89)",
"rgba(245, 54, 54, 0.9)"
- ],
- "datasource": null,
- "description": "The down feature allows OSD's to mark their peers as DOWN when they are not reachable. However, if there is a poor network or planned outages, you may want to set this flag to nodown to prevent OSD's flapping between up/down states",
- "format": "none",
+ ],
+ "datasource": null,
+ "description": "The down feature allows OSD's to mark their peers as DOWN when they are not reachable. However, if there is a poor network or planned outages, you may want to set this flag to nodown to prevent OSD's flapping between up/down states",
- "description": "With the recovery flag enabled, disruption in the cluster will result in data being recreated to freespace within the cluster to honor the pool's protection schema. Under some circumstances, you may use the norecover flag to prevent automatic recovery.",
- "format": "none",
+ ],
+ "datasource": null,
+ "description": "With the recovery flag enabled, disruption in the cluster will result in data being recreated to freespace within the cluster to honor the pool's protection schema. Under some circumstances, you may use the norecover flag to prevent automatic recovery.",
- "content": "<h1>Ceph Filestore I/O Process</h1>\n<p style=\"text-align: justify;\">\nA write request is first committed to a journal using direct-io (<i><b>apply</b></i>). Once this write is complete, the data is persisted to HDD by a second 'buffered' write operation (<i><b>commit</b></i>). The commit operation is basically a measure of time taken to perform a <i>syncfs</i> call to flush dirty pages to disk, and is therefore <b>not</b> a time associated with any specific client initiated operation.<p> The tables on the right show commit and apply latencies for all OSDs, or use the pull down above to focus on a specific OSD.\n",
+ "content": "<h1>Ceph Filestore I/O Process</h1>\n<p style=\"text-align: justify;\">\nA write request is first committed to a journal using direct-io (<i><b>apply</b></i>). Once this write is complete, the data is persisted to HDD by a second 'buffered' write operation (<i><b>commit</b></i>). The commit operation is basically a measure of time taken to perform a <i>syncfs</i> call to flush dirty pages to disk, and is therefore <b>not</b> a time associated with any specific client initiated operation.<p> The tables on the right show commit and apply latencies for all OSDs, or use the pull down above to focus on a specific OSD.\n",
- "description": "Shows the latency for a given OSD, allowing you to compare a specific OSD against the $percentile%ile graph. Note that when the \"OSD Id\" pull-down shows **ALL**, the graph will be empty to avoid the chart being unreadable.",
- "fill": 0,
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "Local",
+ "description": "Shows the latency for a given OSD, allowing you to compare a specific OSD against the $percentile%ile graph. Note that when the \"OSD Id\" pull-down shows **ALL**, the graph will be empty to avoid the chart being unreadable.",
- "title": "Filestore IO Summary - all OSD's @ $percentile%ile",
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Filestore IO Summary - all OSD's @ $percentile%ile",
"tooltip": {
- "shared": true,
- "sort": 0,
+ "shared": true,
+ "sort": 0,
"value_type": "individual"
- },
- "type": "graph",
+ },
+ "type": "graph",
"xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
"values": []
- },
+ },
"yaxes": [
{
- "format": "ms",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": "0",
+ "format": "ms",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
"show": true
- },
+ },
{
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
"show": false
}
]
}
- ],
- "repeat": null,
- "title": "Filestore OSD Latencies",
+ ],
+ "repeat": null,
+ "title": "Filestore OSD Latencies",
"type": "row"
- },
+ },
{
- "collapsed": true,
+ "collapsed": true,
"gridPos": {
- "h": 1,
- "w": 24,
- "x": 0,
+ "h": 1,
+ "w": 24,
+ "x": 0,
"y": 8
- },
- "id": 26,
+ },
+ "id": 26,
"panels": [
{
- "content": "<h1>Ceph Bluestore I/O Process</h1>\n<p style=\"text-align: justify;\">\nUnlike filestore, bluestore does not suffer from a double-write penalty (i.e write to journal then write to HDD). With bluestore, once a write is scheduled (<b>submit</b> and <b>throttle</b> latencies), it is done directly to the disk (<b>AIO wait</b>), and then the metadata relating to the object is changed (<b>kv_commit</b>). Writes are not considered complete until the kv store is updated. <p> The tables on the right focus on the top 10 Bluestore OSDs with the highest latencies.\n",
+ "content": "<h1>Ceph Bluestore I/O Process</h1>\n<p style=\"text-align: justify;\">\nUnlike filestore, bluestore does not suffer from a double-write penalty (i.e. write to journal then write to HDD). With bluestore, once a write is scheduled (<b>submit</b> and <b>throttle</b> latencies), it is done directly to the disk (<b>AIO wait</b>), and then the metadata relating to the object is changed (<b>kv_commit</b>). Writes are not considered complete until the kv store is updated. <p> The tables on the right focus on the top 10 Bluestore OSDs with the highest latencies.\n",
- "description": "Time spent preparing the request (transaction)",
- "fontSize": "100%",
+ ],
+ "datasource": "Local",
+ "description": "Time spent preparing the request (transaction)",
+ "fontSize": "100%",
"gridPos": {
- "h": 8,
- "w": 4,
- "x": 8,
+ "h": 8,
+ "w": 4,
+ "x": 8,
"y": 9
- },
- "height": "310",
- "hideTimeOverride": true,
- "id": 18,
- "links": [],
- "minSpan": 4,
- "pageSize": 5,
- "scroll": false,
- "showHeader": true,
+ },
+ "height": "310",
+ "hideTimeOverride": true,
+ "id": 18,
+ "links": [],
+ "minSpan": 4,
+ "pageSize": 5,
+ "scroll": false,
+ "showHeader": true,
"sort": {
- "col": 1,
+ "col": 1,
"desc": false
- },
+ },
"styles": [
{
- "alias": "Time",
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "pattern": "Time",
+ "alias": "Time",
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "pattern": "Time",
"type": "date"
- },
+ },
{
- "alias": "Submit Latency",
- "colorMode": "row",
+ "alias": "Submit Latency",
+ "colorMode": "row",
"colors": [
- "rgba(50, 172, 45, 0.97)",
- "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)",
+ "rgba(237, 129, 40, 0.89)",
"rgba(245, 54, 54, 0.9)"
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "pattern": "Current",
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "pattern": "Current",
"thresholds": [
- ".001",
+ ".001",
".003"
- ],
- "type": "number",
+ ],
+ "type": "number",
"unit": "s"
- },
+ },
{
- "alias": "OSD Id",
- "colorMode": null,
+ "alias": "OSD Id",
+ "colorMode": null,
"colors": [
- "rgba(245, 54, 54, 0.9)",
- "rgba(237, 129, 40, 0.89)",
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
"rgba(50, 172, 45, 0.97)"
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "pattern": "Metric",
- "thresholds": [],
- "type": "number",
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "pattern": "Metric",
+ "thresholds": [],
+ "type": "number",
"unit": "short"
- },
+ },
{
- "alias": "",
- "colorMode": null,
+ "alias": "",
+ "colorMode": null,
"colors": [
- "rgba(245, 54, 54, 0.9)",
- "rgba(237, 129, 40, 0.89)",
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
"rgba(50, 172, 45, 0.97)"
- ],
- "decimals": 2,
- "pattern": "/.*/",
- "thresholds": [],
- "type": "number",
+ ],
+ "decimals": 2,
+ "pattern": "/.*/",
+ "thresholds": [],
+ "type": "number",
"unit": "short"
}
- ],
+ ],
"targets": [
{
- "expr": "",
- "format": "table",
- "intervalFactor": 2,
- "refId": "A",
+ "expr": "",
+ "format": "table",
+ "intervalFactor": 2,
+ "refId": "A",
"textEditor": true
}
- ],
- "timeFrom": "2m",
- "title": "",
- "transform": "timeseries_aggregations",
+ ],
+ "timeFrom": "2m",
+ "title": "",
+ "transform": "timeseries_aggregations",
"type": "table"
- },
+ },
{
"columns": [
{
- "text": "Current",
+ "text": "Current",
"value": "current"
}
- ],
- "datasource": "Local",
- "description": "Time requests wait due to throttling or busy conditions",
- "fontSize": "100%",
+ ],
+ "datasource": "Local",
+ "description": "Time requests wait due to throttling or busy conditions",
+ "fontSize": "100%",
"gridPos": {
- "h": 8,
- "w": 4,
- "x": 12,
+ "h": 8,
+ "w": 4,
+ "x": 12,
"y": 9
- },
- "height": "310",
- "hideTimeOverride": true,
- "id": 19,
- "links": [],
- "minSpan": 4,
- "pageSize": 5,
- "scroll": false,
- "showHeader": true,
+ },
+ "height": "310",
+ "hideTimeOverride": true,
+ "id": 19,
+ "links": [],
+ "minSpan": 4,
+ "pageSize": 5,
+ "scroll": false,
+ "showHeader": true,
"sort": {
- "col": 1,
+ "col": 1,
"desc": true
- },
+ },
"styles": [
{
- "alias": "Time",
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "pattern": "Time",
+ "alias": "Time",
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "pattern": "Time",
"type": "date"
- },
+ },
{
- "alias": "Throttle Latency",
- "colorMode": "row",
+ "alias": "Throttle Latency",
+ "colorMode": "row",
"colors": [
- "rgba(50, 172, 45, 0.97)",
- "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)",
+ "rgba(237, 129, 40, 0.89)",
"rgba(245, 54, 54, 0.9)"
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "pattern": "Current",
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "pattern": "Current",
"thresholds": [
- ".002",
+ ".002",
".005"
- ],
- "type": "number",
+ ],
+ "type": "number",
"unit": "s"
- },
+ },
{
- "alias": "OSD Id",
- "colorMode": null,
+ "alias": "OSD Id",
+ "colorMode": null,
"colors": [
- "rgba(50, 172, 45, 0.97)",
- "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)",
+ "rgba(237, 129, 40, 0.89)",
"rgba(245, 54, 54, 0.9)"
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "pattern": "Metric",
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "pattern": "Metric",
"thresholds": [
""
- ],
- "type": "number",
+ ],
+ "type": "number",
"unit": "short"
- },
+ },
{
- "alias": "",
- "colorMode": null,
+ "alias": "",
+ "colorMode": null,
"colors": [
- "rgba(245, 54, 54, 0.9)",
- "rgba(237, 129, 40, 0.89)",
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
"rgba(50, 172, 45, 0.97)"
- ],
- "decimals": 2,
- "pattern": "/.*/",
- "thresholds": [],
- "type": "number",
+ ],
+ "decimals": 2,
+ "pattern": "/.*/",
+ "thresholds": [],
+ "type": "number",
"unit": "short"
}
- ],
+ ],
"targets": [
{
- "expr": "",
- "format": "table",
- "intervalFactor": 2,
- "refId": "A",
+ "expr": "",
+ "format": "table",
+ "intervalFactor": 2,
+ "refId": "A",
"textEditor": true
}
- ],
- "timeFrom": "2m",
- "title": "",
- "transform": "timeseries_aggregations",
+ ],
+ "timeFrom": "2m",
+ "title": "",
+ "transform": "timeseries_aggregations",
"type": "table"
- },
+ },
{
"columns": [
{
- "text": "Current",
+ "text": "Current",
"value": "current"
}
- ],
- "datasource": "Local",
- "description": "Time spent waiting for the physical I/O request to complete",
- "fontSize": "100%",
+ ],
+ "datasource": "Local",
+ "description": "Time spent waiting for the physical I/O request to complete",
+ "fontSize": "100%",
"gridPos": {
- "h": 8,
- "w": 4,
- "x": 16,
+ "h": 8,
+ "w": 4,
+ "x": 16,
"y": 9
- },
- "height": "310",
- "hideTimeOverride": true,
- "id": 20,
- "links": [],
- "minSpan": 4,
- "pageSize": 5,
- "scroll": false,
- "showHeader": true,
+ },
+ "height": "310",
+ "hideTimeOverride": true,
+ "id": 20,
+ "links": [],
+ "minSpan": 4,
+ "pageSize": 5,
+ "scroll": false,
+ "showHeader": true,
"sort": {
- "col": 1,
+ "col": 1,
"desc": true
- },
+ },
"styles": [
{
- "alias": "Time",
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "pattern": "Time",
+ "alias": "Time",
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "pattern": "Time",
"type": "date"
- },
+ },
{
- "alias": "AIO Wait Time",
- "colorMode": "row",
+ "alias": "AIO Wait Time",
+ "colorMode": "row",
"colors": [
- "rgba(50, 172, 45, 0.97)",
- "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)",
+ "rgba(237, 129, 40, 0.89)",
"rgba(245, 54, 54, 0.9)"
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "pattern": "Current",
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "pattern": "Current",
"thresholds": [
- ".020",
+ ".020",
".050"
- ],
- "type": "number",
+ ],
+ "type": "number",
"unit": "s"
- },
+ },
{
- "alias": "OSD Id",
- "colorMode": null,
+ "alias": "OSD Id",
+ "colorMode": null,
"colors": [
- "rgba(245, 54, 54, 0.9)",
- "rgba(237, 129, 40, 0.89)",
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
"rgba(50, 172, 45, 0.97)"
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "pattern": "Metric",
- "thresholds": [],
- "type": "number",
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "pattern": "Metric",
+ "thresholds": [],
+ "type": "number",
"unit": "short"
- },
+ },
{
- "alias": "",
- "colorMode": null,
+ "alias": "",
+ "colorMode": null,
"colors": [
- "rgba(245, 54, 54, 0.9)",
- "rgba(237, 129, 40, 0.89)",
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
"rgba(50, 172, 45, 0.97)"
- ],
- "decimals": 2,
- "pattern": "/.*/",
- "thresholds": [],
- "type": "number",
+ ],
+ "decimals": 2,
+ "pattern": "/.*/",
+ "thresholds": [],
+ "type": "number",
"unit": "short"
}
- ],
+ ],
"targets": [
{
- "expr": "",
- "format": "table",
- "intervalFactor": 2,
- "refId": "A",
+ "expr": "",
+ "format": "table",
+ "intervalFactor": 2,
+ "refId": "A",
"textEditor": true
}
- ],
- "timeFrom": "2m",
- "title": "",
- "transform": "timeseries_aggregations",
+ ],
+ "timeFrom": "2m",
+ "title": "",
+ "transform": "timeseries_aggregations",
"type": "table"
- },
+ },
{
"columns": [
{
- "text": "Current",
+ "text": "Current",
"value": "current"
}
- ],
- "datasource": "Local",
- "description": "Time spent waiting for rocksdb (metadata store) to commit meta data",
- "fontSize": "100%",
+ ],
+ "datasource": "Local",
+ "description": "Time spent waiting for rocksdb (metadata store) to commit metadata",
+ "fontSize": "100%",
"gridPos": {
- "h": 8,
- "w": 4,
- "x": 20,
+ "h": 8,
+ "w": 4,
+ "x": 20,
"y": 9
- },
- "height": "310",
- "hideTimeOverride": true,
- "id": 21,
- "links": [],
- "minSpan": 4,
- "pageSize": 5,
- "scroll": false,
- "showHeader": true,
+ },
+ "height": "310",
+ "hideTimeOverride": true,
+ "id": 21,
+ "links": [],
+ "minSpan": 4,
+ "pageSize": 5,
+ "scroll": false,
+ "showHeader": true,
"sort": {
- "col": 1,
+ "col": 1,
"desc": true
- },
+ },
"styles": [
{
- "alias": "Time",
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "pattern": "Time",
+ "alias": "Time",
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "pattern": "Time",
"type": "date"
- },
+ },
{
- "alias": "KV Commit ",
- "colorMode": "row",
+ "alias": "KV Commit ",
+ "colorMode": "row",
"colors": [
- "rgba(50, 172, 45, 0.97)",
- "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)",
+ "rgba(237, 129, 40, 0.89)",
"rgba(245, 54, 54, 0.9)"
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "pattern": "Current",
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "pattern": "Current",
"thresholds": [
- ".003",
+ ".003",
".005"
- ],
- "type": "number",
+ ],
+ "type": "number",
"unit": "s"
- },
+ },
{
- "alias": "OSD Id",
- "colorMode": null,
+ "alias": "OSD Id",
+ "colorMode": null,
"colors": [
- "rgba(245, 54, 54, 0.9)",
- "rgba(237, 129, 40, 0.89)",
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
"rgba(50, 172, 45, 0.97)"
- ],
- "dateFormat": "YYYY-MM-DD HH:mm:ss",
- "decimals": 2,
- "pattern": "Metric",
- "thresholds": [],
- "type": "number",
+ ],
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "decimals": 2,
+ "pattern": "Metric",
+ "thresholds": [],
+ "type": "number",
"unit": "short"
- },
+ },
{
- "alias": "",
- "colorMode": null,
+ "alias": "",
+ "colorMode": null,
"colors": [
- "rgba(245, 54, 54, 0.9)",
- "rgba(237, 129, 40, 0.89)",
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
"rgba(50, 172, 45, 0.97)"
- ],
- "decimals": 2,
- "pattern": "/.*/",
- "thresholds": [],
- "type": "number",
+ ],
+ "decimals": 2,
+ "pattern": "/.*/",
+ "thresholds": [],
+ "type": "number",
"unit": "short"
}
- ],
+ ],
"targets": [
{
- "expr": "",
- "format": "table",
- "intervalFactor": 2,
- "refId": "A",
+ "expr": "",
+ "format": "table",
+ "intervalFactor": 2,
+ "refId": "A",
"textEditor": true
}
- ],
- "timeFrom": "2m",
- "title": "",
- "transform": "timeseries_aggregations",
+ ],
+ "timeFrom": "2m",
+ "title": "",
+ "transform": "timeseries_aggregations",
"type": "table"
- },
+ },
{
- "aliasColors": {},
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "Local",
- "fill": 1,
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "Local",
+ "fill": 1,
"gridPos": {
- "h": 8,
- "w": 12,
- "x": 0,
+ "h": 8,
+ "w": 12,
+ "x": 0,
"y": 17
- },
- "height": "300",
- "id": 22,
+ },
+ "height": "300",
+ "id": 22,
"legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
"values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [],
- "minSpan": 12,
- "nullPointMode": "null",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [],
- "spaceLength": 10,
- "stack": true,
- "steppedLine": false,
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "minSpan": 12,
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": true,
+ "steppedLine": false,
"targets": [
{
- "expr": "",
- "format": "time_series",
- "intervalFactor": 2,
- "refId": "A",
+ "expr": "",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
"textEditor": true
- },
+ },
{
- "expr": "",
- "format": "time_series",
- "intervalFactor": 2,
- "refId": "B",
+ "expr": "",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "B",
"textEditor": true
- },
+ },
{
- "expr": "",
- "format": "time_series",
- "intervalFactor": 2,
- "refId": "C",
+ "expr": "",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "C",
"textEditor": true
- },
+ },
{
- "expr": "",
- "format": "time_series",
- "intervalFactor": 2,
- "refId": "D",
+ "expr": "",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "D",
"textEditor": true
}
- ],
- "thresholds": [],
- "timeFrom": null,
- "timeShift": null,
- "title": "Bluestore Latency for OSD '$osd_id'",
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Bluestore Latency for OSD '$osd_id'",
"tooltip": {
- "shared": true,
- "sort": 0,
+ "shared": true,
+ "sort": 0,
"value_type": "individual"
- },
- "type": "graph",
+ },
+ "type": "graph",
"xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
"values": []
- },
+ },
"yaxes": [
{
- "format": "s",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": "0",
+ "format": "s",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
"show": true
- },
+ },
{
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
"show": false
}
]
- },
+ },
{
- "aliasColors": {},
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "Local",
- "description": "This charts shows the $percentile%ile latencies across all OSDs, which indicates overall performance, but does not represent any specific OSD",
- "fill": 1,
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "Local",
+ "description": "This chart shows the $percentile%ile latencies across all OSDs, which indicates overall performance, but does not represent any specific OSD",
+ "fill": 1,
"gridPos": {
- "h": 8,
- "w": 12,
- "x": 12,
+ "h": 8,
+ "w": 12,
+ "x": 12,
"y": 17
- },
- "height": "300px",
- "id": 23,
+ },
+ "height": "300px",
+ "id": 23,
"legend": {
- "avg": false,
- "current": false,
- "max": false,
- "min": false,
- "show": true,
- "total": false,
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
"values": false
- },
- "lines": true,
- "linewidth": 1,
- "links": [],
- "minSpan": 12,
- "nullPointMode": "null",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "minSpan": 12,
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
"targets": [
{
- "expr": "",
- "format": "time_series",
- "intervalFactor": 2,
- "refId": "A",
+ "expr": "",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A",
"textEditor": true
- },
+ },
{
- "expr": "",
- "format": "time_series",
- "intervalFactor": 2,
- "refId": "B",
+ "expr": "",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "B",
"textEditor": true
- },
+ },
{
- "expr": "",
- "format": "time_series",
- "intervalFactor": 2,
- "refId": "C",
+ "expr": "",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "C",
"textEditor": true
- },
+ },
{
- "expr": "",
- "format": "time_series",
- "intervalFactor": 2,
- "refId": "D",
+ "expr": "",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "D",
"textEditor": true
}
- ],
- "thresholds": [],
- "timeFrom": null,
- "timeShift": null,
- "title": "BlueStore IO Summary - all OSD's @ $percentile%ile",
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "BlueStore IO Summary - all OSD's @ $percentile%ile",
- "description": "LUNs are configured with a primary path (active), and a number of secondary paths (passive). Under normal circumstances, only the active/primary path is used for I/O. This chart shows the distribution of the active paths across each of the gateways.",
- "fontSize": "80%",
- "format": "short",
+ },
+ "datasource": null,
+ "description": "LUNs are configured with a primary path (active), and a number of secondary paths (passive). Under normal circumstances, only the active/primary path is used for I/O. This chart shows the distribution of the active paths across each of the gateways.",
- "description": "Each OSD consists of a Journal/WAL partition and a data partition. The RAW Capacity shown is the sum of the data partitions across all OSDs on the selected OSD hosts.",
- "format": "bytes",
+ ],
+ "datasource": null,
+ "decimals": 0,
+ "description": "Each OSD consists of a Journal/WAL partition and a data partition. The RAW Capacity shown is the sum of the data partitions across all OSDs on the selected OSD hosts.",
+ "format": "bytes",
"gauge": {
- "maxValue": 100,
- "minValue": 0,
- "show": false,
- "thresholdLabels": false,
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
"thresholdMarkers": true
- },
+ },
"gridPos": {
- "h": 4,
- "w": 4,
- "x": 0,
+ "h": 4,
+ "w": 4,
+ "x": 0,
"y": 5
- },
- "height": "160",
- "id": 2,
- "interval": null,
- "links": [],
- "mappingType": 1,
+ },
+ "height": "160",
+ "id": 2,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
"mappingTypes": [
{
- "name": "value to text",
+ "name": "value to text",
"value": 1
- },
+ },
{
- "name": "range to text",
+ "name": "range to text",
"value": 2
}
- ],
- "maxDataPoints": "",
- "minSpan": 4,
- "nullPointMode": "connected",
- "nullText": null,
- "postfix": "",
- "postfixFontSize": "50%",
- "prefix": "",
- "prefixFontSize": "50%",
+ ],
+ "maxDataPoints": "",
+ "minSpan": 4,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
"rangeMaps": [
{
- "from": "null",
- "text": "N/A",
+ "from": "null",
+ "text": "N/A",
"to": "null"
}
- ],
+ ],
"sparkline": {
- "fillColor": "rgba(31, 118, 189, 0.18)",
- "full": false,
- "lineColor": "rgb(31, 120, 193)",
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
"show": false
- },
- "tableColumn": "",
+ },
+ "tableColumn": "",
"targets": [
{
- "expr": "sum(ceph_osd_stat_bytes and on (ceph_daemon) ceph_disk_occupation{device=~\"($device_id)\", instance=~\"($osd_servers).*\"})",
- "format": "time_series",
- "intervalFactor": 2,
- "refId": "A",
- "step": 40,
+ "expr": "sum(ceph_osd_stat_bytes and on (ceph_daemon) ceph_disk_occupation{device=~\"($device_id)\", instance=~\"($osd_servers).*\"})",
- "expr": "max by (device) (\n irate(node_disk_io_time_ms[5m]) and on (instance, device) ceph_disk_occupation{instance=~\"($osd_servers).*\", device=~\"($device_id)\"}\n) / 10",
- "format": "time_series",
- "hide": false,
- "intervalFactor": 1,
- "legendFormat": "{{device}}",
- "refId": "A",
- "step": 10,
+ "expr": "max by (device) (\n irate(node_disk_io_time_ms[5m]) and on (instance, device) ceph_disk_occupation{instance=~\"($osd_servers).*\", device=~\"($device_id)\"}\n) / 10",