From: Boris Ranto Date: Fri, 27 Jul 2018 21:27:44 +0000 (+0200) Subject: Rename the dashboards X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=f25e77569f3140c997c79f33bb72e7153be7d80f;p=cephmetrics.git Rename the dashboards Signed-off-by: Boris Ranto --- diff --git a/ansible/roles/ceph-grafana/files/dashboards/cephmetrics/alert-status.json b/ansible/roles/ceph-grafana/files/dashboards/cephmetrics/alert-status.json new file mode 100644 index 0000000..1069b43 --- /dev/null +++ b/ansible/roles/ceph-grafana/files/dashboards/cephmetrics/alert-status.json @@ -0,0 +1,1716 @@ +{ + "__requires": [ + { + "type": "panel", + "id": "alertlist", + "name": "Alert List", + "version": "5.0.0" + }, + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "5.0.4" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "5.0.0" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Local", + "version": "5.0.0" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "id": null, + "iteration": 1526437197732, + "links": [], + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 11, + "panels": [], + "repeat": null, + "title": "Active Alert List", + "type": "row" + }, + { + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 1 + }, + "id": 1, + "limit": "20", + "links": [], + "onlyAlertsOnDashboard": true, + "show": "current", + "sortOrder": 3, + "stateFilter": [ + "alerting" + ], + "title": "Active Ceph Alert List", + "type": "alertlist" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 8 + }, + "id": 12, + "panels": [], + "repeat": null, + "title": "Health Checks", + "type": "row" 
+ }, + { + "alert": { + "conditions": [ + { + "evaluator": { + "params": [ + 0 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "1m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "max" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "frequency": "10s", + "handler": 1, + "name": "Overall Ceph Health alert", + "noDataState": "no_data", + "notifications": [ + { + "id": 1 + } + ] + }, + "aliasColors": { + "Ceph Health": "#890F02", + "Ceph Health (0:OK, 4:Warning,8:Error)": "#DEDAF7", + "ceph health": "#890F02" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "The chart plots the cluster's health over time. Health is depicted as an integer: 0, 1 or 2, where 0 is OK, 1 is WARN and 2 represents an ERROR state.", + "fill": 1, + "gridPos": { + "h": 7, + "w": 4, + "x": 0, + "y": 9 + }, + "hideTimeOverride": false, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "maxDataPoints": "360", + "minSpan": 4, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": true, + "targets": [ + { + "expr": "ceph_health_status", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Ceph Health", + "refId": "A", + "step": 20, + "textEditor": true + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 0 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Overall Ceph Health", + "tooltip": { + "shared": false, + "sort": 1, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + 
"format": "short", + "label": "", + "logBase": 1, + "max": "2", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "alert": { + "conditions": [ + { + "evaluator": { + "params": [ + 0 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "1m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "max" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "frequency": "60s", + "handler": 1, + "name": "Disks Near Full alert", + "noDataState": "ok", + "notifications": [ + { + "id": 1 + } + ] + }, + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "This shows how many disks are more than 85% full. Performance may degrade beyond this threshold on filestore (XFS) backed OSDs.", + "fill": 1, + "gridPos": { + "h": 7, + "w": 4, + "x": 4, + "y": 9 + }, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 4, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "count((ceph_osd_stat_bytes_used / ceph_osd_stat_bytes) * 100 > 85)", + "format": "time_series", + "hide": false, + "instant": false, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A", + "step": 40, + "textEditor": true + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 0 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Disks Near Full", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + 
"mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "alert": { + "conditions": [ + { + "evaluator": { + "params": [ + 0 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "30s", + "now" + ] + }, + "reducer": { + "params": [], + "type": "max" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "frequency": "10s", + "handler": 1, + "name": "OSDs Down alert", + "noDataState": "ok", + "notifications": [ + { + "id": 1 + } + ] + }, + "aliasColors": {}, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "Count of OSDs currently in a DOWN state", + "fill": 2, + "gridPos": { + "h": 7, + "w": 4, + "x": 8, + "y": 9 + }, + "hideTimeOverride": true, + "id": 4, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideZero": false, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "total": false, + "values": false + }, + "lines": false, + "linewidth": 2, + "links": [], + "minSpan": 4, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": true, + "targets": [ + { + "expr": "count(ceph_osd_metadata) - count(ceph_osd_up > 0.5)", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "down", + "refId": "A", + "step": 4, + "textEditor": true + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 0 + } + ], + "timeFrom": "5m", + "timeShift": null, + "title": "OSDs Down", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + 
"type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "alert": { + "conditions": [ + { + "evaluator": { + "params": [ + 0 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "30s", + "now" + ] + }, + "reducer": { + "params": [], + "type": "last" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "frequency": "30s", + "handler": 1, + "name": "OSDs Hosts Down alert", + "noDataState": "ok", + "notifications": [ + { + "id": 1 + } + ] + }, + "aliasColors": {}, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "Count of OSD Hosts that have all daemons down", + "fill": 2, + "gridPos": { + "h": 7, + "w": 4, + "x": 12, + "y": 9 + }, + "hideTimeOverride": true, + "id": 14, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideZero": false, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "total": false, + "values": false + }, + "lines": false, + "linewidth": 2, + "links": [], + "minSpan": 4, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": true, + "targets": [ + { + "expr": "count(\n (count by(instance) (ceph_disk_occupation * \n on(ceph_daemon) group_right(instance) ceph_osd_up == 0) - \n count by(instance) (ceph_disk_occupation)) == 0)", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "down", + "refId": "A", + "step": 4, + "textEditor": true + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + 
"op": "gt", + "value": 0 + } + ], + "timeFrom": "5m", + "timeShift": null, + "title": "OSDs Hosts Down", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "alert": { + "conditions": [ + { + "evaluator": { + "params": [ + 0 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "1m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "last" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "frequency": "60s", + "handler": 1, + "name": "PG's Stuck alert", + "noDataState": "ok", + "notifications": [ + { + "id": 1 + } + ] + }, + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "This chart shows whether there are pg's in a stuck state, that need manual intervention to resolve.", + "fill": 2, + "gridPos": { + "h": 7, + "w": 4, + "x": 16, + "y": 9 + }, + "hideTimeOverride": true, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "minSpan": 4, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max(ceph_osd_numpg) - scalar(ceph_pg_active)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "# pg's stuck inactive", + "refId": "A", + "step": 240, + "textEditor": true + } + ], + "thresholds": [ + { + "colorMode": 
"critical", + "fill": true, + "line": true, + "op": "gt", + "value": 0 + } + ], + "timeFrom": "6h", + "timeShift": null, + "title": "PG's Stuck", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ + "total" + ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "alert": { + "conditions": [ + { + "evaluator": { + "params": [ + 0 + ], + "type": "lt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "5m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "min" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "frequency": "60s", + "handler": 1, + "name": "OSD Host Loss Check alert", + "noDataState": "no_data", + "notifications": [ + { + "id": 1 + } + ] + }, + "aliasColors": { + "Largest OSD Host": "#890F02" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "This graph checks whether the cluster, at 90% full, still has enough capacity to support the loss of the largest OSD host", + "fill": 1, + "gridPos": { + "h": 7, + "w": 4, + "x": 20, + "y": 9 + }, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 4, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max(\n sum(ceph_osd_stat_bytes - ceph_osd_stat_bytes_used)\n) * 0.9 -\nmax(\n sum by (instance) (\n ceph_osd_stat_bytes + on (ceph_daemon) group_left (instance) 
(ceph_disk_occupation*0)\n )\n)", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "refId": "A", + "step": 40, + "textEditor": true + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "lt", + "value": 0 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "OSD Host Loss Check", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "alert": { + "conditions": [ + { + "evaluator": { + "params": [ + 1000 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "1m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "max" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "frequency": "30s", + "handler": 1, + "name": "Slow OSD responses alert", + "noDataState": "ok", + "notifications": [ + { + "id": 1 + } + ] + }, + "aliasColors": { + "Largest OSD Host": "#890F02" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "Graph checking for OSD Latencies that are above 1s.", + "fill": 1, + "gridPos": { + "h": 7, + "w": 4, + "x": 0, + "y": 16 + }, + "hideTimeOverride": true, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxDataPoints": "", + "minSpan": 4, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, 
+ "targets": [ + { + "expr": "(\n (\n irate(node_disk_read_time_ms[5m]) / clamp_min(irate(node_disk_reads_completed[5m]), 0.001) +\n irate(node_disk_write_time_ms[5m]) / clamp_min(irate(node_disk_writes_completed[5m]), 0.001)\n ) and on (instance, device) ceph_disk_occupation\n) >= 1000", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{instance}}.{{device}}", + "refId": "A", + "step": 40, + "textEditor": true + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 1000 + } + ], + "timeFrom": "1h", + "timeShift": null, + "title": "Slow OSD responses", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": "ms", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "alert": { + "conditions": [ + { + "evaluator": { + "params": [ + 10 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "5m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "max" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "frequency": "30s", + "handler": 1, + "name": "Network Errors alert", + "noDataState": "no_data", + "notifications": [ + { + "id": 1 + } + ] + }, + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "Checks all interfaces for dropped/error packets, and alerts if more than 10 are seen in a 5m interval", + "fill": 1, + "gridPos": { + "h": 7, + "w": 4, + "x": 4, + "y": 16 + }, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + 
"linewidth": 1, + "links": [], + "minSpan": 4, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (instance, device) (\n irate(node_network_receive_drop{device=~\"(eth|en|bond|ib|mlx).*\"}[5m]) +\n irate(node_network_receive_errs{device=~\"(eth|en|bond|ib|mlx).*\"}[5m]) +\n irate(node_network_transmit_drop{device=~\"(eth|en|bond|ib|mlx).*\"}[5m]) +\n irate(node_network_transmit_errs{device=~\"(eth|en|bond|ib|mlx).*\"}[5m])\n) > 0", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} / {{device}}", + "refId": "A", + "step": 40, + "textEditor": true + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 10 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Network Packet Drops/Errors", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "alert": { + "conditions": [ + { + "evaluator": { + "params": [ + 85 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "5m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "avg" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "frequency": "60s", + "handler": 1, + "name": "Pool Capacity alert", + "noDataState": "no_data", + "notifications": [ + { + "id": 1 + } + ] + }, + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 5, + "gridPos": 
{ + "h": 7, + "w": 4, + "x": 8, + "y": 16 + }, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "minSpan": 4, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "ceph_pool_bytes_used / (ceph_pool_bytes_used + ceph_pool_max_avail) * 100 + on (pool_id) group_left (name) (ceph_pool_metadata*0)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{name}}", + "refId": "A", + "step": 40, + "textEditor": true + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 85 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Pool Capacity", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": null, + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "alert": { + "conditions": [ + { + "evaluator": { + "params": [ + 0 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "10s", + "now" + ] + }, + "reducer": { + "params": [], + "type": "last" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "frequency": "30s", + "handler": 1, + "name": "MONs Down alert", + "noDataState": "ok", + "notifications": [ + { + "id": 1 + } + ] + }, + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "Count of MONs 
currently not in quorum/down", + "fill": 2, + "gridPos": { + "h": 7, + "w": 4, + "x": 12, + "y": 16 + }, + "hideTimeOverride": true, + "id": 13, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideZero": false, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "minSpan": 4, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": true, + "targets": [ + { + "expr": "count(ceph_mon_quorum_status != 1)", + "format": "time_series", + "hide": false, + "instant": false, + "intervalFactor": 1, + "legendFormat": "down", + "refId": "A", + "step": 4, + "textEditor": true + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 0 + } + ], + "timeFrom": "5m", + "timeShift": null, + "title": "MONs Down", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": "7", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "alert": { + "conditions": [ + { + "evaluator": { + "params": [ + 85 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "1h", + "now" + ] + }, + "reducer": { + "params": [], + "type": "avg" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "frequency": "60s", + "handler": 1, + "name": "Cluster Capacity alert", + "noDataState": "no_data", + "notifications": [ + { + "id": 1 + } + ] + }, + "aliasColors": {}, + "bars": false, + "dashLength": 10, + 
"dashes": false, + "datasource": null, + "description": "This trigger raises a notification if the raw used crosses the 85% capacity threshold of the ceph cluster", + "fill": 1, + "gridPos": { + "h": 7, + "w": 4, + "x": 16, + "y": 16 + }, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 4, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(ceph_osd_stat_bytes_used) / sum(ceph_osd_stat_bytes) * 100 ", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "Raw Capacity Used %", + "refId": "A", + "step": 40, + "textEditor": true + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 85 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Cluster Capacity", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": "", + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "alert": { + "conditions": [ + { + "evaluator": { + "params": [ + 0 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "1m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "last" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "frequency": "60s", + "handler": 1, + "name": "OSDs with High PG Count", + "noDataState": "ok", + "notifications": [ + { + "id": 1 + } + ] 
+ }, + "aliasColors": {}, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "Shows the number of OSDs that have a PG count > 275.", + "fill": 1, + "gridPos": { + "h": 7, + "w": 4, + "x": 20, + "y": 16 + }, + "id": 16, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": false, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "count(ceph_osd_numpg > 275)", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 0 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "OSDs with High PG Count", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 0, + "format": "short", + "label": "#NUM OSDs with high PG Count", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "refresh": "15s", + "schemaVersion": 16, + "style": "dark", + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "15s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Alert Status", + "version": 17 +} diff --git 
a/ansible/roles/ceph-grafana/files/dashboards/cephmetrics/ceph-at-a-glance.json b/ansible/roles/ceph-grafana/files/dashboards/cephmetrics/ceph-at-a-glance.json new file mode 100644 index 0000000..932166d --- /dev/null +++ b/ansible/roles/ceph-grafana/files/dashboards/cephmetrics/ceph-at-a-glance.json @@ -0,0 +1,3252 @@ +{ + "__requires": [ + { + "type": "panel", + "id": "alertlist", + "name": "Alert List", + "version": "5.0.0" + }, + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "5.0.4" + }, + { + "type": "panel", + "id": "grafana-piechart-panel", + "name": "Pie Chart", + "version": "1.3.0" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "5.0.0" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Local", + "version": "5.0.0" + }, + { + "type": "panel", + "id": "singlestat", + "name": "Singlestat", + "version": "5.0.0" + }, + { + "type": "panel", + "id": "vonage-status-panel", + "name": "Status Panel", + "version": "1.0.8" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "limit": 100, + "name": "Annotations & Alerts", + "showIn": 0, + "type": "dashboard" + } + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "id": null, + "iteration": 1526962541471, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "overview" + ], + "targetBlank": true, + "title": "Shortcuts", + "type": "dashboards" + } + ], + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 2, + "w": 2, + 
"x": 0, + "y": 0 + }, + "height": "50px", + "id": 1, + "interval": null, + "links": [ + { + "dashUri": "db/ceph-cluster", + "dashboard": "Ceph Cluster", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "Ceph Cluster", + "type": "dashboard" + } + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "10%", + "prefix": "", + "prefixFontSize": "10%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "", + "format": "time_series", + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "", + "title": "", + "transparent": true, + "type": "singlestat", + "valueFontSize": "35%", + "valueMaps": [ + { + "op": "=", + "text": "Cluster", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 2, + "y": 0 + }, + "height": "50px", + "id": 2, + "interval": null, + "links": [ + { + "dashUri": "db/ceph-pools", + "dashboard": "Ceph Pools", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "Ceph Pools", + "type": "dashboard" + } + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 
2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "10%", + "prefix": "", + "prefixFontSize": "10%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "", + "format": "time_series", + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "", + "title": "", + "transparent": true, + "type": "singlestat", + "valueFontSize": "35%", + "valueMaps": [ + { + "op": "=", + "text": "Pools", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 4, + "y": 0 + }, + "height": "50px", + "id": 3, + "interval": null, + "links": [ + { + "dashUri": "db/ceph-osd-information", + "dashboard": "Ceph OSD Information", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "Ceph OSD Information", + "type": "dashboard" + } + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "10%", + "prefix": "", + "prefixFontSize": "10%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "", + "format": 
"time_series", + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "", + "title": "", + "transparent": true, + "type": "singlestat", + "valueFontSize": "35%", + "valueMaps": [ + { + "op": "=", + "text": "OSDs", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 6, + "y": 0 + }, + "height": "50px", + "id": 40, + "interval": null, + "links": [ + { + "dashUri": "db/ceph-rgw-workload", + "dashboard": "Ceph RGW Workload", + "includeVars": false, + "keepTime": true, + "targetBlank": true, + "title": "Ceph RGW Workload", + "type": "dashboard" + } + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "10%", + "prefix": "", + "prefixFontSize": "10%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "", + "format": "time_series", + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "", + "title": "", + "transparent": true, + "type": "singlestat", + "valueFontSize": "35%", + "valueMaps": [ + { + "op": "=", + "text": "S3/Swift", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + 
"rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 8, + "y": 0 + }, + "height": "50px", + "id": 5, + "interval": null, + "links": [ + { + "dashUri": "db/ceph-backend-storage", + "dashboard": "Ceph Backend Storage", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "OSD Host Performance", + "type": "dashboard" + } + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "10%", + "prefix": "", + "prefixFontSize": "10%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "", + "format": "time_series", + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "", + "title": "", + "transparent": true, + "type": "singlestat", + "valueFontSize": "35%", + "valueMaps": [ + { + "op": "=", + "text": "OSD Hosts", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 10, + "y": 0 + }, + "height": "50px", + "id": 6, + "interval": null, + "links": [ + { + "dashUri": "db/network-usage-by-server", + "dashboard": "Network Usage by Server", + 
"includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "Network Usage by Server", + "type": "dashboard" + } + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "10%", + "prefix": "", + "prefixFontSize": "10%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "", + "format": "time_series", + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "", + "title": "", + "transparent": true, + "type": "singlestat", + "valueFontSize": "35%", + "valueMaps": [ + { + "op": "=", + "text": "Network", + "value": "null" + } + ], + "valueName": "current" + }, + { + "gridPos": { + "h": 4, + "w": 6, + "x": 18, + "y": 0 + }, + "id": 8, + "limit": 10, + "links": [ + { + "dashUri": "db/alert-status", + "dashboard": "Alert Status", + "targetBlank": true, + "title": "Alert Status", + "type": "dashboard" + } + ], + "minSpan": 4, + "onlyAlertsOnDashboard": false, + "show": "current", + "sortOrder": 3, + "stateFilter": [ + "alerting" + ], + "title": "Active Alerts", + "type": "alertlist" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 4 + }, + "id": 35, + "panels": [], + "repeat": null, + "title": "At a Glance", + "type": "row" + }, + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "rgba(1, 167, 1, 1)", + "rgba(255,165,0, 1)", + "rgba(255, 0, 0, 1)" + ], + "datasource": null, + "description": "Shows the overall health of the ceph cluster", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + 
"thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 2, + "x": 0, + "y": 5 + }, + "hideTimeOverride": true, + "id": 9, + "interval": null, + "links": [ + { + "dashUri": "db/ceph-health", + "dashboard": "Ceph Health", + "includeVars": true, + "keepTime": false, + "targetBlank": true, + "title": "Ceph Health", + "type": "dashboard" + } + ], + "mappingType": 2, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "0", + "text": "OK", + "to": "0" + }, + { + "from": "1", + "text": "WARN", + "to": "4" + }, + { + "from": "5", + "text": "ERROR", + "to": "99" + }, + { + "from": "-10", + "text": "NODATA", + "to": "0" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "ceph_health_status", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "textEditor": true + } + ], + "thresholds": "1,5", + "timeFrom": "1m", + "timeShift": null, + "title": "Health", + "type": "singlestat", + "valueFontSize": "50%", + "valueMaps": [ + { + "op": "=", + "text": "OK", + "value": "0" + }, + { + "op": "=", + "text": "WARN", + "value": "4" + }, + { + "op": "=", + "text": "ERROR", + "value": "8" + } + ], + "valueName": "current" + }, + { + "clusterName": "MONs", + "colorMode": "Panel", + "colors": { + "crit": "rgba(245, 54, 54, 0.9)", + "disable": "rgba(128, 128, 128, 0.9)", + "ok": "rgb(1,167,1)", + "warn": "rgba(237, 129, 40, 0.9)" + }, + "cornerRadius": 0, + "datasource": null, + "displayName": "MONs", + "flipCard": false, + "flipTime": 5, + "fontFormat": "Regular", + "gridPos": { + "h": 6, + "w": 2, + "x": 2, + "y": 5 + }, + "hideTimeOverride": true, + 
"id": 10, + "isAutoScrollOnOverflow": false, + "isGrayOnNoData": true, + "isHideAlertsOnDisable": false, + "isIgnoreOKColors": false, + "links": [], + "minSpan": 2, + "namePrefix": "", + "targets": [ + { + "aggregation": "Last", + "alias": "total", + "displayType": "Regular", + "expr": "count(ceph_mon_quorum_status)", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "legendFormat": "total", + "refId": "D", + "textEditor": true, + "valueHandler": "Text Only" + }, + { + "aggregation": "Last", + "alias": "quorum", + "displayAliasType": "Always", + "displayType": "Regular", + "displayValueWithAlias": "When Alias Displayed", + "expr": "count(ceph_mon_quorum_status == 1)", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "legendFormat": "quorum", + "refId": "E", + "textEditor": true, + "valueHandler": "String Threshold", + "warn": "1" + }, + { + "aggregation": "Last", + "alias": "down", + "crit": 2, + "decimals": 2, + "displayAliasType": "Always", + "displayType": "Regular", + "displayValueWithAlias": "When Alias Displayed", + "expr": "count(ceph_mon_quorum_status != 1)", + "format": "time_series", + "hide": false, + "instant": true, + "intervalFactor": 1, + "legendFormat": "down", + "refId": "A", + "textEditor": true, + "units": "none", + "valueHandler": "Number Threshold", + "warn": 1 + } + ], + "timeFrom": "1m", + "timeShift": null, + "title": "", + "type": "vonage-status-panel" + }, + { + "clusterName": "OSDs", + "colorMode": "Panel", + "colors": { + "crit": "rgba(245, 54, 54, 0.9)", + "disable": "rgba(128, 128, 128, 0.9)", + "ok": "rgb(1,167,1)", + "warn": "rgba(237, 129, 40, 0.9)" + }, + "cornerRadius": 0, + "datasource": null, + "displayName": "OSDs", + "flipCard": false, + "flipTime": 5, + "fontFormat": "Regular", + "gridPos": { + "h": 6, + "w": 2, + "x": 4, + "y": 5 + }, + "hideTimeOverride": true, + "id": 11, + "isAutoScrollOnOverflow": false, + "isGrayOnNoData": false, + "isHideAlertsOnDisable": false, + 
"isIgnoreOKColors": false, + "links": [ + { + "dashUri": "db/ceph-osd-information", + "dashboard": "Ceph OSD Information", + "targetBlank": true, + "title": "Ceph OSD Information", + "type": "dashboard" + } + ], + "minSpan": 2, + "namePrefix": "", + "targets": [ + { + "aggregation": "Last", + "alias": "total", + "displayType": "Regular", + "expr": "count(ceph_osd_up)", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "total", + "refId": "E", + "textEditor": true, + "valueHandler": "Text Only" + }, + { + "aggregation": "Last", + "alias": "up", + "displayType": "Regular", + "expr": "count(ceph_osd_up == 1)", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "up", + "refId": "F", + "textEditor": true, + "valueHandler": "Text Only" + }, + { + "aggregation": "Last", + "alias": "in", + "displayType": "Regular", + "expr": "count(ceph_osd_in == 1)", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "in", + "refId": "A", + "textEditor": true, + "valueHandler": "Text Only" + }, + { + "aggregation": "Last", + "alias": "out", + "displayType": "Regular", + "expr": "count(ceph_osd_in == 0)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "out", + "refId": "C", + "textEditor": true, + "valueHandler": "Text Only" + }, + { + "aggregation": "Last", + "alias": "down", + "crit": 5, + "decimals": 2, + "displayAliasType": "Always", + "displayType": "Regular", + "displayValueWithAlias": "When Alias Displayed", + "expr": "count(ceph_osd_up == 0)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "down", + "refId": "B", + "textEditor": true, + "units": "none", + "valueHandler": "Number Threshold", + "warn": 1 + } + ], + "timeFrom": "1m", + "timeShift": null, + "title": "", + "type": "vonage-status-panel" + }, + { + "clusterName": "OSD Hosts", + "colorMode": "Panel", + "colors": { + "crit": "rgba(245, 54, 54, 0.9)", + "disable": "rgba(128, 128,
128, 0.9)", + "ok": "rgb(1,167,1)", + "warn": "rgba(237, 129, 40, 0.9)" + }, + "cornerRadius": 0, + "datasource": null, + "displayName": "OSD Hosts", + "flipCard": false, + "flipTime": 5, + "fontFormat": "Regular", + "gridPos": { + "h": 6, + "w": 2, + "x": 6, + "y": 5 + }, + "hideTimeOverride": true, + "id": 12, + "isAutoScrollOnOverflow": false, + "isGrayOnNoData": false, + "isHideAlertsOnDisable": false, + "isIgnoreOKColors": false, + "links": [ + { + "dashUri": "db/ceph-health", + "dashboard": "Ceph Health", + "targetBlank": true, + "title": "Ceph Health", + "type": "dashboard" + } + ], + "minSpan": 2, + "targets": [ + { + "aggregation": "Last", + "alias": "total", + "displayType": "Regular", + "expr": "count(\n count(\n ceph_disk_occupation\n ) by (instance)\n)", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "legendFormat": "total", + "refId": "A", + "textEditor": true, + "valueHandler": "Text Only" + }, + { + "aggregation": "Last", + "alias": "up", + "displayType": "Regular", + "expr": "count(\n count(\n ceph_disk_occupation\n ) by (instance)) -\ncount(\n (count by(instance) (ceph_disk_occupation * \n on(ceph_daemon) group_right(instance) ceph_osd_up == 0) - \n count by(instance) (ceph_disk_occupation)) == 0)", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "legendFormat": "up", + "refId": "B", + "textEditor": true, + "valueHandler": "Text Only" + }, + { + "aggregation": "Last", + "alias": "down", + "crit": 2, + "decimals": 2, + "displayAliasType": "Always", + "displayType": "Regular", + "displayValueWithAlias": "When Alias Displayed", + "expr": "count(\n (count by(instance) (ceph_disk_occupation * \n on(ceph_daemon) group_right(instance) ceph_osd_up == 0) - \n count by(instance) (ceph_disk_occupation)) == 0)", + "format": "time_series", + "hide": false, + "instant": true, + "intervalFactor": 1, + "legendFormat": "down", + "refId": "C", + "textEditor": true, + "units": "none", + "url": 
"dashboard/db/ceph-health", + "valueHandler": "Number Threshold", + "warn": 1 + } + ], + "timeFrom": "30s", + "timeShift": null, + "title": "", + "type": "vonage-status-panel" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": null, + "decimals": 0, + "description": "Number of RGW daemons active", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 2, + "x": 8, + "y": 5 + }, + "id": 39, + "interval": null, + "links": [ + { + "dashUri": "db/ceph-rgw-workload", + "dashboard": "Ceph RGW Workload", + "targetBlank": true, + "title": "Ceph RGW Workload", + "type": "dashboard" + } + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(ceph_rgw_metadata)", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "", + "title": "RGWs", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": null, + "decimals": 0, + "description": "Number of MDS daemons active", + "format": "none", + "gauge": { + "maxValue": 100, + 
"minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 2, + "x": 10, + "y": 5 + }, + "id": 41, + "interval": null, + "links": [ + { + "dashUri": "db/mds-performance", + "dashboard": "MDS Performance", + "targetBlank": true, + "title": "MDS Performance", + "type": "dashboard" + } + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(ceph_mds_metadata)", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "", + "title": "MDS", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(1, 167, 1, 1)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": null, + "decimals": 0, + "format": "short", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 12, + "y": 5 + }, + "hideTimeOverride": true, + "id": 15, + "interval": null, + "links": [ + { + "dashUri": "db/ceph-cluster", + "dashboard": "Ceph Cluster", + "params": "panelId=3&fullscreen&orgId=1", + "targetBlank": true, + "title": "Cluster Capacity Information", + "type": "dashboard" + } + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + 
"value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 4, + "nullPointMode": "connected", + "nullText": null, + "postfix": "%", + "postfixFontSize": "40%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(ceph_osd_stat_bytes_used) / sum(ceph_osd_stat_bytes) * 100", + "format": "time_series", + "hide": false, + "instant": true, + "intervalFactor": 2, + "refId": "A", + "textEditor": true + } + ], + "thresholds": "70,90", + "timeFrom": "1m", + "timeShift": null, + "title": "Capacity Utilization", + "type": "singlestat", + "valueFontSize": "70%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "decimals": 1, + "description": "Shows the growth rate based on osd usage over the past $growth_window.", + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 2, + "x": 16, + "y": 5 + }, + "id": 16, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 
0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "topk(1, ceph_cluster_total_used_bytes offset 1d) - ignoring (instance,job) topk(1, ceph_cluster_total_used_bytes offset 7d)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "", + "title": "$growth_window Growth Rate", + "type": "singlestat", + "valueFontSize": "70%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "decimals": 0, + "description": "Shows the estimated number of weeks left, based on consumption over the past $growth_window.", + "format": "locale", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 2, + "x": 18, + "y": 5 + }, + "id": 17, + "interval": null, + "links": [], + "mappingType": 2, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + }, + { + "from": "-99999999999999999999999", + "text": "No Growth", + "to": "0" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "(ceph_cluster_total_bytes - ceph_cluster_total_used_bytes) /\n scalar(topk(1, ceph_cluster_total_used_bytes offset 1d) - ignoring (instance,job) topk(1, ceph_cluster_total_used_bytes 
offset 7d))\n", + "format": "time_series", + "hide": false, + "instant": true, + "intervalFactor": 1, + "refId": "A", + "textEditor": true + } + ], + "thresholds": "", + "title": "Weeks Remaining", + "type": "singlestat", + "valueFontSize": "70%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "aliasColors": { + "Unknown": "#bf1b00", + "active + clean": "#01a701", + "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_pg_active_clean": "#01a701", + "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_pg_peering": "#ffa500", + "peering": "#0A50A1" + }, + "breakPoint": "50%", + "cacheTimeout": null, + "combine": { + "label": "Others", + "threshold": "" + }, + "datasource": null, + "fontSize": "100%", + "format": "none", + "gridPos": { + "h": 6, + "w": 4, + "x": 20, + "y": 5 + }, + "height": "", + "hideTimeOverride": true, + "id": 18, + "interval": null, + "legend": { + "percentage": false, + "show": true, + "values": true + }, + "legendType": "Under graph", + "links": [ + { + "dashUri": "db/ceph-cluster", + "dashboard": "Ceph Cluster", + "includeVars": false, + "keepTime": false, + "targetBlank": true, + "title": "Ceph Cluster Information", + "type": "dashboard" + } + ], + "maxDataPoints": "1", + "minSpan": 4, + "nullPointMode": "connected", + "pieType": "pie", + "strokeWidth": "", + "targets": [ + { + "expr": "ceph_pg_active", + "format": "time_series", + "hide": false, + "instant": true, + "intervalFactor": 1, + "legendFormat": "Active", + "refId": "B", + "textEditor": true + }, + { + "expr": "ceph_pg_degraded", + "format": "time_series", + "hide": false, + "instant": true, + "intervalFactor": 1, + "legendFormat": "Degraded", + "refId": "C", + "textEditor": true + }, + { + "expr": "ceph_pg_peering", + "format": "time_series", + "hide": false, + "instant": true, + "intervalFactor": 1, + "legendFormat": "Peering", + "refId": "D", + "textEditor": true + }, + { + "expr": 
"ceph_pg_unknown", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "legendFormat": "Unknown", + "refId": "E" + } + ], + "timeFrom": "1m", + "timeShift": null, + "title": "Placement Group Status", + "type": "grafana-piechart-panel", + "valueName": "current" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 11 + }, + "id": 36, + "panels": [], + "repeat": null, + "title": "Performance", + "type": "row" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(251,251,251, 0.97)", + "rgba(255,165,0, 0.89)", + "rgba(255, 0, 0, 1)" + ], + "datasource": null, + "decimals": 1, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 0, + "y": 12 + }, + "id": 20, + "interval": null, + "links": [ + { + "dashUri": "db/ceph-pools", + "dashboard": "Ceph Pools", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "Ceph Pools", + "type": "dashboard" + } + ], + "mappingType": 2, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "90", + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "/s", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(irate(ceph_osd_recovery_ops[1m]))", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "textEditor": true + } + ], + "thresholds": "1,2", + "title": "Recovery/Backfill Ops", + "type": "singlestat", + "valueFontSize": "50%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + 
{ + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(251,251,251, 0.97)", + "rgba(255,0,0,1)", + "rgba(255, 0, 0, 1)" + ], + "datasource": null, + "description": "This panel indicates whether scrub/deep scrub is running within the cluster. NB. If either of these features is turned off, the cluster will enter a WARN state. Click on the panel or the link below to look at cluster information in more detail", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 2, + "x": 4, + "y": 12 + }, + "id": 19, + "interval": null, + "links": [ + { + "dashUri": "db/ceph-cluster", + "dashboard": "Ceph Cluster", + "includeVars": false, + "keepTime": false, + "targetBlank": true, + "title": "Ceph Cluster", + "type": "dashboard" + } + ], + "mappingType": 2, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "0", + "text": "INACTIVE", + "to": "0" + }, + { + "from": "1", + "text": "ACTIVE", + "to": "99999" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "ceph_pg_scrubbing{job=\"ceph\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "refId": "A", + "textEditor": true + } + ], + "thresholds": "1", + "title": "Scrub", + "type": "singlestat", + "valueFontSize": "50%", + "valueMaps": [ + { + "op": "=", + "text": "INACTIVE", + "value": "0" + }, + { + "op": "=", + "text": "ACTIVE", + "value": "1" + }, + { + "op": "=", + "text": "DISABLED", + "value": "2" + }, + { + "op": "=", +
"text": "", + "value": "" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "decimals": 0, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 6, + "y": 12 + }, + "id": 21, + "interval": null, + "links": [ + { + "dashUri": "db/ceph-pools", + "dashboard": "Ceph Pools", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "Ceph Pools", + "type": "dashboard" + } + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "90", + "minSpan": 4, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(\n rate(ceph_pool_wr[$__interval])\n)\n+ \nsum(\n rate(ceph_pool_rd[$__interval])\n)", + "format": "time_series", + "groupBy": [], + "hide": false, + "intervalFactor": 1, + "legendFormat": "IOPS", + "policy": "default", + "rawQuery": false, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "textEditor": true + } + ], + "thresholds": "", + "title": "Client IOPS", + "type": "singlestat", + "valueFontSize": "100%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + 
"colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "decimals": 1, + "format": "decbytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 10, + "y": 12 + }, + "id": 22, + "interval": null, + "links": [ + { + "dashUri": "db/ceph-pools", + "dashboard": "Ceph Pools", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "Ceph Pools", + "type": "dashboard" + } + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "90", + "minSpan": 4, + "nullPointMode": "connected", + "nullText": null, + "postfix": "/s", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(rate(ceph_pool_wr_bytes[$__interval]) + rate(ceph_pool_rd_bytes[$__interval]))", + "format": "time_series", + "groupBy": [], + "hide": false, + "intervalFactor": 1, + "policy": "default", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "textEditor": true + } + ], + "thresholds": "", + "title": "Client Throughput", + "type": "singlestat", + "valueFontSize": "70%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + 
"decimals": 0, + "format": "short", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 2, + "x": 14, + "y": 12 + }, + "id": 23, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(ceph_pool_metadata)", + "format": "time_series", + "groupBy": [], + "intervalFactor": 2, + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "textEditor": true + } + ], + "thresholds": "", + "title": "Pools", + "type": "singlestat", + "valueFontSize": "70%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "aliasColors": { + "Reads": "#01a701", + "Writes": "#82B5D8" + }, + "breakPoint": "50%", + "cacheTimeout": null, + "combine": { + "label": "Others", + "threshold": 0 + }, + "datasource": null, + "description": "Shows the read/write ratio of client IOPS serviced by the ceph cluster", + "fontSize": "80%", + "format": "none", + "gridPos": { + "h": 6, + "w": 4, + "x": 16, + "y": 12 + }, + "height": "230", + "id": 25, + "interval": null, + "legend": { + "percentage": false, + "show": false, + "values": false + }, + "legendType": "Under graph", + "links": [], + "maxDataPoints": "90", + "minSpan": 4, + "nullPointMode": "connected", +
"pieType": "pie", + "strokeWidth": 1, + "targets": [ + { + "expr": "round(sum(irate(ceph_pool_rd[30s])))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "reads", + "refId": "A", + "textEditor": true + }, + { + "expr": "round(sum(irate(ceph_pool_wr[30s])))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "writes", + "refId": "B", + "textEditor": true + } + ], + "title": "Client Read/Write Ratio", + "type": "grafana-piechart-panel", + "valueName": "current" + }, + { + "aliasColors": { + "95%ile Commit Latency": "#447EBC", + "Apply Latency Max": "#890F02", + "Commit Latency": "#447EBC", + "apply": "#508642", + "commit": "#0a50a1" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "Shows the OSD apply and commit latency at the $percentile%ile across the cluster over the past 15 minutes", + "fill": 0, + "gridPos": { + "h": 6, + "w": 4, + "x": 20, + "y": 12 + }, + "hideTimeOverride": true, + "id": 26, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [ + { + "dashUri": "db/ceph-osd-information", + "dashboard": "Ceph OSD Information", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "Ceph OSD Information", + "type": "dashboard" + } + ], + "minSpan": 4, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "quantile($percentile / 100, ceph_osd_commit_latency_ms)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "commit", + "refId": "A", + "textEditor": true + }, + { + "expr": "quantile($percentile / 100, ceph_osd_apply_latency_ms)", + "format": "time_series", + "intervalFactor": 
1, + "legendFormat": "apply", + "refId": "B", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": "15m", + "timeShift": null, + "title": "OSD Apply vs Commit Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": false, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": null, + "decimals": 0, + "description": "CPU usage is presented based on the $percentile%ile across all OSD hosts", + "format": "short", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 0, + "y": 18 + }, + "id": 27, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "90", + "minSpan": 4, + "nullPointMode": "connected", + "nullText": null, + "postfix": " %", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "quantile(\n $percentile / 100, (\n avg (\n sum (\n irate(node_cpu{mode=~\"(system|user|irq|nice)\", instance=~'[[osd_servers_raw]].*'}[1m]))\n by (instance,cpu)\n ) by (instance)\n ) * 100\n)", + "format": "time_series", + 
"hide": false, + "intervalFactor": 1, + "refId": "A", + "textEditor": true + } + ], + "thresholds": "70,90", + "title": "OSD Hosts CPU Busy", + "type": "singlestat", + "valueFontSize": "100%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "decimals": 0, + "description": "RAM Usage shows the $percentile%ile of RAM used across all OSD hosts", + "format": "short", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 2, + "x": 4, + "y": 18 + }, + "id": 31, + "interval": null, + "links": [ + { + "dashUri": "db/ceph-backend-storage", + "dashboard": "Ceph Backend Storage", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "Ceph OSD Host Performance", + "type": "dashboard" + } + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": " %", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(46, 161, 15, 0)", + "full": false, + "lineColor": "rgb(164, 139, 4)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "quantile(\n $percentile/100, \n (\n node_memory_MemTotal{job='node'} - node_memory_MemFree{job='node'} - node_memory_Buffers{job='node'} - node_memory_Cached{job='node'}\n ) / node_memory_MemTotal{job='node'} * 100\n)", + "format": "time_series", + "groupBy": [], + "hide": false, + "intervalFactor": 2, + "policy": "default", + 
"rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "textEditor": true + } + ], + "thresholds": "", + "title": "RAM Util.", + "type": "singlestat", + "valueFontSize": "70%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "decimals": 0, + "description": "Total IOPS from all OSDs in the cluster", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 6, + "y": 18 + }, + "id": 28, + "interval": null, + "links": [ + { + "dashUri": "db/ceph-backend-storage", + "dashboard": "Ceph Backend Storage", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "Ceph OSD Host Performance", + "type": "dashboard" + } + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "90", + "minSpan": 4, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(\n sum(\n rate(\n node_disk_reads_completed[$__interval]\n )\n ) by (instance,device)\n + ignoring(ceph_daemon,job) group_right(instance) ceph_disk_occupation\n)\n+\nsum(\n sum(\n rate(\n node_disk_writes_completed[$__interval]\n )\n ) by (instance,device)\n 
+ ignoring(ceph_daemon,job) group_right(instance) ceph_disk_occupation\n)", + "format": "time_series", + "groupBy": [], + "hide": false, + "intervalFactor": 1, + "policy": "default", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "textEditor": true + } + ], + "thresholds": "", + "title": "Disk IOPS", + "type": "singlestat", + "valueFontSize": "100%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "decimals": 1, + "format": "decbytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 10, + "y": 18 + }, + "id": 29, + "interval": null, + "links": [ + { + "dashUri": "db/ceph-backend-storage", + "dashboard": "Ceph Backend Storage", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "Ceph OSD Host Performance", + "type": "dashboard" + } + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "90", + "minSpan": 4, + "nullPointMode": "connected", + "nullText": null, + "postfix": "/s", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "# should only include OSD hosts\nsum(\n sum(\n rate(\n node_disk_bytes_read[$__interval]\n )\n ) by (instance,device)\n + 
ignoring(ceph_daemon,job) group_right(instance) ceph_disk_occupation\n)\n+\nsum(\n sum(\n rate(\n node_disk_bytes_written[$__interval]\n )\n ) by (instance,device)\n + ignoring(ceph_daemon,job) group_right(instance) ceph_disk_occupation\n)", + "format": "time_series", + "groupBy": [], + "hide": false, + "intervalFactor": 1, + "policy": "default", + "rawQuery": true, + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "textEditor": true + }, + { + "expr": "# will include non-OSD hosts\nsum(\n rate(\n node_disk_bytes_read[$__interval]\n ) + \n rate(\n node_disk_bytes_written[$__interval]\n )\n)", + "format": "time_series", + "groupBy": [], + "hide": true, + "intervalFactor": 1, + "policy": "default", + "rawQuery": true, + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "tags": [], + "textEditor": true + } + ], + "thresholds": "", + "title": "Disk Throughput", + "type": "singlestat", + "valueFontSize": "70%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "decimals": 0, + "description": "The count of the number of disks in the cluster that are over $disk_full_threshold% full.", + "format": "short", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 2, + "x": 14, + "y": 18 + }, + "id": 30, + "interval": null, + "links": [ + { + "dashUri": "db/ceph-backend-storage", + "dashboard": "Ceph Backend Storage", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "Ceph OSD Host Performance", + "type": 
"dashboard" + } + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(\n (ceph_osd_stat_bytes_used / ceph_osd_stat_bytes) > ($disk_full_threshold / 100)\n)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "textEditor": true + } + ], + "thresholds": "", + "title": "Nearly Full Disks", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "0", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(1, 167, 1,1)", + "rgba(255,165,0,1)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": null, + "decimals": 0, + "description": "This panel shows the 5th %ile disk latency, indicating that 95% of the OSDs are delivering this latency or higher", + "format": "short", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 16, + "y": 18 + }, + "id": 32, + "interval": null, + "links": [ + { + "dashUri": "db/latency-by-server", + "dashboard": "Latency by Server", + "includeVars": true, + "targetBlank": true, + "title": "Latency by Server", + "type": "dashboard" + } + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "90", + "minSpan": 4, + "nullPointMode": 
"connected", + "nullText": null, + "postfix": " ms", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "quantile(\n 5 / 100,\n (irate(node_disk_read_time_ms[1m]) + irate(node_disk_write_time_ms[1m]) / \n (irate(node_disk_reads_completed[1m]) + irate(node_disk_writes_completed[1m])) \n +\n ignoring(ceph_daemon,job) ceph_disk_occupation))", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "refId": "C" + }, + { + "expr": "max(\n irate(node_disk_read_time_ms[30s]) / irate(node_disk_reads_completed[30s])\n +\n irate(node_disk_write_time_ms[30s]) / irate(node_disk_writes_completed[30s])\n + ignoring(ceph_daemon,job) ceph_disk_occupation\n)", + "format": "time_series", + "hide": true, + "intervalFactor": 2, + "refId": "B" + }, + { + "expr": "sum(\n (irate(node_disk_read_time_ms[1m]) + irate(node_disk_write_time_ms[1m]) / \n (irate(node_disk_reads_completed[1m]) + irate(node_disk_writes_completed[1m])) \n +\n ignoring(ceph_daemon,job) ceph_disk_occupation)) / count(ceph_osd_up)", + "format": "time_series", + "hide": true, + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "20,60", + "title": "Disk Latency", + "type": "singlestat", + "valueFontSize": "100%", + "valueMaps": [ + { + "op": "=", + "text": "0", + "value": "null" + } + ], + "valueName": "current" + }, + { + "aliasColors": { + "average": "#0a50a1", + "average %util": "#1f78c1" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "Chart shows the disk utilization over the past 15 mins expressed as an average across all OSDs, and at the $percentile%ile.", + "fill": 1, + "gridPos": { + "h": 6, + "w": 4, + "x": 20, + "y": 18 + }, + "hideTimeOverride": true, + 
"id": 33, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [ + { + "dashUri": "db/disk-busy-by-server", + "dashboard": "Disk Busy by Server", + "includeVars": true, + "targetBlank": true, + "title": "Disk Busy by Server", + "type": "dashboard" + } + ], + "minSpan": 4, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "95%ile", + "fill": 0 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "avg (\n max by (device) (\n irate(node_disk_io_time_ms[1m]) \n and on (instance, device) ceph_disk_occupation\n ) / 10\n)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "average", + "refId": "A", + "textEditor": false + }, + { + "expr": "quantile(\n $percentile/100, \n (\n max by (device) (\n irate(node_disk_io_time_ms[1m]) \n and on (instance, device) ceph_disk_occupation\n ) / 10\n )\n)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "$percentile%ile", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": "15m", + "timeShift": null, + "title": "Disk Utilization", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": "", + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "refresh": "15s", + "schemaVersion": 16, + "style": "dark", + "tags": [ + "overview" + ], + "templating": { + "list": [ + { + "allValue": null, + "current": { + "selected": true, + "text": "95", + "value": "95" + }, + "hide": 0, + "includeAll": false, + 
"label": "Percentile", + "multi": false, + "name": "percentile", + "options": [ + { + "selected": false, + "text": "80", + "value": "80" + }, + { + "selected": false, + "text": "85", + "value": "85" + }, + { + "selected": false, + "text": "90", + "value": "90" + }, + { + "selected": true, + "text": "95", + "value": "95" + }, + { + "selected": false, + "text": "98", + "value": "98" + } + ], + "query": "80,85,90,95,98", + "type": "custom" + }, + { + "allValue": null, + "current": { + "text": "85", + "value": "85" + }, + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "disk_full_threshold", + "options": [ + { + "selected": true, + "text": "85", + "value": "85" + } + ], + "query": "85", + "type": "custom" + }, + { + "allValue": null, + "current": { + "text": "7d", + "value": "7d" + }, + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "growth_window", + "options": [ + { + "selected": true, + "text": "7d", + "value": "7d" + } + ], + "query": "7d", + "type": "custom" + }, + { + "allValue": "", + "current": {}, + "datasource": null, + "hide": 2, + "includeAll": true, + "label": "OSD Host", + "multi": false, + "name": "osd_servers", + "options": [], + "query": "label_values(ceph_disk_occupation, instance)", + "refresh": 1, + "regex": "([^.]*).*", + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": "", + "current": {}, + "datasource": null, + "hide": 2, + "includeAll": true, + "label": "OSD Host", + "multi": false, + "name": "osd_servers_raw", + "options": [], + "query": "label_values(ceph_disk_occupation, instance)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "15s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + 
"2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Ceph - At A Glance", + "version": 43 +} diff --git a/ansible/roles/ceph-grafana/files/dashboards/cephmetrics/ceph-backend-storage.json b/ansible/roles/ceph-grafana/files/dashboards/cephmetrics/ceph-backend-storage.json new file mode 100644 index 0000000..2f0474e --- /dev/null +++ b/ansible/roles/ceph-grafana/files/dashboards/cephmetrics/ceph-backend-storage.json @@ -0,0 +1,1268 @@ +{ + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "5.0.4" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Local", + "version": "5.0.0" + }, + { + "type": "panel", + "id": "singlestat", + "name": "Singlestat", + "version": "5.0.0" + }, + { + "type": "panel", + "id": "table", + "name": "Table", + "version": "5.0.0" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "id": null, + "iteration": 1525149605368, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "overview" + ], + "targetBlank": true, + "title": "Shortcuts", + "type": "dashboards" + } + ], + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 13, + "panels": [], + "repeat": null, + "title": "Disk/OSD Host Summary", + "type": "row" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": 
false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 7, + "w": 2, + "x": 0, + "y": 1 + }, + "id": 1, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(ceph_osd_up < 0.5)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 60 + } + ], + "thresholds": "", + "title": "OSDs down", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "0", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "datasource": null, + "fontSize": "100%", + "gridPos": { + "h": 7, + "w": 4, + "x": 2, + "y": 1 + }, + "id": 2, + "links": [], + "minSpan": 4, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": true + }, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "alias": "% Full", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "pattern": "Host and Disk", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "% Full", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + 
"pattern": "Current", + "thresholds": [], + "type": "number", + "unit": "none" + }, + { + "alias": "OSD Id", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Metric", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "expr": "ceph_osd_stat_bytes_used / ceph_osd_stat_bytes * 100 > $disk_full_threshold", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ceph_daemon}}", + "refId": "A", + "step": 40, + "textEditor": true + } + ], + "title": "Disks Near Full", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "columns": [], + "datasource": null, + "fontSize": "100%", + "gridPos": { + "h": 7, + "w": 12, + "x": 6, + "y": 1 + }, + "id": 17, + "links": [], + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 1, + "desc": false + }, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "OSD Host", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "instance", + "thresholds": [], + "type": "string", + "unit": "short" + }, + { + "alias": "Total Capacity", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 1, + "pattern": "Value #A", + "thresholds": [], + "type": "number", + "unit": "bytes" + }, + { + "alias": "# Drives", + "colorMode": 
null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "pattern": "Value #B", + "thresholds": [], + "type": "number", + "unit": "none" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "hidden", + "unit": "short" + } + ], + "targets": [ + { + "expr": "sum by (instance) (\n ceph_disk_occupation{instance=~\"($osd_servers)\"}*0 + on (ceph_daemon) group_right(instance) ceph_osd_stat_bytes\n)", + "format": "table", + "instant": true, + "intervalFactor": 1, + "refId": "A" + }, + { + "expr": "count by (instance) (\n ceph_disk_occupation{instance=~\"($osd_servers)\"}\n)", + "format": "table", + "instant": true, + "intervalFactor": 1, + "refId": "B" + } + ], + "title": "OSD Host Capacity Summary", + "transform": "table", + "type": "table" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 8 + }, + "id": 14, + "panels": [ + { + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateRdYlGn", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "timeseries", + "datasource": null, + "description": "The heatmap categorizes disk utilization into discrete buckets (e.g util 0-5) and shows the frequency of the number of disks that fall within that range as a color. The color chosen depends on the number of disks in the 'bucket', ranging from green (low) to red (high). 
Hover over a colored block to show the number of disks at a given util% for that time interval (20secs).", + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 9 + }, + "heatmap": {}, + "highlightCards": true, + "id": 5, + "legend": { + "show": false + }, + "links": [], + "minSpan": 12, + "targets": [ + { + "expr": "irate(node_disk_io_time_ms{instance=~\"[[osd_servers]]\"}[1m]) / 10 and on (instance, device) ceph_disk_occupation", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 10, + "textEditor": true + } + ], + "title": "Disk Drive Utilization Heatmap - $osd_servers", + "tooltip": { + "show": true, + "showHistogram": false + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": 180, + "xBucketSize": "", + "yAxis": { + "decimals": null, + "format": "short", + "logBase": 1, + "max": "100", + "min": "0", + "show": true, + "splitFactor": null + }, + "yBucketNumber": null, + "yBucketSize": 5 + }, + { + "aliasColors": { + "Read Throughput": "#629E51", + "Write Throughput": "#E0752D" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 9 + }, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 12, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(\n irate(node_disk_bytes_read{instance=~\"($osd_servers)\"}[5m]) and on (instance, device) ceph_disk_occupation\n)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Read throughput", + "refId": "A", + "step": 10, + "textEditor": true + }, + { + "expr": "sum(\n irate(node_disk_bytes_written{instance=~\"($osd_servers)\"}[5m]) 
and on (instance, device) ceph_disk_occupation\n)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Write throughput", + "refId": "B", + "step": 10, + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Disk Throughput - $osd_servers", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + "Read Latency": "#629E51", + "Write Latency": "#E0752D" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 17 + }, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + { + "dashUri": "db/latency-by-server", + "dashboard": "Latency by Server", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "Latency by Server", + "type": "dashboard" + } + ], + "minSpan": 12, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "quantile($percentile/100.0, (\n irate(node_disk_read_time_ms[5m]) / clamp_min(irate(node_disk_reads_completed[5m]), 0.001)\n and on (instance, device) ceph_disk_occupation{instance=~\"($osd_servers)\"}\n))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Read latency", + "refId": "A", + "step": 10, + "textEditor": true + }, + { + 
"expr": "quantile($percentile/100.0, (\n irate(node_disk_write_time_ms[5m]) / clamp_min(irate(node_disk_writes_completed[5m]), 0.001)\n and on (instance, device) ceph_disk_occupation{instance=~\"($osd_servers)\"}\n))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Write latency", + "refId": "B", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Disk Latency - $osd_servers OSDs @ $percentile%ile", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + "disk busy %": "#3F6833" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 17 + }, + "id": 8, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ + { + "dashUri": "db/disk-busy-by-server", + "dashboard": "Disk Busy by Server", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "Disk Busy by Server", + "type": "dashboard" + } + ], + "minSpan": 12, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "quantile($percentile/100, (\n max by (device) (\n irate(node_disk_io_time_ms[5m]) and on (instance, device) ceph_disk_occupation{instance=~\"($osd_servers)\"}\n ) / 10\n))", + "format": "time_series", + "intervalFactor": 2, + 
"legendFormat": "disk busy %", + "refId": "A", + "step": 10, + "textEditor": true + } + ], + "thresholds": [ + { + "colorMode": "custom", + "fill": false, + "line": true, + "lineColor": "rgba(178, 0, 0, 0.29)", + "op": "gt", + "value": 80 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Disk Utilization - $osd_servers OSDs at $percentile%ile", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "", + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + "IOPS/spindle": "#3F6833" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 25 + }, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 12, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "quantile($percentile/100.0, (\n avg by (device) (\n irate(node_disk_reads_completed[5m]) and on (instance, device) ceph_disk_occupation{instance=~\"($osd_servers)\"}\n ) +\n avg by (device) (\n irate(node_disk_writes_completed[5m]) and on (instance, device) ceph_disk_occupation{instance=~\"($osd_servers)\"}\n )\n))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "IOPS/spindle", + "refId": "A", + "step": 10, + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "IOPS per 
Disk @ $percentile%ile - $osd_servers OSDs", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + "IOPS": "#3F6833" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 25 + }, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + { + "dashUri": "db/iops-by-server", + "dashboard": "IOPS by Server", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "IOPS by Server", + "type": "dashboard" + } + ], + "minSpan": 12, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "quantile($percentile/100.0, (\n sum(\n irate(node_disk_reads_completed[5m]) and on (instance, device) ceph_disk_occupation{instance=~\"($osd_servers)\"}\n ) +\n sum(\n irate(node_disk_writes_completed[5m]) and on (instance, device) ceph_disk_occupation{instance=~\"($osd_servers)\"}\n )\n))", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 10, + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Total Disk IOPS - $osd_servers OSDs", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": 
null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "title": "Disk/OSD Load Summary", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 9 + }, + "id": 15, + "panels": [ + { + "aliasColors": { + "CPU Busy": "#447EBC", + "CPU Busy @ 95%ile": "#890F02", + "Cluster-wide CPU Busy @ 95%ile": "#890F02", + "Max CPU Busy": "#BF1B00", + "Max CPU Busy - all OSD Hosts": "#BF1B00" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 3, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 10 + }, + "id": 11, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 12, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "Cluster-wide CPU Busy @ 95%ile", + "fill": 0 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "quantile($percentile / 100.0, (\n sum by (instance) (\n irate(node_cpu{mode=~\"(irq|nice|system|user)\"}[5m])\n ) / sum by (instance) (\n irate(node_cpu[5m])\n )\n) * 100)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Cluster-wide CPU Busy @ $percentile%ile", + "refId": "A", + "step": 10, + "textEditor": true + }, + { + "expr": "avg(\n sum by (instance) (\n irate(node_cpu{instance=~\"($osd_servers)\", mode=~\"(irq|nice|system|user)\"}[5m])\n ) / sum by (instance) (\n irate(node_cpu{instance=~\"($osd_servers)\"}[5m])\n )\n) * 100", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": 
"Average OSD Host(s) CPU Busy", + "refId": "B", + "step": 10, + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "CPU Utilization - $osd_servers", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "", + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + "Network load (rx+tx)": "#3F6833" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 10 + }, + "id": 12, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + { + "dashUri": "db/network-usage-by-server", + "dashboard": "Network Usage by Server", + "includeVars": true, + "keepTime": true, + "targetBlank": true, + "title": "Network Usage by Server", + "type": "dashboard" + } + ], + "minSpan": 12, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum (irate(node_network_receive_bytes{instance=~\"($osd_servers)\", device=~\"(eth|en|bond|ib|mlx).*\"}[5m])) + \nsum (irate(node_network_transmit_bytes{instance=~\"($osd_servers)\", device=~\"(eth|en|bond|ib|mlx).*\"}[5m]))", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "Network load (rx+tx)", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Network Load - $osd_servers", + 
"tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "title": "OSD Host CPU and Network Load", + "type": "row" + } + ], + "refresh": "10s", + "schemaVersion": 16, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "allValue": null, + "current": { + "selected": true, + "text": "95", + "value": "95" + }, + "hide": 0, + "includeAll": false, + "label": "Percentile", + "multi": false, + "name": "percentile", + "options": [ + { + "selected": false, + "text": "80", + "value": "80" + }, + { + "selected": false, + "text": "85", + "value": "85" + }, + { + "selected": false, + "text": "90", + "value": "90" + }, + { + "selected": true, + "text": "95", + "value": "95" + }, + { + "selected": false, + "text": "98", + "value": "98" + } + ], + "query": "80,85,90,95,98", + "type": "custom" + }, + { + "allValue": null, + "current": {}, + "datasource": null, + "hide": 0, + "includeAll": true, + "label": "OSD Hostname", + "multi": true, + "name": "osd_servers", + "options": [], + "query": "ceph_disk_occupation", + "refresh": 1, + "regex": "/instance=\"([^\"]*)\"/", + "sort": 3, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "selected": true, + "text": "85", + "value": "85" + }, + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "disk_full_threshold", + "options": [ + { + "selected": true, + "text": "85", + "value": "85" + } + ], + "query": "85", + "type": "custom" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + 
"refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Ceph Backend Storage", + "version": 14 +} diff --git a/ansible/roles/ceph-grafana/files/dashboards/cephmetrics/ceph-cluster.json b/ansible/roles/ceph-grafana/files/dashboards/cephmetrics/ceph-cluster.json new file mode 100644 index 0000000..b84c75c --- /dev/null +++ b/ansible/roles/ceph-grafana/files/dashboards/cephmetrics/ceph-cluster.json @@ -0,0 +1,2588 @@ +{ + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "5.0.4" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Local", + "version": "5.0.0" + }, + { + "type": "panel", + "id": "singlestat", + "name": "Singlestat", + "version": "5.0.0" + }, + { + "type": "panel", + "id": "text", + "name": "Text", + "version": "5.0.0" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "id": null, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "overview" + ], + "targetBlank": true, + "title": "Shortcuts", + "type": "dashboards" + } + ], + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 29, + "panels": [], + "repeat": null, + "title": "Cluster Configuration", + "type": "row" + }, + { + "content": "", + "gridPos": { + "h": 3, + "w": 2, + "x": 0, + "y": 1 + }, + "id": 1, + "links": [], + "minSpan": 2, + "mode": "markdown", + "title": "", + "transparent": true, + "type": "text" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": 
false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "decimals": 0, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 2, + "x": 2, + "y": 1 + }, + "id": 2, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(ceph_mon_metadata)", + "format": "time_series", + "groupBy": [], + "instant": true, + "intervalFactor": 1, + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "step": 60, + "tags": [], + "textEditor": true + } + ], + "thresholds": "", + "title": "MONs", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "decimals": 0, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 2, + "x": 4, + "y": 1 + }, + "id": 3, + "interval": null, + "links": [], + 
"mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(sum by (instance) (ceph_disk_occupation))", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 60, + "textEditor": true + } + ], + "thresholds": "", + "title": "OSD Hosts", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "decimals": 0, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 2, + "x": 6, + "y": 1 + }, + "id": 4, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + 
"expr": "count(ceph_mds_metadata)", + "format": "time_series", + "groupBy": [], + "instant": true, + "intervalFactor": 1, + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "step": 60, + "tags": [], + "textEditor": true + } + ], + "thresholds": "", + "title": "MDS", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "decimals": 0, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 2, + "x": 8, + "y": 1 + }, + "id": 5, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(ceph_rgw_metadata)", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "refId": "A", + "textEditor": true + } + ], + "thresholds": "", + "title": "RGW Hosts", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "0", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + 
"colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "decimals": 0, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 2, + "x": 10, + "y": 1 + }, + "id": 6, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "max(ceph_iscsi_gateway_tpg_total)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "textEditor": true + } + ], + "thresholds": "", + "title": "iSCSI Hosts", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "decimals": 0, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 12, + "y": 1 + }, + "id": 7, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 4, + "nullPointMode": "connected", + "nullText": null, + 
"postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(max by (id) (ceph_osd_metadata))", + "format": "time_series", + "groupBy": [], + "intervalFactor": 2, + "legendFormat": "", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "step": 60, + "tags": [], + "textEditor": true + } + ], + "thresholds": "", + "title": "OSDs", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 2, + "x": 16, + "y": 1 + }, + "id": 8, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(ceph_pool_metadata)", + "format": "time_series", + 
"groupBy": [], + "intervalFactor": 2, + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "step": 60, + "tags": [], + "textEditor": true + } + ], + "thresholds": "", + "title": "Pools", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "decimals": 1, + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 18, + "y": 1 + }, + "id": 9, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 4, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "ceph_cluster_total_bytes - ceph_cluster_total_used_bytes", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "refId": "A", + "step": 60, + "textEditor": true + } + ], + "thresholds": "", + "title": "Unused Capacity", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 4 + }, + 
"id": 30, + "panels": [], + "repeat": null, + "title": "Cluster Flags", + "type": "row" + }, + { + "content": "", + "gridPos": { + "h": 3, + "w": 4, + "x": 0, + "y": 5 + }, + "id": 10, + "links": [], + "minSpan": 4, + "mode": "markdown", + "title": "", + "transparent": true, + "type": "text" + }, + { + "content": "

Cluster Flags:

", + "gridPos": { + "h": 3, + "w": 2, + "x": 4, + "y": 5 + }, + "height": "95", + "id": 11, + "links": [], + "minSpan": 2, + "mode": "html", + "title": "", + "transparent": true, + "type": "text" + }, + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": null, + "description": "Scrub activity takes place on a daily basis across the OSD's and performs object size and attribute checks. Scrub activity can be controlled with the \"ceph osd scrub\" command.", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 2, + "x": 6, + "y": 5 + }, + "height": "95", + "hideTimeOverride": true, + "id": 12, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "2*scalar(ceph_osd_flag_noscrub) + scalar(ceph_pg_scrubbing >bool 0)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 2, + "textEditor": true + } + ], + "thresholds": "1,2", + "timeFrom": "1m", + "timeShift": null, + "title": "SCRUB", + "type": "singlestat", + "valueFontSize": "40%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + }, + { + "op": "=", + "text": "ENABLED", + "value": "0" + }, + { + "op": "=", + "text": "ACTIVE", + "value": "1" + }, + { + "op": 
"=", + "text": "DISABLED", + "value": "2" + }, + { + "op": "=", + "text": "DISABLED", + "value": "3" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": null, + "description": "Deep scrub acts on the objects within placement groups (PGs). Objects are read and their checksums compared to protect against silent bit-rot events. Although a weekly activity, in large clusters it is normal to see deep-scrub active on a daily basis. Scrub activity can be controlled with the \"ceph osd scrub\" command.", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 2, + "x": 8, + "y": 5 + }, + "height": "95", + "hideTimeOverride": true, + "id": 13, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "2*scalar(ceph_osd_flag_nodeep_scrub) + scalar(ceph_pg_deep >bool 0)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 2, + "textEditor": true + } + ], + "thresholds": "1,2", + "timeFrom": "1m", + "title": "DEEP", + "type": "singlestat", + "valueFontSize": "40%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + }, + { + "op": "=", + "text": "ENABLED", + "value": "0" + }, + { + "op": 
"=", + "text": "ACTIVE", + "value": "1" + }, + { + "op": "=", + "text": "DISABLED", + "value": "2" + }, + { + "op": "=", + "text": "DISABLED", + "value": "3" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": null, + "description": "With the recovery flag enabled, disruption in the cluster will result in data being recreated to freespace within the cluster to honor the pool's protection schema. Under some circumstances, you may use the norecover flag to prevent automatic recovery.", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 2, + "x": 10, + "y": 5 + }, + "height": "95", + "hideTimeOverride": true, + "id": 14, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "2*scalar(ceph_osd_flag_norecover) + scalar(ceph_pg_recovering >bool 0)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 2, + "textEditor": true + } + ], + "thresholds": "1,2", + "timeFrom": "1m", + "title": "RECOVERY", + "type": "singlestat", + "valueFontSize": "40%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + }, + { + "op": "=", + "text": "ENABLED", + "value": "0" + }, + { + 
"op": "=", + "text": "ACTIVE", + "value": "1" + }, + { + "op": "=", + "text": "DISABLED", + "value": "2" + }, + { + "op": "=", + "text": "DISABLED", + "value": "3" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": null, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 2, + "x": 12, + "y": 5 + }, + "height": "95", + "hideTimeOverride": true, + "id": 15, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "2*scalar(ceph_osd_flag_nobackfill) + scalar(ceph_pg_backfilling >bool 0)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 2, + "textEditor": true + }, + { + "expr": "", + "format": "time_series", + "intervalFactor": 1, + "refId": "B" + } + ], + "thresholds": "1,2", + "timeFrom": "1m", + "title": "BACKFILL", + "type": "singlestat", + "valueFontSize": "40%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + }, + { + "op": "=", + "text": "ENABLED", + "value": "0" + }, + { + "op": "=", + "text": "ACTIVE", + "value": "1" + }, + { + "op": "=", + "text": "DISABLED", + "value": "2" + }, + { + "op": "=", + "text": "DISABLED", + "value": "3" + } + ], + 
"valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": null, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 2, + "x": 14, + "y": 5 + }, + "height": "95", + "hideTimeOverride": true, + "id": 16, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "2*scalar(ceph_osd_flag_norebalance)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 2, + "textEditor": true + } + ], + "thresholds": "1,2", + "timeFrom": "1m", + "title": "REBALANCE", + "type": "singlestat", + "valueFontSize": "40%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + }, + { + "op": "=", + "text": "ENABLED", + "value": "0" + }, + { + "op": "=", + "text": "ACTIVE", + "value": "1" + }, + { + "op": "=", + "text": "DISABLED", + "value": "2" + }, + { + "op": "=", + "text": "DISABLED", + "value": "3" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": null, + "description": "The OUT flag setting allows the mon's to mark OSD's 
as out of the configuration when they stop sending heartbeats to the mon's. By marking them OUT, recovery takes place. However, for planned maintenance you can set the cluster to noout to disable this behavior.", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 2, + "x": 16, + "y": 5 + }, + "height": "95", + "hideTimeOverride": true, + "id": 17, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "2*scalar(ceph_osd_flag_noout)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 2, + "textEditor": true + } + ], + "thresholds": "1,2", + "timeFrom": "1m", + "title": "OUT", + "type": "singlestat", + "valueFontSize": "40%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + }, + { + "op": "=", + "text": "ENABLED", + "value": "0" + }, + { + "op": "=", + "text": "ACTIVE", + "value": "1" + }, + { + "op": "=", + "text": "DISABLED", + "value": "2" + }, + { + "op": "=", + "text": "DISABLED", + "value": "3" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": null, + "description": "The down feature allows OSD's to mark their peers as DOWN when they are not reachable. 
However, if there is a poor network or planned outages, you may want to set this flag to nodown to prevent OSD's flapping between up/down states", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 2, + "x": 18, + "y": 5 + }, + "height": "95", + "hideTimeOverride": true, + "id": 18, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "2*scalar(ceph_osd_flag_nodown) + scalar(ceph_pg_down >bool 0)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 2, + "textEditor": true + } + ], + "thresholds": "1,2", + "timeFrom": "1m", + "title": "DOWN", + "type": "singlestat", + "valueFontSize": "40%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + }, + { + "op": "=", + "text": "ENABLED", + "value": "0" + }, + { + "op": "=", + "text": "ACTIVE", + "value": "1" + }, + { + "op": "=", + "text": "DISABLED", + "value": "2" + }, + { + "op": "=", + "text": "DISABLED", + "value": "3" + } + ], + "valueName": "current" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 8 + }, + "id": 31, + "panels": [ + { + "aliasColors": { + "Raw": "#3F6833", + "Used": "#E0752D" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "Shows the Capacity within the cluster over the past 7 days", + 
"fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 9 + }, + "hideTimeOverride": true, + "id": 19, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 12, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "alias": "Used", + "expr": "scalar(ceph_cluster_total_used_bytes)", + "format": "time_series", + "groupBy": [], + "intervalFactor": 2, + "legendFormat": "Used", + "policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "step": 3600, + "tags": [], + "textEditor": true + }, + { + "alias": "Raw Capacity", + "expr": "scalar(ceph_cluster_total_bytes)", + "format": "time_series", + "groupBy": [], + "intervalFactor": 2, + "legendFormat": "Raw", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "step": 3600, + "tags": [], + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": "7d", + "timeShift": null, + "title": "Cluster Capacity - Past 7 Days", + "tooltip": { + "shared": true, + "sort": 1, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + "Raw": "#3F6833", + "Used": "#E0752D" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "decimals": 2, 
+ "description": "Shows the Capacity within each pool over the past 7 days", + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 9 + }, + "hideTimeOverride": true, + "id": 20, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 12, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "alias": "Used", + "expr": "max by (name) (\n ceph_pool_bytes_used / (ceph_pool_bytes_used + ceph_pool_max_avail) * 100 + on (pool_id) group_left (name) ceph_pool_metadata\n)", + "format": "time_series", + "groupBy": [], + "intervalFactor": 2, + "legendFormat": "{{name}}", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "step": 3600, + "tags": [], + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": "7d", + "timeShift": null, + "title": "Pool Capacity - Past 7 Days", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": "", + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + "Raw": "#3F6833", + "Used": "#E0752D" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "decimals": 2, + "description": "Shows the Capacity within the cluster over the past 7 days", + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 9 + }, + 
"hideTimeOverride": true, + "id": 21, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 12, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "alias": "Used", + "expr": "scalar(ceph_cluster_total_objects)", + "format": "time_series", + "groupBy": [], + "intervalFactor": 2, + "legendFormat": "RADOS Objects", + "policy": "default", + "refId": "B", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + } + ] + ], + "step": 3600, + "tags": [], + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": "7d", + "timeShift": null, + "title": "RADOS Object History - Past 7 Days", + "tooltip": { + "shared": true, + "sort": 1, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "RADOS Object Count", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "Each bar indicates the number of OSD's that have a PG count in a specific range as shown on the x axis.", + "fill": 1, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 16 + }, + "id": 37, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": false, + "hideZero": false, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "total": false, + "values": false + }, + "lines": false, + "linewidth": 1, + 
"links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "ceph_osd_numpg\n", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "legendFormat": "PGs per OSD", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Distribution of PGs per OSD", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": 20, + "mode": "histogram", + "name": null, + "show": true, + "values": [ + "total" + ] + }, + "yaxes": [ + { + "decimals": 0, + "format": "short", + "label": "# of OSDs", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "columns": [], + "datasource": null, + "description": "This table shows all OSDs sorted by their PG Count. 
The PG count is color coded.", + "fontSize": "100%", + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 16 + }, + "id": 35, + "links": [], + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 5, + "desc": true + }, + "styles": [ + { + "alias": "OSD", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "ceph_daemon", + "thresholds": [], + "type": "string", + "unit": "short" + }, + { + "alias": "Device", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "device", + "thresholds": [], + "type": "string", + "unit": "short" + }, + { + "alias": "Host", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "instance", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "# PGs", + "colorMode": "value", + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "pattern": "Value", + "thresholds": [ + "200", + "250" + ], + "type": "number", + "unit": "none" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "hidden", + "unit": "short" + } + ], + "targets": [ + { + "expr": "ceph_osd_numpg * on(ceph_daemon) group_left(instance,device) ceph_disk_occupation", + "format": "table", + "instant": true, + "intervalFactor": 1, + "refId": "A" + } + ], + "title": "OSDs by PG Counts", + "transform": "table", + "type": "table" + } + ], + "repeat": 
null, + "title": "Cluster Capacity", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 9 + }, + "id": 32, + "panels": [ + { + "columns": [], + "datasource": "Local", + "fontSize": "100%", + "gridPos": { + "h": 7, + "w": 4, + "x": 0, + "y": 10 + }, + "hideTimeOverride": true, + "id": 22, + "links": [], + "minSpan": 4, + "pageSize": 10, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": true + }, + "styles": [ + { + "alias": "Host", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "ceph_daemon", + "thresholds": [], + "type": "string", + "unit": "short" + }, + { + "alias": "Version", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "short_vers", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "hidden", + "unit": "short" + } + ], + "targets": [ + { + "expr": "label_replace(ceph_mon_metadata,\"short_vers\",\"$1\",\"ceph_version\",\"ceph version(.*) (.*) (.*) (.*)\")", + "format": "table", + "hide": false, + "instant": true, + "intervalFactor": 1, + "legendFormat": "", + "refId": "D" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "MONs", + "transform": "table", + "type": "table" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "datasource": null, + "fontSize": "100%", + "gridPos": { + "h": 7, + "w": 4, + "x": 4, + "y": 10 + }, + "hideTimeOverride": true, + "id": 24, + "links": [], + "minSpan": 4, + "pageSize": 10, + "scroll": true, + 
"showHeader": true, + "sort": { + "col": 6, + "desc": false + }, + "styles": [ + { + "alias": "OSD", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "pattern": "ceph_daemon", + "thresholds": [], + "type": "number", + "unit": "none" + }, + { + "alias": "Version", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "short_vers", + "thresholds": [], + "type": "string", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "hidden", + "unit": "short" + } + ], + "targets": [ + { + "expr": "label_replace(ceph_osd_metadata,\"short_vers\",\"$1\",\"ceph_version\",\"ceph version(.*) (.*) (.*) (.*)\")", + "format": "table", + "instant": true, + "intervalFactor": 1, + "legendFormat": "{{id}} - {{short_vers}}", + "refId": "A" + } + ], + "timeFrom": "2m", + "timeShift": null, + "title": "OSDs", + "transform": "table", + "type": "table" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "datasource": null, + "fontSize": "100%", + "gridPos": { + "h": 7, + "w": 4, + "x": 8, + "y": 10 + }, + "hideTimeOverride": true, + "id": 26, + "links": [], + "minSpan": 4, + "pageSize": 10, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": true + }, + "styles": [ + { + "alias": "RGW Host", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "ceph_daemon", + "thresholds": [], + "type": "string", + "unit": "short" + }, + { + "alias": "Version", + 
"colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "short_vers", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "hidden", + "unit": "short" + } + ], + "targets": [ + { + "expr": "label_replace(ceph_rgw_metadata,\"short_vers\",\"$1\",\"ceph_version\",\"ceph version(.*) (.*) (.*) (.*)\")", + "format": "table", + "instant": true, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A", + "textEditor": true + } + ], + "timeFrom": "2m", + "title": "RGWs", + "transform": "table", + "type": "table" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "datasource": null, + "fontSize": "100%", + "gridPos": { + "h": 7, + "w": 4, + "x": 12, + "y": 10 + }, + "hideTimeOverride": true, + "id": 33, + "links": [], + "minSpan": 4, + "pageSize": 10, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": true + }, + "styles": [ + { + "alias": "MDS Host", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "ceph_daemon", + "thresholds": [], + "type": "string", + "unit": "short" + }, + { + "alias": "Version", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "short_vers", + "thresholds": [], + "type": "string", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 
2, + "pattern": "/.*/", + "thresholds": [], + "type": "hidden", + "unit": "short" + } + ], + "targets": [ + { + "expr": "label_replace(ceph_mds_metadata,\"short_vers\",\"$1\",\"ceph_version\",\"ceph version(.*) (.*) (.*) (.*)\")", + "format": "table", + "instant": true, + "intervalFactor": 1, + "legendFormat": "{{id}} - {{short_vers}}", + "refId": "A", + "textEditor": true + } + ], + "timeFrom": "2m", + "title": "MDSs", + "transform": "table", + "type": "table" + }, + { + "content": "", + "gridPos": { + "h": 7, + "w": 2, + "x": 16, + "y": 10 + }, + "id": 27, + "links": [], + "minSpan": 2, + "mode": "markdown", + "title": "", + "transparent": true, + "type": "text" + } + ], + "repeat": null, + "title": "Ceph Daemon Version Information", + "type": "row" + } + ], + "refresh": false, + "schemaVersion": 16, + "style": "dark", + "tags": [ + "overview" + ], + "templating": { + "list": [] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Ceph Cluster", + "version": 12 +} diff --git a/ansible/roles/ceph-grafana/files/dashboards/cephmetrics/ceph-health.json b/ansible/roles/ceph-grafana/files/dashboards/cephmetrics/ceph-health.json new file mode 100644 index 0000000..e50b2bc --- /dev/null +++ b/ansible/roles/ceph-grafana/files/dashboards/cephmetrics/ceph-health.json @@ -0,0 +1,2343 @@ +{ + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "5.0.4" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Local", + "version": "5.0.0" + }, + { + "type": "panel", + "id": "singlestat", + "name": "Singlestat", + "version": "5.0.0" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + 
"iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "id": null, + "iteration": 1526964924155, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "overview" + ], + "targetBlank": true, + "title": "Shortcuts", + "type": "dashboards" + } + ], + "panels": [ + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "rgba(1, 167, 1, 1)", + "rgba(255,165,0, 1)", + "rgba(255, 0, 0, 1)" + ], + "datasource": null, + "description": "Shows the overall health of the ceph cluster.", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 24, + "x": 0, + "y": 0 + }, + "height": "70", + "hideTimeOverride": true, + "id": 1, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "0", + "text": "HEALTH OK", + "to": "1" + }, + { + "from": "1", + "text": "HEALTH WARNING", + "to": "4" + }, + { + "from": "5", + "text": "HEALTH ERROR", + "to": "99" + }, + { + "from": "-10", + "text": "NODATA", + "to": "0" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "ceph_health_status", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 2, + "textEditor": true + } + ], + "thresholds": "1,5", + "timeFrom": "1m", + "timeShift": null, + "title": "", + "type": "singlestat", 
+ "valueFontSize": "50%", + "valueMaps": [ + { + "op": "=", + "text": "HEALTH OK", + "value": "0" + }, + { + "op": "=", + "text": "HEALTH WARN", + "value": "1" + }, + { + "op": "=", + "text": "HEALTH ERROR", + "value": "2" + } + ], + "valueName": "current" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 3 + }, + "id": 22, + "panels": [ + { + "aliasColors": { + "Ceph Health": "#0a50a1" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "The chart plots the clusters health, over time. The colored bands show 3 distinct areas; green (OK), yellow(WARN) and red(ERROR). The plot line in blue is this clusters current health, so you can see over time how long the cluster spends in an OK, WARN or ERROR state", + "fill": 0, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 4 + }, + "height": "350", + "hideTimeOverride": true, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "maxDataPoints": "", + "minSpan": 6, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": true, + "targets": [ + { + "expr": "ceph_health_status ", + "format": "time_series", + "instant": false, + "intervalFactor": 2, + "legendFormat": "Ceph Health", + "refId": "A", + "step": 600, + "textEditor": true + } + ], + "thresholds": [ + { + "colorMode": "custom", + "fill": true, + "fillColor": "#9ac48a", + "line": false, + "op": "lt", + "value": 0.1 + }, + { + "colorMode": "custom", + "fill": true, + "fillColor": "rgba(244, 213, 152, 0.58)", + "line": false, + "op": "lt", + "value": 1.1 + }, + { + "colorMode": "custom", + "fill": true, + "fillColor": "rgba(163, 0, 0, 0.3)", + "line": false, + "op": "gt", + "value": 1.1 + } + ], + "timeFrom": 
"3d", + "timeShift": null, + "title": "Health History - Last 3 days", + "tooltip": { + "shared": false, + "sort": 1, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "", + "logBase": 1, + "max": "2", + "min": "-0.5", + "show": false + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "title": "Cluster Health History", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 4 + }, + "id": 23, + "panels": [ + { + "content": "

MONs

", + "gridPos": { + "h": 5, + "w": 2, + "x": 0, + "y": 5 + }, + "id": 6, + "links": [], + "minSpan": 2, + "mode": "html", + "title": "", + "transparent": true, + "type": "text" + }, + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "#d44a3a", + "rgba(237, 129, 40, 0.89)", + "#299c46" + ], + "datasource": null, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 2, + "w": 3, + "x": 2, + "y": 5 + }, + "id": 28, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "repeat": "mon_servers", + "repeatDirection": "v", + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "label_replace(ceph_mon_quorum_status{ceph_daemon=~\"[[mon_servers]]\"},\"mon_host\",\"$2\",\"ceph_daemon\",\"(.*)_(.*)\")", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "0.5,1", + "title": "$mon_servers", + "type": "singlestat", + "valueFontSize": "50%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + }, + { + "op": "=", + "text": "OK", + "value": "1" + }, + { + "op": "=", + "text": "DOWN", + "value": "0" + } + ], + "valueName": "current" + }, + { + "content": "

Cluster Flags:

", + "description": "Show cluster flags that determine automatic maintenance and recovery operations", + "gridPos": { + "h": 3, + "w": 2, + "x": 8, + "y": 5 + }, + "id": 31, + "links": [], + "minSpan": 2, + "mode": "html", + "title": "", + "transparent": true, + "type": "text" + }, + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": null, + "description": "Scrub activity takes place daily basis across the OSD's and performs object size and attribute checks. Scrub activity can be controlled with the \"ceph osd scrub\" command.", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 2, + "x": 10, + "y": 5 + }, + "height": "95", + "hideTimeOverride": true, + "id": 7, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "2*scalar(ceph_osd_flag_noscrub) + scalar(ceph_pg_scrubbing >bool 0)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 2, + "textEditor": true + } + ], + "thresholds": "1,2", + "timeFrom": "1m", + "title": "SCRUB", + "type": "singlestat", + "valueFontSize": "40%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + }, + { + "op": "=", + "text": "ENABLED", + "value": "0" + }, + { + 
"op": "=", + "text": "ACTIVE", + "value": "1" + }, + { + "op": "=", + "text": "DISABLED", + "value": "2" + }, + { + "op": "=", + "text": "DISABLED", + "value": "3" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": null, + "description": "Deep scrub acts on the objects within placement groups (PGs). Objects are read, and checksum's compared to protect against silent bit-rot events. Although a weekly activity, in large clusters is normal to see deep-scrub active on a daily basis. Scrub activity can be controlled with the \"ceph osd scrub\" command.", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 2, + "x": 12, + "y": 5 + }, + "height": "95", + "hideTimeOverride": true, + "id": 8, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "2*scalar(ceph_osd_flag_nodeep_scrub) + scalar(ceph_pg_deep >bool 0)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 2, + "textEditor": true + } + ], + "thresholds": "1,2", + "timeFrom": "1m", + "title": "DEEP", + "type": "singlestat", + "valueFontSize": "40%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + }, + { + 
"op": "=", + "text": "ENABLED", + "value": "0" + }, + { + "op": "=", + "text": "ACTIVE", + "value": "1" + }, + { + "op": "=", + "text": "DISABLED", + "value": "2" + }, + { + "op": "=", + "text": "DISABLED", + "value": "3" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": null, + "description": "The OUT flag setting allows the mons to mark OSDs as out of the configuration when they stop sending heartbeats to the mons. By marking them OUT, recovery takes place. However, for planned maintenance you can set the cluster to noout to disable this behavior.", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 2, + "x": 14, + "y": 5 + }, + "height": "95", + "hideTimeOverride": true, + "id": 12, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "2*scalar(ceph_osd_flag_noout)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 2, + "textEditor": true + } + ], + "thresholds": "1,2", + "timeFrom": "1m", + "timeShift": null, + "title": "OUT", + "type": "singlestat", + "valueFontSize": "40%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + }, + { + "op": "=", + 
"text": "ENABLED", + "value": "0" + }, + { + "op": "=", + "text": "ACTIVE", + "value": "1" + }, + { + "op": "=", + "text": "DISABLED", + "value": "2" + }, + { + "op": "=", + "text": "DISABLED", + "value": "3" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": null, + "description": "The down feature allows OSD's to mark their peers as DOWN when they are not reachable. However, if there is a poor network or planned outages, you may want to set this flag to nodown to prevent OSD's flapping between up/down states", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 2, + "x": 16, + "y": 5 + }, + "height": "95", + "hideTimeOverride": true, + "id": 13, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "2*scalar(ceph_osd_flag_nodown) + scalar(ceph_pg_down >bool 0)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 2, + "textEditor": true + } + ], + "thresholds": "1,2", + "timeFrom": "1m", + "timeShift": null, + "title": "DOWN", + "type": "singlestat", + "valueFontSize": "40%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + }, + { + "op": "=", + "text": 
"ENABLED", + "value": "0" + }, + { + "op": "=", + "text": "ACTIVE", + "value": "1" + }, + { + "op": "=", + "text": "DISABLED", + "value": "2" + }, + { + "op": "=", + "text": "DISABLED", + "value": "3" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": null, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 2, + "x": 18, + "y": 5 + }, + "height": "95", + "hideTimeOverride": true, + "id": 34, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "2*scalar(ceph_osd_flag_nobackfill) + scalar(ceph_pg_backfilling >bool 0)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 2, + "textEditor": true + }, + { + "expr": "", + "format": "time_series", + "intervalFactor": 1, + "refId": "B" + } + ], + "thresholds": "1,2", + "timeFrom": "1m", + "title": "BACKFILL", + "type": "singlestat", + "valueFontSize": "40%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + }, + { + "op": "=", + "text": "ENABLED", + "value": "0" + }, + { + "op": "=", + "text": "ACTIVE", + "value": "1" + }, + { + "op": "=", + "text": "DISABLED", + "value": "2" + }, + { + "op": "=", + "text": 
"DISABLED", + "value": "3" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": null, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 2, + "x": 20, + "y": 5 + }, + "height": "95", + "hideTimeOverride": true, + "id": 11, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "2*scalar(ceph_osd_flag_norebalance)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 2, + "textEditor": true + } + ], + "thresholds": "1,2", + "timeFrom": "1m", + "title": "REBALANCE", + "type": "singlestat", + "valueFontSize": "40%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + }, + { + "op": "=", + "text": "ENABLED", + "value": "0" + }, + { + "op": "=", + "text": "ACTIVE", + "value": "1" + }, + { + "op": "=", + "text": "DISABLED", + "value": "2" + }, + { + "op": "=", + "text": "DISABLED", + "value": "3" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": null, + "description": "With the 
recovery flag enabled, disruption in the cluster will result in data being recreated to freespace within the cluster to honor the pool's protection schema. Under some circumstances, you may use the norecover flag to prevent automatic recovery.", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 2, + "x": 22, + "y": 5 + }, + "height": "95", + "hideTimeOverride": true, + "id": 9, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "2*scalar(ceph_osd_flag_norecover) + scalar(ceph_pg_recovering >bool 0)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 2, + "textEditor": true + } + ], + "thresholds": "1,2", + "timeFrom": "1m", + "title": "RECOVERY", + "type": "singlestat", + "valueFontSize": "40%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + }, + { + "op": "=", + "text": "ENABLED", + "value": "0" + }, + { + "op": "=", + "text": "ACTIVE", + "value": "1" + }, + { + "op": "=", + "text": "DISABLED", + "value": "2" + }, + { + "op": "=", + "text": "DISABLED", + "value": "3" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "#d44a3a", + "rgba(237, 129, 40, 0.89)", + "#299c46" + ], + "datasource": null, + "format": "none", + "gauge": { + "maxValue": 100, + 
"minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 2, + "w": 3, + "x": 2, + "y": 7 + }, + "id": 69, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "repeat": null, + "repeatDirection": "v", + "repeatIteration": 1526963039202, + "repeatPanelId": 28, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "label_replace(ceph_mon_quorum_status{ceph_daemon=~\"[[mon_servers]]\"},\"mon_host\",\"$2\",\"ceph_daemon\",\"(.*)_(.*)\")", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "0.5,1", + "title": "$mon_servers", + "type": "singlestat", + "valueFontSize": "50%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + }, + { + "op": "=", + "text": "OK", + "value": "1" + }, + { + "op": "=", + "text": "DOWN", + "value": "0" + } + ], + "valueName": "current" + }, + { + "content": "

OSD
Hosts
Down

", + "description": "Shows which OSD hosts have all defined osd daemons in a down state", + "gridPos": { + "h": 4, + "w": 2, + "x": 8, + "y": 8 + }, + "id": 54, + "links": [], + "minSpan": 2, + "mode": "html", + "title": "", + "transparent": true, + "type": "text" + }, + { + "columns": [], + "datasource": null, + "fontSize": "100%", + "gridPos": { + "h": 4, + "w": 10, + "x": 10, + "y": 8 + }, + "id": 58, + "links": [], + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 2, + "desc": false + }, + "styles": [ + { + "alias": "Hostname", + "colorMode": "row", + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "instance", + "thresholds": [ + "" + ], + "type": "string", + "unit": "short" + }, + { + "alias": "OSDs Active", + "colorMode": "row", + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "pattern": "Value", + "thresholds": [ + "1" + ], + "type": "hidden", + "unit": "none" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "hidden", + "unit": "short" + } + ], + "targets": [ + { + "expr": "(count by(instance) (ceph_disk_occupation * on(ceph_daemon) group_right(instance) ceph_osd_up == 0) - count by(instance) (ceph_disk_occupation))", + "format": "table", + "hide": false, + "instant": true, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "", + "transform": "table", + "transparent": false, + "type": "table" + }, + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "#d44a3a", + "rgba(237, 129, 40, 0.89)", + "#299c46" + ], + "datasource": 
null, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 2, + "w": 3, + "x": 2, + "y": 9 + }, + "id": 70, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "repeat": null, + "repeatDirection": "v", + "repeatIteration": 1526963039202, + "repeatPanelId": 28, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "label_replace(ceph_mon_quorum_status{ceph_daemon=~\"[[mon_servers]]\"},\"mon_host\",\"$2\",\"ceph_daemon\",\"(.*)_(.*)\")", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "0.5,1", + "title": "$mon_servers", + "type": "singlestat", + "valueFontSize": "50%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + }, + { + "op": "=", + "text": "OK", + "value": "1" + }, + { + "op": "=", + "text": "DOWN", + "value": "0" + } + ], + "valueName": "current" + } + ], + "repeat": null, + "title": "Cluster State", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 5 + }, + "id": 64, + "panels": [ + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "datasource": null, + "filterNull": false, + "fontSize": "100%", + "gridPos": { + "h": 7, + "w": 5, + "x": 0, + "y": 6 + }, + "id": 18, + "links": [], + "minSpan": 6, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": null, + "desc": false + 
}, + "styles": [ + { + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "alias": "Object State", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Metric", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "Count", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "pattern": "Current", + "thresholds": [], + "type": "number", + "unit": "none" + }, + { + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 0, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "none" + } + ], + "targets": [ + { + "alias": "Objects", + "expr": "ceph_cluster_total_objects", + "format": "time_series", + "groupBy": [ + { + "params": [ + "$__interval" + ], + "type": "time" + }, + { + "params": [ + "null" + ], + "type": "fill" + } + ], + "intervalFactor": 1, + "legendFormat": "Total Objects", + "policy": "default", + "refId": "A", + "resultFormat": "time_series", + "select": [ + [ + { + "params": [ + "value" + ], + "type": "field" + }, + { + "params": [], + "type": "mean" + } + ] + ], + "step": 20, + "tags": [], + "textEditor": true + }, + { + "expr": "", + "format": "table", + "intervalFactor": 1, + "legendFormat": "Objects misplaced", + "refId": "B" + }, + { + "expr": "", + "format": "table", + "intervalFactor": 1, + "legendFormat": "Objects degraded", + "refId": "C" + }, + { + "expr": "", + "format": "table", + "intervalFactor": 1, + "legendFormat": "Objects unfound", + "refId": "D" + } + ], + "title": "Object Summary", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "columns": [ + { + "text": "Avg", + "value": "avg" + } + 
], + "datasource": null, + "filterNull": false, + "fontSize": "100%", + "gridPos": { + "h": 7, + "w": 5, + "x": 5, + "y": 6 + }, + "id": 20, + "links": [], + "minSpan": 6, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": null, + "desc": false + }, + "styles": [ + { + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "alias": "PG State", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Metric", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "Count", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "pattern": "Current", + "thresholds": [], + "type": "number", + "unit": "none" + }, + { + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 0, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "none" + } + ], + "targets": [ + { + "expr": "ceph_pg_total", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "legendFormat": "PGs", + "refId": "A", + "step": 20 + }, + { + "expr": "ceph_pg_active", + "format": "time_series", + "hide": false, + "instant": true, + "intervalFactor": 1, + "legendFormat": "PGs Active", + "refId": "B", + "step": 20 + }, + { + "expr": "ceph_pg_clean", + "format": "time_series", + "hide": false, + "instant": true, + "intervalFactor": 1, + "legendFormat": "PGs Active+clean", + "refId": "C", + "step": 20 + }, + { + "expr": "ceph_pg_peering", + "format": "time_series", + "hide": false, + "instant": true, + "intervalFactor": 1, + "legendFormat": "PGs Peering", + "refId": "D", + "step": 20 + }, + { + "expr": "ceph_pg_unknown", + "format": "time_series", + 
"hide": false, + "instant": true, + "intervalFactor": 1, + "legendFormat": "PGs Unknown", + "refId": "E" + } + ], + "title": "PG Summary", + "transform": "timeseries_aggregations", + "type": "table" + } + ], + "title": "RADOS Information", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 6 + }, + "id": 24, + "panels": [ + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "datasource": null, + "fontSize": "100%", + "gridPos": { + "h": 7, + "w": 3, + "x": 0, + "y": 14 + }, + "id": 14, + "links": [], + "minSpan": 2, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": true + }, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "alias": "% Full", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "pattern": "Current", + "thresholds": [], + "type": "hidden", + "unit": "none" + }, + { + "alias": "Host.OSD Id", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Metric", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "expr": "ceph_osd_stat_bytes_used / ceph_osd_stat_bytes * 100 > $disk_full_threshold", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ceph_daemon}}", + "refId": "A", + "step": 60, + "textEditor": true + } + ], + "title": "Disks Near Full", + "transform": "timeseries_aggregations", + "type": 
"table" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "datasource": null, + "fontSize": "100%", + "gridPos": { + "h": 7, + "w": 6, + "x": 3, + "y": 14 + }, + "hideTimeOverride": true, + "id": 15, + "links": [], + "minSpan": 2, + "pageSize": 100, + "scroll": true, + "showHeader": true, + "sort": { + "col": 1, + "desc": false + }, + "styles": [ + { + "alias": "OSD ID", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "pattern": "_id", + "thresholds": [], + "type": "number", + "unit": "none" + }, + { + "alias": "Hostname", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "osd_host", + "thresholds": [], + "type": "string", + "unit": "short" + }, + { + "alias": "Device", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "device", + "thresholds": [], + "type": "string", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "hidden", + "unit": "short" + } + ], + "targets": [ + { + "expr": "label_replace(ceph_osd_up,\"_id\",\"$1\",\"ceph_daemon\",\"osd.(.*)\") * on(ceph_daemon) \n group_left(osd_host,device) label_replace(ceph_disk_occupation,\"osd_host\",\"$1\",\"instance\",\"(.+?)\\\\.(.*)\") < 0.5", + "format": "table", + "hide": false, + "instant": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 2, + "textEditor": true + } + ], + "timeFrom": "1m", + "timeShift": null, + 
"title": "OSD's Down", + "transform": "table", + "type": "table" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "datasource": null, + "fontSize": "100%", + "gridPos": { + "h": 7, + "w": 6, + "x": 9, + "y": 14 + }, + "hideTimeOverride": true, + "id": 16, + "links": [], + "minSpan": 2, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 1, + "desc": false + }, + "styles": [ + { + "alias": "OSD ID", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "_id", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "Hostname", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "osd_host", + "thresholds": [], + "type": "string", + "unit": "short" + }, + { + "alias": "Device", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "device", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "hidden", + "unit": "short" + } + ], + "targets": [ + { + "expr": "label_replace(ceph_osd_in,\"_id\",\"$1\",\"ceph_daemon\",\"osd.(.*)\") * on(ceph_daemon) \n group_left(osd_host,device) label_replace(ceph_disk_occupation,\"osd_host\",\"$1\",\"instance\",\"(.+?)\\\\.(.*)\") < 0.5", + "format": "table", + "hide": false, + "instant": true, + "intervalFactor": 1, + "refId": "B" + } + ], + "timeFrom": "5m", + "timeShift": null, + 
"title": "OSDs Out", + "transform": "table", + "type": "table" + }, + { + "columns": [], + "datasource": null, + "description": "This table shows all OSDs with > 275 PGs", + "fontSize": "100%", + "gridPos": { + "h": 7, + "w": 9, + "x": 15, + "y": 14 + }, + "id": 68, + "links": [], + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 5, + "desc": true + }, + "styles": [ + { + "alias": "OSD", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "ceph_daemon", + "thresholds": [], + "type": "string", + "unit": "short" + }, + { + "alias": "Device", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "device", + "thresholds": [], + "type": "string", + "unit": "short" + }, + { + "alias": "Host", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "instance", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "# PGs", + "colorMode": "row", + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "pattern": "Value", + "thresholds": [ + "200", + "250" + ], + "type": "number", + "unit": "none" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "hidden", + "unit": "short" + } + ], + "targets": [ + { + "expr": "(ceph_osd_numpg > 275) * on(ceph_daemon) group_left(instance,device) ceph_disk_occupation", + "format": "table", + "instant": 
true, + "intervalFactor": 1, + "refId": "A" + } + ], + "title": "OSDs with High PG Counts", + "transform": "table", + "type": "table" + }, + { + "content": "", + "gridPos": { + "h": 7, + "w": 2, + "x": 6, + "y": 21 + }, + "id": 17, + "links": [], + "minSpan": 2, + "mode": "html", + "title": "", + "transparent": true, + "type": "text" + }, + { + "content": "", + "gridPos": { + "h": 7, + "w": 2, + "x": 16, + "y": 21 + }, + "id": 19, + "links": [], + "minSpan": 2, + "mode": "markdown", + "title": "", + "transparent": true, + "type": "text" + } + ], + "repeat": null, + "title": "Storage Information", + "type": "row" + } + ], + "refresh": "10s", + "schemaVersion": 16, + "style": "dark", + "tags": [ + "overview" + ], + "templating": { + "list": [ + { + "allValue": null, + "current": {}, + "datasource": null, + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "osd_servers", + "options": [], + "query": "ceph_disk_occupation", + "refresh": 1, + "regex": "/instance=\"([^\"]*)\"/", + "sort": 3, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "selected": true, + "text": "85", + "value": "85" + }, + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "disk_full_threshold", + "options": [ + { + "selected": true, + "text": "85", + "value": "85" + } + ], + "query": "85", + "type": "custom" + }, + { + "allValue": null, + "current": {}, + "datasource": null, + "hide": 2, + "includeAll": true, + "label": null, + "multi": false, + "name": "mon_servers", + "options": [], + "query": "label_values(ceph_mon_quorum_status, ceph_daemon)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "15s", + "30s", + "1m", + "5m", + "15m", + 
"30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Ceph Health", + "version": 43 +} diff --git a/ansible/roles/ceph-grafana/files/dashboards/cephmetrics/ceph-osd-information.json b/ansible/roles/ceph-grafana/files/dashboards/cephmetrics/ceph-osd-information.json new file mode 100644 index 0000000..b0cb210 --- /dev/null +++ b/ansible/roles/ceph-grafana/files/dashboards/cephmetrics/ceph-osd-information.json @@ -0,0 +1,2021 @@ +{ + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "5.0.0" + }, + { + "type": "panel", + "id": "grafana-piechart-panel", + "name": "Pie Chart", + "version": "1.3.3" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Local", + "version": "5.0.0" + }, + { + "type": "panel", + "id": "singlestat", + "name": "Singlestat", + "version": "5.0.0" + }, + { + "type": "panel", + "id": "table", + "name": "Table", + "version": "5.0.0" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "id": null, + "iteration": 1531263612973, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "tags": [ + "overview" + ], + "title": "Shortcuts", + "type": "dashboards" + } + ], + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 24, + "panels": [], + "repeat": null, + "title": "OSD Summary", + "type": "row" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, 
+ "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 2, + "x": 0, + "y": 1 + }, + "hideTimeOverride": true, + "id": 1, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(max by (ceph_daemon) (ceph_osd_metadata))", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 2, + "textEditor": true + } + ], + "thresholds": "", + "timeFrom": "1m", + "title": "OSDs", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 2, + "x": 2, + "y": 1 + }, + "hideTimeOverride": true, + "id": 2, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + 
"from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(ceph_osd_up > 0.5)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 2, + "textEditor": true + } + ], + "thresholds": "", + "timeFrom": "1m", + "title": "OSDs UP", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "rgba(251,251,251,0.97)", + "rgba(255,165,0, 1)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": null, + "decimals": 0, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 2, + "x": 4, + "y": 1 + }, + "hideTimeOverride": true, + "id": 3, + "interval": null, + "links": [ + { + "dashUri": "db/ceph-health", + "dashboard": "Ceph Health", + "targetBlank": true, + "title": "Ceph Health", + "type": "dashboard" + } + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(ceph_osd_up < 0.5)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 2 + } + ], + "thresholds": "1,3", + "timeFrom": "1m", + "title": "OSDs DOWN", 
+ "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "0", + "value": "null" + } + ], + "valueName": "current" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "datasource": null, + "fontSize": "100%", + "gridPos": { + "h": 6, + "w": 6, + "x": 6, + "y": 1 + }, + "hideTimeOverride": true, + "id": 5, + "links": [], + "maxDataPoints": "", + "minSpan": 4, + "pageSize": 50, + "scroll": true, + "showHeader": true, + "sort": { + "col": 4, + "desc": false + }, + "styles": [ + { + "alias": "Hostname", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "instance", + "thresholds": [], + "type": "string", + "unit": "short" + }, + { + "alias": "OSD", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "ceph_osd", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "device", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Value", + "thresholds": [], + "type": "number", + "unit": "bytes" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "hidden", + "unit": "short" + } + ], + "targets": [ + { + "expr": "sum by 
(instance, ceph_daemon) (\n ceph_disk_occupation*0 + on (ceph_daemon) group_right(instance,device,osd_id) ceph_osd_stat_bytes\n)", + "format": "time_series", + "hide": true, + "instant": true, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A", + "step": 2, + "textEditor": true + }, + { + "expr": "label_replace(ceph_disk_occupation,\"ceph_osd\",\"$1\",\"ceph_daemon\",\"osd.(.*)\") * on(ceph_osd) group_right(instance,device) \nlabel_replace(ceph_osd_stat_bytes{ceph_daemon=~\"osd.[[osd_id]]\"},\"ceph_osd\",\"$1\",\"ceph_daemon\",\"osd.(.*)\")", + "format": "table", + "hide": false, + "instant": true, + "intervalFactor": 1, + "refId": "B" + } + ], + "timeFrom": "1m", + "timeShift": null, + "title": "OSD Size", + "transform": "table", + "type": "table" + }, + { + "aliasColors": {}, + "breakPoint": "50%", + "cacheTimeout": null, + "combine": { + "label": "Others", + "threshold": 0 + }, + "datasource": null, + "fontSize": "80%", + "format": "none", + "gridPos": { + "h": 6, + "w": 4, + "x": 12, + "y": 1 + }, + "id": 4, + "interval": null, + "legend": { + "show": true, + "values": true + }, + "legendType": "Under graph", + "links": [], + "maxDataPoints": 3, + "nullPointMode": "connected", + "pieType": "pie", + "strokeWidth": 1, + "targets": [ + { + "expr": "count by(device_class) (ceph_osd_metadata)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device_class}}", + "refId": "A" + } + ], + "title": "OSD Types Summary", + "type": "grafana-piechart-panel", + "valueName": "current" + }, + { + "aliasColors": {}, + "breakPoint": "50%", + "cacheTimeout": null, + "combine": { + "label": "Others", + "threshold": "0.05" + }, + "datasource": null, + "description": "The pie chart shows the various OSD sizes used within the cluster", + "fontSize": "80%", + "format": "none", + "gridPos": { + "h": 6, + "w": 4, + "x": 16, + "y": 1 + }, + "height": "220", + "hideTimeOverride": true, + "id": 27, + "interval": null, + "legend": { + "header": "", + 
"percentage": false, + "show": true, + "sideWidth": null, + "sortDesc": true, + "values": true + }, + "legendType": "Under graph", + "links": [], + "maxDataPoints": "", + "minSpan": 6, + "nullPointMode": "connected", + "pieType": "pie", + "strokeWidth": "1", + "targets": [ + { + "expr": "count(ceph_osd_stat_bytes < 1099511627776)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "<1 TB", + "refId": "A", + "step": 2 + }, + { + "expr": "count(ceph_osd_stat_bytes >= 1099511627776 < 2199023255552)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "<2 TB", + "refId": "B", + "step": 2 + }, + { + "expr": "count(ceph_osd_stat_bytes >= 2199023255552 < 3298534883328)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "<3TB", + "refId": "C", + "step": 2 + }, + { + "expr": "count(ceph_osd_stat_bytes >= 3298534883328 < 4398046511104)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "<4TB", + "refId": "D", + "step": 2 + }, + { + "expr": "count(ceph_osd_stat_bytes >= 4398046511104 < 6597069766656)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "<6TB", + "refId": "E", + "step": 2 + }, + { + "expr": "count(ceph_osd_stat_bytes >= 6597069766656 < 8796093022208)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "<8TB", + "refId": "F", + "step": 2 + }, + { + "expr": "count(ceph_osd_stat_bytes >= 8796093022208 < 10995116277760)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "<10TB", + "refId": "G", + "step": 2 + }, + { + "expr": "count(ceph_osd_stat_bytes >= 10995116277760 < 13194139533312)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "<12TB", + "refId": "H", + "step": 2 + }, + { + "expr": "count(ceph_osd_stat_bytes >= 13194139533312)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "12TB+", + "refId": "I", + "step": 2 + } + ], + "timeFrom": "2m", + "timeShift": null, + "title": "OSD Size Summary", 
+ "type": "grafana-piechart-panel", + "valueName": "current" + }, + { + "aliasColors": { + "Non-Encrypted": "#E5AC0E" + }, + "breakPoint": "50%", + "cacheTimeout": null, + "combine": { + "label": "Others", + "threshold": 0 + }, + "datasource": null, + "fontSize": "80%", + "format": "none", + "gridPos": { + "h": 6, + "w": 4, + "x": 20, + "y": 1 + }, + "height": "200px", + "hideTimeOverride": true, + "id": 7, + "interval": null, + "legend": { + "percentage": false, + "show": true, + "values": true + }, + "legendType": "Under graph", + "links": [], + "maxDataPoints": "1", + "minSpan": 4, + "nullPointMode": "connected", + "pieType": "pie", + "strokeWidth": 1, + "targets": [ + { + "expr": "count(ceph_bluefs_wal_total_bytes)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "bluestore", + "refId": "A", + "step": 240 + }, + { + "expr": "count(ceph_osd_metadata) - count(ceph_bluefs_wal_total_bytes)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "filestore", + "refId": "B", + "step": 240 + }, + { + "expr": "absent(ceph_bluefs_wal_total_bytes)*count(ceph_osd_metadata)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "filestore", + "refId": "C", + "step": 240 + } + ], + "timeFrom": "2m", + "timeShift": null, + "title": "OSD Objectstore Types", + "type": "grafana-piechart-panel", + "valueName": "current" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 7 + }, + "id": 25, + "panels": [ + { + "content": "

Ceph Filestore I/O Process

\n

\nA write request is first committed to a journal using direct I/O (apply). Once this write is complete, the data is persisted to HDD by a second 'buffered' write operation (commit). The commit operation is essentially a measure of the time taken to perform a syncfs call to flush dirty pages to disk, and is therefore not associated with any specific client-initiated operation.

The tables on the right show commit and apply latencies for all OSDs, or use the pull down above to focus on a specific OSD.\n", + "gridPos": { + "h": 5, + "w": 10, + "x": 0, + "y": 8 + }, + "height": "300", + "id": 8, + "links": [], + "minSpan": 6, + "mode": "html", + "title": "", + "transparent": true, + "type": "text" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "datasource": "Local", + "description": "Filestore OSDs", + "fontSize": "100%", + "gridPos": { + "h": 8, + "w": 2, + "x": 10, + "y": 8 + }, + "height": "310", + "hideTimeOverride": true, + "id": 9, + "links": [], + "minSpan": 2, + "pageSize": 0, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": false + }, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "alias": "OSD ID", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Metric", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "hidden", + "unit": "short" + } + ], + "targets": [ + { + "expr": "label_replace(\n ceph_disk_occupation{ceph_daemon=~\"osd.($filestore_osd_id)\"},\n \"osd_num\",\n \"$1\",\n \"ceph_daemon\",\n \"osd.(.*)\"\n)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{osd_num}}", + "refId": "A", + "step": 2, + "textEditor": true + } + ], + "timeFrom": "2m", + "timeShift": null, + "title": "", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "columns": [], + "datasource": "Local", + "description": "Apply latency covers the time taken to commit to the journal and complete the transaction", + 
"fontSize": "100%", + "gridPos": { + "h": 8, + "w": 5, + "x": 12, + "y": 8 + }, + "height": "310", + "hideTimeOverride": true, + "id": 12, + "links": [], + "minSpan": 4, + "pageSize": 0, + "scroll": true, + "showHeader": true, + "sort": { + "col": 6, + "desc": true + }, + "styles": [ + { + "alias": "OSD ID", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "osd_num", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "Apply Latency", + "colorMode": "row", + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Value", + "thresholds": [ + "50", + "150" + ], + "type": "number", + "unit": "ms" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "hidden", + "unit": "short" + } + ], + "targets": [ + { + "expr": "label_replace(\n topk($max_devices,\n ceph_osd_apply_latency_ms{ceph_daemon=~\"osd.[[osd_id]]\"} + on (ceph_daemon) (ceph_filestore_journal_latency_count * 0)\n ),\n \"osd_num\",\n \"$1\",\n \"ceph_daemon\",\n \"osd.(.*)\"\n)", + "format": "table", + "instant": true, + "intervalFactor": 1, + "legendFormat": "{{ceph_daemon}}", + "refId": "A", + "textEditor": true + } + ], + "timeFrom": "2m", + "title": "", + "transform": "table", + "type": "table" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "datasource": "Local", + "description": "Commit latency is the time taken for writes to be flushed to disk as part of async kernel activity", + "fontSize": "100%", + "gridPos": { + "h": 8, + "w": 5, + "x": 17, + "y": 8 + }, + "height": "310", + "hideTimeOverride": true, + "id": 13, + 
"links": [], + "minSpan": 4, + "pageSize": 0, + "scroll": false, + "showHeader": true, + "sort": { + "col": 6, + "desc": true + }, + "styles": [ + { + "alias": "OSD ID", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "osd_num", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "Commit Latency", + "colorMode": "row", + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Value", + "thresholds": [ + "50", + "150" + ], + "type": "number", + "unit": "ms" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "hidden", + "unit": "short" + } + ], + "targets": [ + { + "expr": "label_replace(\n topk($max_devices,\n ceph_osd_commit_latency_ms{ceph_daemon=~\"osd.[[osd_id]]\"} + on (ceph_daemon) (ceph_filestore_journal_latency_count * 0)\n ),\n \"osd_num\",\n \"$1\",\n \"ceph_daemon\",\n \"osd.(.*)\"\n )", + "format": "table", + "instant": true, + "intervalFactor": 1, + "legendFormat": "{{ceph_daemon}}", + "refId": "A", + "textEditor": true + } + ], + "timeFrom": "2m", + "title": "", + "transform": "table", + "type": "table" + }, + { + "aliasColors": { + "95%ile Commit Latency": "#447EBC", + "Apply Latency Max": "#890F02" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "description": "Shows the latency for a given OSD, allowing you to compare a specific OSD against the $percentile%ile graph. 
Note that when the \"OSD Id\" pull-down shows **ALL**, the graph will be empty to avoid the chart being unreadable.", + "fill": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 16 + }, + "height": "300px", + "id": 14, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxDataPoints": "", + "minSpan": 12, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "Apply Latency Max", + "fill": 0 + }, + { + "alias": "95%ile Apply Latency", + "fill": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "avg(\n ceph_osd_commit_latency_ms{ceph_daemon=~\"osd.([[osd_id]])\"} + on (ceph_daemon) (ceph_filestore_journal_latency_count * 0)\n)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "commit", + "refId": "B", + "textEditor": true + }, + { + "expr": "avg(\n ceph_osd_apply_latency_ms{ceph_daemon=~\"osd.([[osd_id]])\"} + on (ceph_daemon) (ceph_filestore_journal_latency_count * 0)\n)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "apply", + "refId": "A", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Filestore Latency for OSD '$osd_id'", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + 
"datasource": "Local", + "fill": 1, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 16 + }, + "height": "300px", + "id": 15, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 12, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "quantile(\n $percentile/100,\n ceph_osd_commit_latency_ms + on (ceph_daemon) (ceph_filestore_journal_latency_count * 0)\n)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "commit", + "refId": "B", + "textEditor": true + }, + { + "expr": "quantile(\n $percentile/100,\n ceph_osd_apply_latency_ms + on (ceph_daemon) (ceph_filestore_journal_latency_count * 0)\n)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "apply", + "refId": "A", + "textEditor": true + }, + { + "expr": "quantile(\n $percentile/100,\n rate(ceph_filestore_journal_latency_sum[$__interval]) / \n (rate(ceph_filestore_journal_latency_count[$__interval]) != 0)\n)", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "journal", + "refId": "C", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Filestore IO Summary - all OSD's @ $percentile%ile", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "title": "Filestore OSD 
Latencies", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 8 + }, + "id": 26, + "panels": [ + { + "content": "

Ceph Bluestore I/O Process

\n

\nUnlike filestore, bluestore does not suffer from a double-write penalty (i.e., a write to the journal followed by a write to HDD). With bluestore, once a write is scheduled (submit and throttle latencies), it is written directly to the disk (AIO wait), and then the metadata relating to the object is changed (kv_latency). Writes are not considered complete until the kv store is updated.

The tables on the right focus on the top 10 Bluestore OSDs with the highest latencies.\n", + "gridPos": { + "h": 8, + "w": 6, + "x": 0, + "y": 9 + }, + "height": "300", + "id": 16, + "links": [], + "minSpan": 6, + "mode": "html", + "title": "", + "type": "text" + }, + { + "columns": [], + "datasource": "Local", + "description": "Bluestore OSDs", + "fontSize": "100%", + "gridPos": { + "h": 8, + "w": 2, + "x": 6, + "y": 9 + }, + "height": "310", + "hideTimeOverride": true, + "id": 17, + "links": [], + "minSpan": 2, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": true + }, + "styles": [ + { + "alias": "OSD ID", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "expr": "label_replace(\n ceph_disk_occupation{ceph_daemon=~\"osd.($bluestore_osd_id)\"},\n \"osd_num\",\n \"$1\",\n \"ceph_daemon\",\n \"osd.(.*)\"\n)", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "legendFormat": "{{osd_num}}", + "refId": "A", + "textEditor": true + } + ], + "timeFrom": "2m", + "timeShift": null, + "title": "", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "columns": [], + "datasource": "Local", + "description": "Time spent preparing the request (transaction)", + "fontSize": "100%", + "gridPos": { + "h": 8, + "w": 4, + "x": 8, + "y": 9 + }, + "height": "310", + "hideTimeOverride": true, + "id": 18, + "links": [], + "minSpan": 4, + "pageSize": 0, + "scroll": true, + "showHeader": true, + "sort": { + "col": null, + "desc": false + }, + "styles": [ + { + "alias": "OSD ID", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "pattern": "osd_num", + "thresholds": [], + 
"type": "string", + "unit": "s" + }, + { + "alias": "Submit Latency", + "colorMode": "row", + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Value", + "thresholds": [ + ".001", + ".003" + ], + "type": "number", + "unit": "s" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "hidden", + "unit": "short" + } + ], + "targets": [ + { + "expr": "label_replace(\n (\n topk($max_devices,\n irate(ceph_bluestore_submit_lat_sum{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) / \n (irate(ceph_bluestore_submit_lat_count{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) != 0)\n )\n ),\n \"osd_num\",\n \"$1\",\n \"ceph_daemon\",\n \"osd.(.*)\"\n)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "refId": "A", + "textEditor": true + } + ], + "timeFrom": "2m", + "title": "", + "transform": "table", + "type": "table" + }, + { + "columns": [], + "datasource": "Local", + "description": "Time requests wait due to throttling or busy conditions", + "fontSize": "100%", + "gridPos": { + "h": 8, + "w": 4, + "x": 12, + "y": 9 + }, + "height": "310", + "hideTimeOverride": true, + "id": 19, + "links": [], + "minSpan": 4, + "pageSize": 0, + "scroll": true, + "showHeader": true, + "sort": { + "col": 4, + "desc": false + }, + "styles": [ + { + "alias": "OSD ID", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "pattern": "osd_num", + "thresholds": [], + "type": "string", + "unit": "short" + }, + { + "alias": "Throttle Latency", + "colorMode": "row", + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + 
"dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Value", + "thresholds": [ + ".001", + ".003" + ], + "type": "number", + "unit": "s" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "hidden", + "unit": "short" + } + ], + "targets": [ + { + "expr": "label_replace(\n (\n topk($max_devices,\n irate(ceph_bluestore_throttle_lat_sum{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) / \n (irate(ceph_bluestore_throttle_lat_count{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) != 0)\n )\n ),\n \"osd_num\",\n \"$1\",\n \"ceph_daemon\",\n \"osd.(.*)\"\n)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "refId": "A", + "textEditor": true + } + ], + "timeFrom": "2m", + "title": "", + "transform": "table", + "type": "table" + }, + { + "columns": [], + "datasource": "Local", + "description": "Time spent waiting for the physical I/O request to complete", + "fontSize": "100%", + "gridPos": { + "h": 8, + "w": 4, + "x": 16, + "y": 9 + }, + "height": "310", + "hideTimeOverride": true, + "id": 20, + "links": [], + "minSpan": 4, + "pageSize": 0, + "scroll": true, + "showHeader": true, + "sort": { + "col": 5, + "desc": false + }, + "styles": [ + { + "alias": "OSD ID", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "pattern": "osd_num", + "thresholds": [], + "type": "string", + "unit": "short" + }, + { + "alias": "AIO Wait Time", + "colorMode": "row", + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Value", + "thresholds": [ + ".020", + ".050" + ], + "type": "number", + "unit": "s" + }, + { + "alias": "", + "colorMode": null, + 
"colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "hidden", + "unit": "short" + } + ], + "targets": [ + { + "expr": "label_replace(\n (\n topk($max_devices,\n irate(ceph_bluestore_state_aio_wait_lat_sum{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) / \n (irate(ceph_bluestore_state_aio_wait_lat_count{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) != 0)\n )\n ),\n \"osd_num\",\n \"$1\",\n \"ceph_daemon\",\n \"osd.(.*)\"\n)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "refId": "A", + "textEditor": true + } + ], + "timeFrom": "2m", + "title": "", + "transform": "table", + "type": "table" + }, + { + "columns": [], + "datasource": "Local", + "description": "Time spent waiting for rocksdb (metadata store) to commit meta data", + "fontSize": "100%", + "gridPos": { + "h": 8, + "w": 4, + "x": 20, + "y": 9 + }, + "height": "310", + "hideTimeOverride": true, + "id": 21, + "links": [], + "minSpan": 4, + "pageSize": 0, + "scroll": true, + "showHeader": true, + "sort": { + "col": null, + "desc": false + }, + "styles": [ + { + "alias": "OSD ID", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "osd_num", + "thresholds": [], + "type": "string", + "unit": "short" + }, + { + "alias": "KV Latency", + "colorMode": "row", + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Value", + "thresholds": [ + ".020", + ".050" + ], + "type": "number", + "unit": "s" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "hidden", + 
"unit": "short" + } + ], + "targets": [ + { + "expr": "label_replace(\n (\n topk($max_devices,\n irate(ceph_bluestore_kv_lat_sum{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) / \n (irate(ceph_bluestore_kv_lat_count{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) != 0)\n )\n ),\n \"osd_num\",\n \"$1\",\n \"ceph_daemon\",\n \"osd.(.*)\"\n)", + "format": "table", + "instant": true, + "intervalFactor": 2, + "refId": "A", + "textEditor": true + } + ], + "timeFrom": "2m", + "title": "", + "transform": "table", + "type": "table" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "fill": 1, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 17 + }, + "height": "300", + "id": 22, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 12, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "avg(\n irate(ceph_bluestore_submit_lat_sum{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) / \n (irate(ceph_bluestore_submit_lat_count{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) != 0)\n)", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "Submit", + "refId": "A", + "textEditor": true + }, + { + "expr": "avg(\n irate(ceph_bluestore_throttle_lat_sum{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) / \n (irate(ceph_bluestore_throttle_lat_count{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) != 0)\n)", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "Throttle", + "refId": "B", + "textEditor": true + }, + { + "expr": "avg(\n irate(ceph_bluestore_state_aio_wait_lat_sum{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) / \n 
(irate(ceph_bluestore_state_aio_wait_lat_count{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) != 0)\n)", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "AIO Wait", + "refId": "C", + "textEditor": true + }, + { + "expr": "avg(\n irate(ceph_bluestore_kv_lat_sum{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) / \n (irate(ceph_bluestore_kv_lat_count{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) != 0)\n)", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "KV Latency", + "refId": "D", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Bluestore Latency for OSD '$osd_id'", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "description": "This charts shows the $percentile%ile latencies across all OSDs, which indicates overall performance, but does not represent any specific OSD", + "fill": 1, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 17 + }, + "height": "300px", + "id": 23, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 12, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "quantile(\n $percentile/100,\n 
irate(ceph_bluestore_submit_lat_sum[$__interval]) / \n (irate(ceph_bluestore_submit_lat_count[$__interval]) != 0)\n)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Submit", + "refId": "A", + "textEditor": true + }, + { + "expr": "quantile(\n $percentile/100,\n irate(ceph_bluestore_throttle_lat_sum[$__interval]) / \n (irate(ceph_bluestore_throttle_lat_count[$__interval]) != 0)\n)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Throttle", + "refId": "B", + "textEditor": true + }, + { + "expr": "quantile(\n $percentile/100,\n irate(ceph_bluestore_state_aio_wait_lat_sum[$__interval]) / \n (irate(ceph_bluestore_state_aio_wait_lat_count[$__interval]) != 0)\n)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "AIO Wait", + "refId": "C", + "textEditor": true + }, + { + "expr": "quantile(\n $percentile/100,\n irate(ceph_bluestore_kv_lat_sum[$__interval]) / \n (irate(ceph_bluestore_kv_lat_count[$__interval]) != 0)\n)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "KV Latency", + "refId": "D", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "BlueStore IO Summary - all OSD's @ $percentile%ile", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "title": "Bluestore OSD Latencies", + "type": "row" + } + ], + "refresh": "15s", + "schemaVersion": 16, + "style": "dark", + "tags": [ + "overview" + ], + "templating": { + "list": [ + { + "allValue": null, + "current": { + "selected": true, + "text": "95", + "value": "95" + }, + "hide": 0, + 
"includeAll": false, + "label": null, + "multi": false, + "name": "percentile", + "options": [ + { + "selected": true, + "text": "95", + "value": "95" + }, + { + "selected": false, + "text": "96", + "value": "96" + }, + { + "selected": false, + "text": "97", + "value": "97" + }, + { + "selected": false, + "text": "98", + "value": "98" + }, + { + "selected": false, + "text": "99", + "value": "99" + } + ], + "query": "95,96,97,98,99", + "type": "custom" + }, + { + "allValue": null, + "current": { + "selected": true, + "text": "10", + "value": "10" + }, + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "max_devices", + "options": [ + { + "selected": true, + "text": "10", + "value": "10" + } + ], + "query": "10", + "type": "custom" + }, + { + "allValue": ".*", + "current": {}, + "datasource": null, + "hide": 0, + "includeAll": true, + "label": "OSD Id", + "multi": false, + "name": "osd_id", + "options": [], + "query": "label_values(ceph_osd_metadata, ceph_daemon)", + "refresh": 1, + "regex": "/osd\\.(.*)/", + "sort": 3, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": "", + "current": {}, + "datasource": null, + "hide": 2, + "includeAll": true, + "label": null, + "multi": false, + "name": "bluestore_osd_id", + "options": [], + "query": "label_values(ceph_bluefs_bytes_written_wal, ceph_daemon)", + "refresh": 2, + "regex": "/osd\\.(.*)/", + "sort": 3, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": "", + "current": {}, + "datasource": null, + "hide": 2, + "includeAll": true, + "label": null, + "multi": false, + "name": "filestore_osd_id", + "options": [], + "query": "label_values(ceph_filestore_journal_latency_count, ceph_daemon)", + "refresh": 2, + "regex": "/osd\\.(.*)/", + "sort": 3, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { 
+ "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "15s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Ceph OSD Information", + "version": 31 +} diff --git a/ansible/roles/ceph-grafana/files/dashboards/cephmetrics/ceph-pools.json b/ansible/roles/ceph-grafana/files/dashboards/cephmetrics/ceph-pools.json new file mode 100644 index 0000000..4f3473b --- /dev/null +++ b/ansible/roles/ceph-grafana/files/dashboards/cephmetrics/ceph-pools.json @@ -0,0 +1,932 @@ +{ + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "5.0.4" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "5.0.0" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Local", + "version": "5.0.0" + }, + { + "type": "panel", + "id": "table", + "name": "Table", + "version": "5.0.0" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "id": null, + "iteration": 1526263024209, + "links": [], + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 14, + "panels": [], + "repeat": null, + "title": "Pool Overview", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 5, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 1 + }, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "minSpan": 12, + 
"nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(label_replace((rate(ceph_pool_rd{pool_id=~\"[[pool_id]]\"}[1m]) + rate(ceph_pool_wr{pool_id=~\"[[pool_id]]\"}[1m])),\"id\", \"$1\", \"pool_id\", \"(.*)\") + on(pool_id) group_left(instance,name) ceph_pool_metadata{pool_id=~\"[[pool_id]]\"}) ", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "{{name}}", + "refId": "F" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Client IOPS by Pool", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 5, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 1 + }, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "minSpan": 12, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(label_replace((rate(ceph_pool_rd_bytes{pool_id=~\"[[pool_id]]\"}[1m]) + rate(ceph_pool_wr_bytes{pool_id=~\"[[pool_id]]\"}[1m])),\"id\", \"$1\", \"pool_id\", \"(.*)\") + on(pool_id) group_left(instance,name) ceph_pool_metadata{pool_id=~\"[[pool_id]]\"}) 
", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{name}}", + "refId": "A", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Client Throughput by Pool", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 8 + }, + "id": 15, + "panels": [], + "repeat": null, + "title": "Top 5's", + "type": "row" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "datasource": null, + "fontSize": "100%", + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 9 + }, + "id": 3, + "links": [], + "minSpan": 12, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 6, + "desc": true + }, + "styles": [ + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Time", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "id", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "instance", + "thresholds": [], + 
"type": "hidden", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "job", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "Pool Name", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "name", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "Pool ID", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "pool_id", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "IOPS (R+W)", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "pattern": "Value", + "thresholds": [], + "type": "number", + "unit": "none" + } + ], + "targets": [ + { + "expr": "topk(5,(label_replace((irate(ceph_pool_rd{pool_id=~\"[[pool_id]]\"}[1m]) + irate(ceph_pool_wr{pool_id=~\"[[pool_id]]\"}[1m])),\"id\", \"$1\", \"pool_id\", \"(.*)\") + on(pool_id) group_left(instance,name) ceph_pool_metadata{pool_id=~\"[[pool_id]]\"}) )", + "format": "table", + "instant": true, + "intervalFactor": 2, + "refId": "A", + "textEditor": true + } + ], + "title": "Top 5 Pools by Client IOPS", + "transform": "table", + "type": "table" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "datasource": null, + "fontSize": "100%", + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 9 + }, + "id": 4, + "links": [], + "minSpan": 12, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 6, 
+ "desc": true + }, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "id", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "instance", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "job", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "Pool Name", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "name", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "Pool ID", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "pool_id", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "Throughput", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Value", + "thresholds": [], + "type": "number", + "unit": "decbytes" + } + ], + "targets": [ + { + "expr": "(label_replace((irate(ceph_pool_rd_bytes{pool_id=~\"[[pool_id]]\"}[1m]) + 
irate(ceph_pool_wr_bytes{pool_id=~\"[[pool_id]]\"}[1m])),\"id\", \"$1\", \"pool_id\", \"(.*)\") + on(pool_id) group_left(instance,name) ceph_pool_metadata{pool_id=~\"[[pool_id]]\"}) ", + "format": "table", + "instant": true, + "intervalFactor": 2, + "refId": "A", + "textEditor": true + } + ], + "title": "Top 5 Pools by Throughput", + "transform": "table", + "type": "table" + }, + { + "columns": [], + "datasource": null, + "fontSize": "100%", + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 9 + }, + "id": 5, + "links": [], + "minSpan": 8, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 5, + "desc": true + }, + "styles": [ + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Time", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "instance", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "job", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "Pool Name", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "name", + "thresholds": [], + "type": "string", + "unit": "short" + }, + { + "alias": "Pool ID", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD 
HH:mm:ss", + "decimals": 2, + "pattern": "pool_id", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "Capacity Used", + "colorMode": "value", + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Value", + "thresholds": [ + "70", + "85" + ], + "type": "number", + "unit": "percentunit" + } + ], + "targets": [ + { + "expr": "topk(5,((ceph_pool_bytes_used / (ceph_pool_bytes_used + ceph_pool_max_avail)) * on(pool_id) group_left(name) ceph_pool_metadata))", + "format": "table", + "hide": false, + "instant": true, + "intervalFactor": 1, + "legendFormat": "", + "refId": "D" + } + ], + "title": "Top 5 Pools By Capacity Used", + "transform": "table", + "type": "table" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 16 + }, + "id": 16, + "panels": [ + { + "aliasColors": { + "read_op_per_sec": "#3F6833", + "write_op_per_sec": "#E5AC0E" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 17 + }, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 12, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(label_replace(irate(ceph_pool_rd[1m]),\"id\",\"$1\",\"pool_id\",\"(.*)\") * on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~\"[[pool_name]]\"})", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "reads", + "refId": "B" + }, + { + "expr": "(label_replace(irate(ceph_pool_wr[1m]),\"id\",\"$1\",\"pool_id\",\"(.*)\") * on(pool_id) 
group_left(instance,name) ceph_pool_metadata{name=~\"[[pool_name]]\"})", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "writes", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Pool '$pool_name' Client IOPS", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + "read_op_per_sec": "#3F6833", + "write_op_per_sec": "#E5AC0E" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 17 + }, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 12, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(label_replace(irate(ceph_pool_rd_bytes[1m]),\"id\",\"$1\",\"pool_id\",\"(.*)\") + on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~\"[[pool_name]]\"})", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "reads", + "refId": "A" + }, + { + "expr": "(label_replace(irate(ceph_pool_wr_bytes[1m]),\"id\",\"$1\",\"pool_id\",\"(.*)\") + on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~\"[[pool_name]]\"})", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "writes", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": 
null, + "timeShift": null, + "title": "Pool '$pool_name' Client Throughput", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": "pool_name", + "title": "Pool '$pool_name' Performance Details", + "type": "row" + } + ], + "refresh": "15s", + "schemaVersion": 16, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "allValue": null, + "current": {}, + "datasource": null, + "hide": 2, + "includeAll": true, + "label": null, + "multi": false, + "name": "pool_id", + "options": [], + "query": "label_values(ceph_pool_metadata,pool_id)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": {}, + "datasource": null, + "hide": 2, + "includeAll": true, + "label": "Pool Name", + "multi": false, + "name": "pool_name", + "options": [], + "query": "label_values(ceph_pool_metadata,name)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "15s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Ceph Pools", + "version": 16 +} diff --git a/ansible/roles/ceph-grafana/files/dashboards/cephmetrics/ceph-rgw-workload.json 
b/ansible/roles/ceph-grafana/files/dashboards/cephmetrics/ceph-rgw-workload.json new file mode 100644 index 0000000..8585fc5 --- /dev/null +++ b/ansible/roles/ceph-grafana/files/dashboards/cephmetrics/ceph-rgw-workload.json @@ -0,0 +1,1180 @@ +{ + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "5.0.0" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "5.0.0" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Local", + "version": "5.0.0" + }, + { + "type": "panel", + "id": "singlestat", + "name": "Singlestat", + "version": "5.0.0" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "id": null, + "iteration": 1530165442642, + "links": [], + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 2, + "panels": [], + "title": "RGW Overview - All Gateways", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 1 + }, + "id": 29, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "avg(rate(ceph_rgw_get_initial_lat_sum[30s]) / rate(ceph_rgw_get_initial_lat_count[30s]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "GET AVG", + "refId": "A" + }, + { + "expr": 
"avg(rate(ceph_rgw_put_initial_lat_sum[30s]) / rate(ceph_rgw_put_initial_lat_count[30s]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "PUT AVG", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Average GET/PUT Latencies", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "gridPos": { + "h": 7, + "w": 7, + "x": 8, + "y": 1 + }, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(rgw_host) (label_replace(rate(ceph_rgw_req[30s]), \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{rgw_host}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Total Requests/sec by RGW Instance", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 0, + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + 
}, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "Latencies are shown stacked, without a yaxis to provide a visual indication of GET latency imbalance across RGW hosts", + "fill": 1, + "gridPos": { + "h": 7, + "w": 6, + "x": 15, + "y": 1 + }, + "id": 31, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "label_replace(rate(ceph_rgw_get_initial_lat_sum[30s]),\"rgw_host\",\"$1\",\"ceph_daemon\",\"rgw.(.*)\") / \nlabel_replace(rate(ceph_rgw_get_initial_lat_count[30s]),\"rgw_host\",\"$1\",\"ceph_daemon\",\"rgw.(.*)\")", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{rgw_host}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "GET Latencies by RGW Instance", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": false + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": null, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": 
false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 7, + "w": 3, + "x": 21, + "y": 1 + }, + "id": 8, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(ceph_rgw_qlen)", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "", + "title": "Request Queue Length", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "Total bytes transferred in/out of all radosgw instances within the cluster", + "fill": 1, + "gridPos": { + "h": 6, + "w": 8, + "x": 0, + "y": 8 + }, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(ceph_rgw_get_b[30s]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "GETs", + "refId": "A" + }, + { + "expr": "sum(rate(ceph_rgw_put_b[30s]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "PUTs", 
+ "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Bandwidth Consumed by Type", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "Total bytes transferred in/out through get/put operations, by radosgw instance", + "fill": 1, + "gridPos": { + "h": 6, + "w": 7, + "x": 8, + "y": 8 + }, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(rgw_host) (\n (label_replace(rate(ceph_rgw_get_b[30s]), \"rgw_host\",\"$1\",\"ceph_daemon\",\"rgw.(.*)\")) + \n (label_replace(rate(ceph_rgw_put_b[30s]), \"rgw_host\",\"$1\",\"ceph_daemon\",\"rgw.(.*)\"))\n)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{rgw_host}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Bandwidth by RGW Instance", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + 
"show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "Latencies are shown stacked, without a yaxis to provide a visual indication of PUT latency imbalance across RGW hosts", + "fill": 1, + "gridPos": { + "h": 6, + "w": 6, + "x": 15, + "y": 8 + }, + "id": 32, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "label_replace(rate(ceph_rgw_put_initial_lat_sum[30s]),\"rgw_host\",\"$1\",\"ceph_daemon\",\"rgw.(.*)\") / \nlabel_replace(rate(ceph_rgw_put_initial_lat_count[30s]),\"rgw_host\",\"$1\",\"ceph_daemon\",\"rgw.(.*)\")", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{rgw_host}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "PUT Latencies by RGW Instance", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": false + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "Failed HTTP Requests by RGW instance", + "fill": 1, + "gridPos": { + "h": 6, + "w": 3, + "x": 21, + "y": 8 + }, + 
"id": 41, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "label_replace(ceph_rgw_failed_req, \"rgw_host\",\"$1\",\"ceph_daemon\",\"rgw.(.*)\")", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{rgw_host}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Failed Requests", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": false, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": false + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 14 + }, + "id": 12, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "gridPos": { + "h": 8, + "w": 6, + "x": 0, + "y": 15 + }, + "id": 34, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "scopedVars": { + "rgw_servers": { + "selected": false, + "text": "rgw.rhs-srv-01", + "value": "rgw.rhs-srv-01" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": 
"rate(ceph_rgw_get_initial_lat_sum{ceph_daemon=~\"($rgw_servers)\"}[30s]) / rate(ceph_rgw_get_initial_lat_count{ceph_daemon=~\"($rgw_servers)\"}[30s])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "GET", + "refId": "A" + }, + { + "expr": "rate(ceph_rgw_put_initial_lat_sum{ceph_daemon=~\"($rgw_servers)\"}[30s]) / rate(ceph_rgw_put_initial_lat_count{ceph_daemon=~\"($rgw_servers)\"}[30s])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "PUT", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$rgw_servers GET/PUT Latencies", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "gridPos": { + "h": 8, + "w": 7, + "x": 6, + "y": 15 + }, + "id": 18, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "scopedVars": { + "rgw_servers": { + "selected": false, + "text": "rgw.rhs-srv-01", + "value": "rgw.rhs-srv-01" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "rate(ceph_rgw_get_b{ceph_daemon=~\"[[rgw_servers]]\"}[30s])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "GETs", + "refId": "B" + }, + { + "expr": 
"rate(ceph_rgw_put_b{ceph_daemon=~\"[[rgw_servers]]\"}[30s])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "PUTs", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Bandwidth by HTTP Operation", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 0, + "format": "bytes", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + "GETs": "#7eb26d", + "Other": "#447ebc", + "PUTs": "#eab839", + "Requests": "#3f2b5b", + "Requests Failed": "#bf1b00" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "gridPos": { + "h": 8, + "w": 7, + "x": 13, + "y": 15 + }, + "id": 14, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "scopedVars": { + "rgw_servers": { + "selected": false, + "text": "rgw.rhs-srv-01", + "value": "rgw.rhs-srv-01" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "rate(ceph_rgw_failed_req{ceph_daemon=~\"[[rgw_servers]]\"}[30s])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Requests Failed", + "refId": "B" + }, + { + "expr": "rate(ceph_rgw_get{ceph_daemon=~\"[[rgw_servers]]\"}[30s])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "GETs", + "refId": "C" + }, + { + "expr": "rate(ceph_rgw_put{ceph_daemon=~\"[[rgw_servers]]\"}[30s])", + 
"format": "time_series", + "intervalFactor": 1, + "legendFormat": "PUTs", + "refId": "D" + }, + { + "expr": "rate(ceph_rgw_req{ceph_daemon=~\"[[rgw_servers]]\"}[30s]) -\n (rate(ceph_rgw_get{ceph_daemon=~\"[[rgw_servers]]\"}[30s]) +\n rate(ceph_rgw_put{ceph_daemon=~\"[[rgw_servers]]\"}[30s]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Other", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "HTTP Request Breakdown", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + "Failures": "#bf1b00", + "GETs": "#7eb26d", + "Other (HEAD,POST,DELETE)": "#447ebc", + "PUTs": "#eab839" + }, + "breakPoint": "50%", + "cacheTimeout": null, + "combine": { + "label": "Others", + "threshold": 0 + }, + "datasource": null, + "fontSize": "80%", + "format": "none", + "gridPos": { + "h": 8, + "w": 4, + "x": 20, + "y": 15 + }, + "id": 23, + "interval": null, + "legend": { + "show": true, + "values": true + }, + "legendType": "Under graph", + "links": [], + "maxDataPoints": 3, + "nullPointMode": "connected", + "pieType": "pie", + "scopedVars": { + "rgw_servers": { + "selected": false, + "text": "rgw.rhs-srv-01", + "value": "rgw.rhs-srv-01" + } + }, + "strokeWidth": 1, + "targets": [ + { + "expr": "rate(ceph_rgw_failed_req{ceph_daemon=~\"[[rgw_servers]]\"}[30s])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Failures", + "refId": "A" + }, + { + "expr": "rate(ceph_rgw_get{ceph_daemon=~\"[[rgw_servers]]\"}[30s])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "GETs", + 
"refId": "B" + }, + { + "expr": "rate(ceph_rgw_put{ceph_daemon=~\"[[rgw_servers]]\"}[30s])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "PUTs", + "refId": "C" + }, + { + "expr": "rate(ceph_rgw_req{ceph_daemon=~\"[[rgw_servers]]\"}[30s]) -\n (rate(ceph_rgw_get{ceph_daemon=~\"[[rgw_servers]]\"}[30s]) +\n rate(ceph_rgw_put{ceph_daemon=~\"[[rgw_servers]]\"}[30s]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Other (DELETE,LIST)", + "refId": "D" + } + ], + "title": "Workload Breakdown", + "type": "grafana-piechart-panel", + "valueName": "current" + } + ], + "repeat": "rgw_servers", + "title": "RGW Host Detail : $rgw_servers", + "type": "row" + } + ], + "refresh": "15s", + "schemaVersion": 16, + "style": "dark", + "tags": [ + "overview" + ], + "templating": { + "list": [ + { + "allValue": null, + "current": {}, + "datasource": null, + "hide": 2, + "includeAll": true, + "label": null, + "multi": false, + "name": "rgw_servers", + "options": [], + "query": "label_values(ceph_rgw_req, ceph_daemon)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "15s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Ceph RGW Workload", + "version": 26 +} diff --git a/ansible/roles/ceph-grafana/files/dashboards/cephmetrics/cephmetrics-host.json b/ansible/roles/ceph-grafana/files/dashboards/cephmetrics/cephmetrics-host.json new file mode 100644 index 0000000..c937e29 --- /dev/null +++ b/ansible/roles/ceph-grafana/files/dashboards/cephmetrics/cephmetrics-host.json @@ -0,0 +1,1000 @@ +{ + "__inputs": [ + { + "name": "DS_LOCAL_PROMETHEUS", + "label": "local_prometheus", + 
"description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "5.0.4" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "5.0.0" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "5.0.0" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "id": null, + "links": [], + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 9, + "panels": [], + "repeat": null, + "title": "CPU & RAM", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 1 + }, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 8, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "100 - (avg by (instance) (irate(node_cpu{job=\"cephmetrics\",mode=\"idle\"}[5m])) * 100)", + "format": "time_series", + "instant": false, + "intervalFactor": 1, + "legendFormat": "{{Busy %}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Cephmetrics Host CPU Busy %", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": 
"time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "short", + "label": null, + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 1 + }, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 8, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "(avg by(mode) (irate(node_cpu{job=\"cephmetrics\",mode!=\"idle\"}[30s]))*100)", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{mode}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Cephmetrics Host CPU Usage Breakdown", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "short", + "label": null, + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + "Installed": "#890f02" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 1 + }, + "id": 7, + "legend": { + "avg": false, + "current": true, + 
"max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 8, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "Installed", + "fill": 0, + "linewidth": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_memory_MemTotal{job=\"cephmetrics\"} - node_memory_MemAvailable{job=\"cephmetrics\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Used", + "refId": "C" + }, + { + "expr": "node_memory_MemTotal{job=\"cephmetrics\"} ", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Installed", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": "7d", + "timeShift": null, + "title": "RAM Usage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 8 + }, + "id": 10, + "panels": [], + "repeat": null, + "title": "Disk", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 9 + }, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 8, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + 
"renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_disk_io_time_ms{job='cephmetrics',device=~\"[h,s,v]d[a-z]\"}[1m]) / 10", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Disk Utilization", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": "%UTIL", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 9 + }, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 8, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_disk_reads_completed{job=\"cephmetrics\",device=~\"[h,s,v]d[a-z]\"}[30s]) + irate(node_disk_writes_completed{job=~\"cephmetrics\",device=~\"[h,s,v]d[a-z]\"}[30s])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Disk IOPS", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": 
"time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": "IOPS", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 9 + }, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 8, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_disk_bytes_read{job='cephmetrics',device=~\"[h,s,v]d[a-z]\"}[1m]) + irate(node_disk_bytes_written{job='cephmetrics',device=~\"[h,s,v]d[a-z]\"}[1m]) ", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Disk Throughput", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "Throughput", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 16 + }, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": 
false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 8, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "(irate(node_disk_write_time_ms{job='cephmetrics',device=~\"[h,s,v]d[a-z]\"}[1m]) + irate(node_disk_read_time_ms{job='cephmetrics',device=~\"[h,s,v]d[a-z]\"}[1m])) / \n(irate(node_disk_reads_completed{job='cephmetrics',device=~\"[h,s,v]d[a-z]\"}[1m]) + irate(node_disk_writes_completed{job='cephmetrics',device=~\"[h,s,v]d[a-z]\"}[1m]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Disk Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + "Size": "#890f02" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "gridPos": { + "h": 7, + "w": 16, + "x": 8, + "y": 16 + }, + "id": 13, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "Size", + "fill": 0, + "linewidth": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + 
"targets": [ + { + "expr": "node_filesystem_size{mountpoint='/var/lib/cephmetrics/data',fstype='xfs'} - node_filesystem_free{mountpoint='/var/lib/cephmetrics/data',fstype='xfs'}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Used", + "refId": "A" + }, + { + "expr": "node_filesystem_size{mountpoint='/var/lib/cephmetrics/data',fstype='xfs'}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Size", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Filesystem Usage - Prometheus Growth", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 23 + }, + "id": 11, + "panels": [], + "repeat": null, + "title": "Network", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 24 + }, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(\n irate(node_network_receive_bytes{job='cephmetrics'}[1m]) \n ) by(instance)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Network RX", + "refId": "A" + }, + { + "expr": 
"sum(\n irate(node_network_transmit_bytes{job='cephmetrics'}[1m]) \n ) by(instance)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Network TX", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Network Load", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + "Size": "#890f02" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "Shows data growth on '/' which includes the prometheus data. Prometheus installed 04/14, migrated to separate logical volume 04/22", + "fill": 1, + "gridPos": { + "h": 7, + "w": 16, + "x": 0, + "y": 31 + }, + "id": 14, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "Size", + "fill": 0, + "linewidth": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_filesystem_size{mountpoint='/',fstype='xfs'} - node_filesystem_free{mountpoint='/',fstype='xfs'}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Used", + "refId": "A" + }, + { + "expr": "node_filesystem_size{mountpoint='/',fstype='xfs'}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Size", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": "14d", + "timeShift": null, + "title": 
"Filesystem Usage - '/'", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "refresh": "15s", + "schemaVersion": 16, + "style": "dark", + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "15s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Cephmetrics Host Metrics", + "version": 13 +} diff --git a/ansible/roles/ceph-grafana/files/dashboards/cephmetrics/disk-busy-by-server.json b/ansible/roles/ceph-grafana/files/dashboards/cephmetrics/disk-busy-by-server.json new file mode 100644 index 0000000..bc79b8e --- /dev/null +++ b/ansible/roles/ceph-grafana/files/dashboards/cephmetrics/disk-busy-by-server.json @@ -0,0 +1,1520 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": true, + "links": [], + "refresh": "15s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "height": "400", + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + 
"lines": true, + "linewidth": 1, + "links": [], + "minSpan": 12, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max(label_replace((irate(node_disk_io_time_ms{device=~\"[h,s,v]d[a-z]\"}[30s]) / 10),\"host\",\"$1\",\"instance\",\"([^.]*).*\")) by(host)", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "{{host}}", + "refId": "A", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "OSD Hosts Disk Utilization Peak", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "", + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6" + }, + { + "collapse": false, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "osd_servers", + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": 
"quantile([[percentile]]/100,(irate(node_disk_io_time_ms{device=~\"[h,s,v]d[a-z]\",instance=~\"[[osd_servers]].*\"}[1m]) / 10))", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "all disks busy @ $percentile%ile", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Disk Utilization @ $percentile%ile", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1519090074308, + "repeatPanelId": 4, + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "quantile([[percentile]]/100,(irate(node_disk_io_time_ms{device=~\"[h,s,v]d[a-z]\",instance=~\"[[osd_servers]].*\"}[1m]) / 10))", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "all disks busy @ $percentile%ile", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Disk Utilization @ $percentile%ile", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + 
"buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1519090074308, + "repeatPanelId": 4, + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "quantile([[percentile]]/100,(irate(node_disk_io_time_ms{device=~\"[h,s,v]d[a-z]\",instance=~\"[[osd_servers]].*\"}[1m]) / 10))", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "all disks busy @ $percentile%ile", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Disk Utilization @ $percentile%ile", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": 
false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1519090074308, + "repeatPanelId": 4, + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "quantile([[percentile]]/100,(irate(node_disk_io_time_ms{device=~\"[h,s,v]d[a-z]\",instance=~\"[[osd_servers]].*\"}[1m]) / 10))", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "all disks busy @ $percentile%ile", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Disk Utilization @ $percentile%ile", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1519090074308, + "repeatPanelId": 4, + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": 
"quantile([[percentile]]/100,(irate(node_disk_io_time_ms{device=~\"[h,s,v]d[a-z]\",instance=~\"[[osd_servers]].*\"}[1m]) / 10))", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "all disks busy @ $percentile%ile", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Disk Utilization @ $percentile%ile", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1519090074308, + "repeatPanelId": 4, + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "quantile([[percentile]]/100,(irate(node_disk_io_time_ms{device=~\"[h,s,v]d[a-z]\",instance=~\"[[osd_servers]].*\"}[1m]) / 10))", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "all disks busy @ $percentile%ile", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Disk Utilization @ $percentile%ile", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + 
"buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1519090074308, + "repeatPanelId": 4, + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "quantile([[percentile]]/100,(irate(node_disk_io_time_ms{device=~\"[h,s,v]d[a-z]\",instance=~\"[[osd_servers]].*\"}[1m]) / 10))", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "all disks busy @ $percentile%ile", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Disk Utilization @ $percentile%ile", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": 
false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1519090074308, + "repeatPanelId": 4, + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "quantile([[percentile]]/100,(irate(node_disk_io_time_ms{device=~\"[h,s,v]d[a-z]\",instance=~\"[[osd_servers]].*\"}[1m]) / 10))", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "all disks busy @ $percentile%ile", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Disk Utilization @ $percentile%ile", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1519090074308, + "repeatPanelId": 4, + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": 
"quantile([[percentile]]/100,(irate(node_disk_io_time_ms{device=~\"[h,s,v]d[a-z]\",instance=~\"[[osd_servers]].*\"}[1m]) / 10))", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "all disks busy @ $percentile%ile", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Disk Utilization @ $percentile%ile", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 11, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1519090074308, + "repeatPanelId": 4, + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "quantile([[percentile]]/100,(irate(node_disk_io_time_ms{device=~\"[h,s,v]d[a-z]\",instance=~\"[[osd_servers]].*\"}[1m]) / 10))", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "all disks busy @ $percentile%ile", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Disk Utilization @ $percentile%ile", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + 
"buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 12, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1519090074308, + "repeatPanelId": 4, + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "quantile([[percentile]]/100,(irate(node_disk_io_time_ms{device=~\"[h,s,v]d[a-z]\",instance=~\"[[osd_servers]].*\"}[1m]) / 10))", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "all disks busy @ $percentile%ile", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Disk Utilization @ $percentile%ile", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 13, + "legend": { + "avg": false, + "current": false, + "max": 
false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1519090074308, + "repeatPanelId": 4, + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "quantile([[percentile]]/100,(irate(node_disk_io_time_ms{device=~\"[h,s,v]d[a-z]\",instance=~\"[[osd_servers]].*\"}[1m]) / 10))", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "all disks busy @ $percentile%ile", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Disk Utilization @ $percentile%ile", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 14, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1519090074308, + "repeatPanelId": 4, + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": 
"quantile([[percentile]]/100,(irate(node_disk_io_time_ms{device=~\"[h,s,v]d[a-z]\",instance=~\"[[osd_servers]].*\"}[1m]) / 10))", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "all disks busy @ $percentile%ile", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Disk Utilization @ $percentile%ile", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 15, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1519090074308, + "repeatPanelId": 4, + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "quantile([[percentile]]/100,(irate(node_disk_io_time_ms{device=~\"[h,s,v]d[a-z]\",instance=~\"[[osd_servers]].*\"}[1m]) / 10))", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "all disks busy @ $percentile%ile", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Disk Utilization @ $percentile%ile", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + 
"buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 16, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1519090074308, + "repeatPanelId": 4, + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "quantile([[percentile]]/100,(irate(node_disk_io_time_ms{device=~\"[h,s,v]d[a-z]\",instance=~\"[[osd_servers]].*\"}[1m]) / 10))", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "all disks busy @ $percentile%ile", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Disk Utilization @ $percentile%ile", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 17, + "legend": { + "avg": false, + "current": false, + "max": 
false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 2, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "repeatIteration": 1519090074308, + "repeatPanelId": 4, + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "quantile([[percentile]]/100,(irate(node_disk_io_time_ms{device=~\"[h,s,v]d[a-z]\",instance=~\"[[osd_servers]].*\"}[1m]) / 10))", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "all disks busy @ $percentile%ile", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Disk Utilization @ $percentile%ile", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6" + }, + { + "collapse": false, + "height": 250, + "panels": [], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Dashboard Row", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "allValue": null, + "current": { + "text": "All", + "value": "$__all" + }, + "datasource": null, + "hide": 2, + "includeAll": true, + "label": null, + "multi": false, + "name": "osd_servers", + "options": [], + "query": "label_values(ceph_disk_occupation, 
instance)", + "refresh": 1, + "regex": "([^.]*).*", + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "selected": true, + "text": "95", + "value": "95" + }, + "hide": 0, + "includeAll": false, + "label": "Percentile", + "multi": false, + "name": "percentile", + "options": [ + { + "selected": true, + "text": "95", + "value": "95" + }, + { + "selected": false, + "text": "96", + "value": "96" + }, + { + "selected": false, + "text": "97", + "value": "97" + }, + { + "selected": false, + "text": "98", + "value": "98" + }, + { + "selected": false, + "text": "99", + "value": "99" + } + ], + "query": "95,96,97,98,99", + "type": "custom" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "15s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Disk Busy By Server" +} diff --git a/ansible/roles/ceph-grafana/files/dashboards/cephmetrics/iops-by-server.json b/ansible/roles/ceph-grafana/files/dashboards/cephmetrics/iops-by-server.json new file mode 100644 index 0000000..b4da67b --- /dev/null +++ b/ansible/roles/ceph-grafana/files/dashboards/cephmetrics/iops-by-server.json @@ -0,0 +1,295 @@ +{ + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "4.6.3" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Local", + "version": "1.0.0" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": false, + "gnetId": null, + 
"graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [], + "refresh": "15s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "gridPos": { + "h": 10, + "w": 24, + "x": 0, + "y": 0 + }, + "height": "400", + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 12, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "label_replace(irate(node_disk_reads_completed[1m]) + irate(node_disk_writes_completed{instance=~\"[[osd_servers]].*\"}[1m]),\"host\",\"$1\",\"instance\",\"([^.]*).*\")", + "format": "time_series", + "hide": true, + "intervalFactor": 1, + "legendFormat": "{{host}}", + "refId": "B" + }, + { + "expr": "sum(\n sum(\n irate(node_disk_reads_completed{job=\"node\" }[1m]) + \n irate(node_disk_writes_completed{job=\"node\"}[1m]))\n by(instance, device) + ignoring(ceph_daemon,job) group_right(instance) ceph_disk_occupation)\n by(instance)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Disk IOPS Across All OSD Hosts", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, 
+ "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "All Servers by IOPS", + "titleSize": "h6" + }, + { + "collapse": false, + "height": 250, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "osd_servers", + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(\n sum(\n irate(node_disk_reads_completed{instance=~\"[[osd_servers]].*\"}[1m]) + irate(node_disk_writes_completed[1m]))\n by(instance,device) +\n ignoring(ceph_daemon,job) group_right(instance) ceph_disk_occupation)\n \n \n", + "format": "time_series", + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Total IOPS for $osd_servers", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "IOPS Load by Server", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "allValue": "", + "current": {}, + "datasource": null, + "hide": 2, + "includeAll": 
true, + "label": "OSD Host", + "multi": false, + "name": "osd_servers", + "options": [], + "query": "label_values(ceph_disk_occupation, instance)", + "refresh": 1, + "regex": "([^.]*).*", + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "15s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "IOPS by Server", + "version": 6 +} diff --git a/ansible/roles/ceph-grafana/files/dashboards/cephmetrics/iscsi-client-details.json b/ansible/roles/ceph-grafana/files/dashboards/cephmetrics/iscsi-client-details.json new file mode 100644 index 0000000..f2bd608 --- /dev/null +++ b/ansible/roles/ceph-grafana/files/dashboards/cephmetrics/iscsi-client-details.json @@ -0,0 +1,447 @@ +{ + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "5.0.4" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "5.0.0" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Local", + "version": "5.0.0" + }, + { + "type": "panel", + "id": "table", + "name": "Table", + "version": "5.0.0" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "id": null, + "iteration": 1526265032109, + "links": [], + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 28, + "panels": [], + "repeat": null, + "title": "Client Details for $client_iqn", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + 
"dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 1 + }, + "id": 20, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 12, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "round(sum by(lun_name) (irate(ceph_iscsi_lun_iops{client_iqn=~\"[[client_iqn]]\"}[30s])))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{lun_name}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "IOPS Detail for $client_iqn", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 1 + }, + "id": 21, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 12, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "round(sum by(lun_name) 
(irate(ceph_iscsi_lun_read_bytes{client_iqn=~\"[[client_iqn]]\"}[30s])))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{lun_name}}(r)", + "refId": "A" + }, + { + "expr": "round(sum by(lun_name) (irate(ceph_iscsi_lun_write_bytes{client_iqn=~\"[[client_iqn]]\"}[30s])))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{lun_name}}(w)", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Throughput for $client_iqn", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "columns": [], + "datasource": null, + "fontSize": "100%", + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 8 + }, + "id": 24, + "links": [], + "minSpan": 12, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 5, + "desc": true + }, + "styles": [ + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Time", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "Gateway Owner", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "gw_owner", + "thresholds": [], + "type": "string", + "unit": "short" + }, + { + "alias": "LUN Name (pool.image)", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + 
"dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "a_lun_name", + "thresholds": [], + "type": "string", + "unit": "short" + }, + { + "alias": "Size", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Value", + "thresholds": [], + "type": "number", + "unit": "bytes" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "hidden", + "unit": "short" + } + ], + "targets": [ + { + "expr": "max(label_replace(ceph_iscsi_client_lun{client_iqn=~\"[[client_iqn]]\"},\"a_lun_name\",\"$1\", \"lun_name\",\"(.*)\")) \n by(a_lun_name,client_iqn) +\non(a_lun_name) group_right(client_iqn) \n max(label_replace(ceph_iscsi_lun_mapped,\"a_lun_name\",\"$1\",\"lun_name\",\"(.*)\")) by(a_lun_name, gw_owner) +\non(a_lun_name) group_right(client_iqn, gw_owner) \n max(label_replace(ceph_iscsi_lun_size_bytes,\"a_lun_name\",\"$1\",\"lun_name\",\"(.*)\")) by(a_lun_name)\n\n", + "format": "table", + "hide": false, + "instant": true, + "intervalFactor": 1, + "refId": "C" + } + ], + "title": "LUN Details for $client_iqn", + "transform": "table", + "type": "table" + } + ], + "refresh": "15s", + "schemaVersion": 16, + "style": "dark", + "tags": [ + "iscsi" + ], + "templating": { + "list": [ + { + "allValue": null, + "current": {}, + "datasource": null, + "hide": 0, + "includeAll": false, + "label": "Client IQN", + "multi": false, + "name": "client_iqn", + "options": [], + "query": "label_values(ceph_iscsi_client_login,client_iqn)", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": {}, + "datasource": null, + 
"hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "gateway_iqn", + "options": [], + "query": "label_values(ceph_iscsi_gateway_tpg_total, gw_iqn)", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "(eth|bon|en|ib|mlx)", + "value": "(eth|bon|en|ib|mlx)" + }, + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "interfaces", + "options": [ + { + "selected": true, + "text": "(eth|bon|en|ib|mlx)", + "value": "(eth|bon|en|ib|mlx)" + } + ], + "query": "(eth|bon|en|ib|mlx)", + "type": "custom" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "15s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "iSCSI Client Details", + "version": 7 +} diff --git a/ansible/roles/ceph-grafana/files/dashboards/cephmetrics/iscsi-overview.json b/ansible/roles/ceph-grafana/files/dashboards/cephmetrics/iscsi-overview.json new file mode 100644 index 0000000..d45c15e --- /dev/null +++ b/ansible/roles/ceph-grafana/files/dashboards/cephmetrics/iscsi-overview.json @@ -0,0 +1,1554 @@ +{ + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "5.0.0" + }, + { + "type": "panel", + "id": "grafana-piechart-panel", + "name": "Pie Chart", + "version": "1.3.3" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "5.0.0" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Local", + "version": "5.0.0" + }, + { + "type": "panel", + "id": "singlestat", + "name": "Singlestat", + "version": "5.0.0" + }, + { + "type": "panel", + "id": "text", + "name": "Text", + "version": "5.0.0" + } + ], + "annotations": { + 
"list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "id": null, + "iteration": 1530144424365, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "tags": [ + "iscsi" + ], + "targetBlank": true, + "title": "Clients", + "type": "dashboards" + } + ], + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 25, + "panels": [], + "repeat": null, + "title": "iSCSI Gateway Group : $gateway_iqn", + "type": "row" + }, + { + "content": "", + "gridPos": { + "h": 4, + "w": 4, + "x": 0, + "y": 1 + }, + "id": 8, + "links": [], + "minSpan": 4, + "mode": "markdown", + "title": "", + "transparent": true, + "type": "text" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": null, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 2, + "x": 4, + "y": 1 + }, + "id": 1, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "max(ceph_iscsi_gateway_tpg_total)", + "format": "time_series", + "instant": true, + 
"intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "", + "title": "Gateways", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": null, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 2, + "x": 6, + "y": 1 + }, + "id": 3, + "interval": null, + "links": [ + { + "dashUri": "db/iscsi-client-details", + "dashboard": "iSCSI Client Details", + "targetBlank": true, + "title": "iSCSI Client Details", + "type": "dashboard" + } + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "max(count by(instance) (ceph_iscsi_client_login))", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "", + "title": "Clients", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": null, + "format": "none", + "gauge": { + "maxValue": 
100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 2, + "x": 8, + "y": 1 + }, + "id": 4, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "max(sum(ceph_iscsi_client_login) by(gw_name))", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "", + "title": "Sessions", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": null, + "decimals": 0, + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 10, + "y": 1 + }, + "id": 5, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 4, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + 
"fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "max(sum(ceph_iscsi_lun_size_bytes) by(gw_name))", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "", + "title": "Defined Capacity", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": null, + "decimals": 0, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 2, + "x": 14, + "y": 1 + }, + "id": 6, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "max(count by(instance) (ceph_iscsi_lun_mapped))", + "format": "time_series", + "intervalFactor": 2, + "refId": "B" + } + ], + "thresholds": "", + "title": "LUNs", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 
0.89)", + "#d44a3a" + ], + "datasource": null, + "decimals": 0, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 2, + "x": 16, + "y": 1 + }, + "id": 7, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "max(count by(instance) (ceph_iscsi_lun_mapped)) - max(count by(instance) (ceph_iscsi_lun_mapped == 1))", + "format": "time_series", + "hide": false, + "instant": true, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "", + "title": "Unused LUNs", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "content": "", + "gridPos": { + "h": 4, + "w": 6, + "x": 18, + "y": 1 + }, + "id": 9, + "links": [], + "minSpan": 6, + "mode": "markdown", + "title": "", + "transparent": true, + "type": "text" + }, + { + "content": "", + "gridPos": { + "h": 4, + "w": 2, + "x": 0, + "y": 5 + }, + "id": 10, + "links": [], + "minSpan": 2, + "mode": "markdown", + "title": "", + "transparent": true, + "type": "text" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": null, + "decimals": 0, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + 
"show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 2, + "y": 5 + }, + "height": "200", + "id": 11, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 4, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(sum(rate(ceph_iscsi_lun_iops[30s])) by(gw_name))", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "", + "title": "IOPS", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": null, + "decimals": 1, + "format": "decbytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 6, + "y": 5 + }, + "height": "200", + "id": 12, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 4, + "nullPointMode": "connected", + "nullText": null, + "postfix": "/s", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + 
], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum((sum(rate(ceph_iscsi_lun_read_bytes[30s])) by(gw_name)) + (sum(rate(ceph_iscsi_lun_write_bytes[30s])) by(gw_name)))", + "format": "time_series", + "instant": false, + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "", + "title": "Throughput", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "gridPos": { + "h": 5, + "w": 6, + "x": 10, + "y": 5 + }, + "height": "200", + "id": 13, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "label_replace(\n (sum(\n rate(\n node_network_transmit_bytes{job=\"node\", instance=~\"($iscsi_gws).*\"}[30s])\n ) by(instance)),\n \"gw_name\", \"$1\",\"instance\",\"([^.]*).*\")", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "{{gw_name}}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Network Load by Gateway", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": 
"short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "breakPoint": "50%", + "cacheTimeout": null, + "combine": { + "label": "Others", + "threshold": 0 + }, + "datasource": null, + "description": "LUNs are configured with a primary path (active), and a number of secondary paths (passive). Under normal circumstances, only the active/primary path is used for I/O. This chart shows the distribution of the active paths across each of the gateways.", + "fontSize": "80%", + "format": "short", + "gridPos": { + "h": 5, + "w": 6, + "x": 16, + "y": 5 + }, + "id": 14, + "interval": null, + "legend": { + "show": true, + "values": true + }, + "legendType": "Under graph", + "links": [], + "maxDataPoints": 3, + "minSpan": 6, + "nullPointMode": "connected", + "pieType": "pie", + "strokeWidth": 1, + "targets": [ + { + "expr": "max(count(ceph_iscsi_lun_mapped) by(instance,gw_owner)) by(gw_owner)", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "legendFormat": "{{gw_owner}}", + "refId": "A" + } + ], + "title": "Active LUN Paths by Gateway", + "type": "grafana-piechart-panel", + "valueName": "current" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 10 + }, + "id": 26, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "fill": 1, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 11 + }, + "id": 15, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 12, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "round(\n sum by(gw_name) \n 
(rate(ceph_iscsi_lun_iops[30s])\n )\n)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{gw_name}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "IOPS by Gateway", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 11 + }, + "id": 16, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 12, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "round(\n sum by(gw_name) \n ((rate(ceph_iscsi_lun_read_bytes[30s])) + \n (rate(ceph_iscsi_lun_write_bytes[30s]))\n )\n )", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{gw_name}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Throughput by Gateway", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + 
"format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "fill": 1, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 18 + }, + "id": 22, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 12, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "100 - (avg by (gw) \n (label_replace(\n irate(node_cpu{job=\"node\",instance=~\"($iscsi_gws).*\",mode=\"idle\"}[30s]),\n \"gw\",\"$1\",\"instance\",\"([^.]*).*\")\n ) * 100)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{gw}}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "CPU Busy %", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "fill": 1, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 18 + }, + "id": 23, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + 
"points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "label_replace(\n ((node_memory_MemTotal{job=\"node\", instance=~\"($iscsi_gws).*\"} - node_memory_MemFree) / node_memory_MemTotal) * 100, \n \"gw\", \"$1\", \"instance\",\"([^.]*).*\")", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{gw}}", + "refId": "D" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "RAM Util%", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "title": "Gateway Load", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 11 + }, + "id": 27, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "fill": 1, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 12 + }, + "id": 17, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 24, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "round(sum by(shortname) (label_replace(rate(ceph_iscsi_lun_iops[30s]),\"shortname\",\"$2\",\"client_iqn\",\"(.*):(.*)\")))", + "format": "time_series", + "instant": false, + "intervalFactor": 1, + "legendFormat": 
"{{shortname}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "IOPS by Client (R+W)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "fill": 1, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 19 + }, + "id": 19, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 24, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "round(\n sum by(shortname) \n ((label_replace(rate(ceph_iscsi_lun_read_bytes[30s]),\"shortname\",\"$2\",\"client_iqn\",\"(.*):(.*)\")) +\n (label_replace(rate(ceph_iscsi_lun_write_bytes[30s]),\"shortname\",\"$2\",\"client_iqn\",\"(.*):(.*)\"))\n ))", + "format": "time_series", + "instant": false, + "intervalFactor": 1, + "legendFormat": "{{shortname}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Throughput by Client", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, 
+ { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "title": "Client Workloads", + "type": "row" + } + ], + "refresh": "15s", + "schemaVersion": 16, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "allValue": null, + "current": {}, + "datasource": null, + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "gateway_iqn", + "options": [], + "query": "label_values(ceph_iscsi_gateway_tpg_total, gw_iqn)", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "(eth|bon|en|ib|mlx)", + "value": "(eth|bon|en|ib|mlx)" + }, + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "interfaces", + "options": [ + { + "selected": true, + "text": "(eth|bon|en|ib|mlx)", + "value": "(eth|bon|en|ib|mlx)" + } + ], + "query": "(eth|bon|en|ib|mlx)", + "type": "custom" + }, + { + "allValue": null, + "current": {}, + "datasource": null, + "hide": 2, + "includeAll": true, + "label": null, + "multi": false, + "name": "iscsi_gws", + "options": [], + "query": "label_values(ceph_iscsi_scrape_duration_seconds, gw_name)", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "15s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "iSCSI Overview", + "version": 38 +} diff --git a/ansible/roles/ceph-grafana/files/dashboards/cephmetrics/latency-by-server.json b/ansible/roles/ceph-grafana/files/dashboards/cephmetrics/latency-by-server.json new 
file mode 100644 index 0000000..7e9510f --- /dev/null +++ b/ansible/roles/ceph-grafana/files/dashboards/cephmetrics/latency-by-server.json @@ -0,0 +1,304 @@ +{ + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "5.0.4" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "5.0.0" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Local", + "version": "5.0.0" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "id": null, + "iteration": 1524194437238, + "links": [], + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 18, + "panels": [], + "repeat": null, + "title": "All OSD Hosts", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 1 + }, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max(\n ((irate(node_disk_read_time_ms[30s]) + irate(node_disk_write_time_ms[30s])) / \n (irate(node_disk_reads_completed[30s]) + irate(node_disk_writes_completed[30s])) +\n ignoring(ceph_daemon,job) ceph_disk_occupation))\n by(instance)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": 
null, + "title": "All OSD Hosts - Highest Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": "ms", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 8 + }, + "id": 19, + "panels": [], + "repeat": null, + "title": "Each OSD Host's Max Disk Latency", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "gridPos": { + "h": 7, + "w": 4, + "x": 0, + "y": 9 + }, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + { + "dashUri": "db/osd-node-detail", + "dashboard": "OSD Node Detail", + "includeVars": true, + "targetBlank": true, + "title": "OSD Node Detail", + "type": "dashboard" + } + ], + "minSpan": 4, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "osd_servers", + "repeatDirection": "h", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max(label_replace((irate(node_disk_write_time_ms{instance=~\"[[osd_servers]].*\"}[30s]) + irate(node_disk_read_time_ms{instance=~\"[[osd_servers]].*\"}[30s])) / \n(irate(node_disk_writes_completed{instance=~\"[[osd_servers]].*\"}[30s]) + irate(node_disk_reads_completed{instance=~\"[[osd_servers]].*\"}[30s])),\"host\",\"$1\",\"instance\",\"([^.]*).*\")) by(host)", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "refId": "A" + } + ], + 
"thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Max Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": "ms", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "refresh": "15s", + "schemaVersion": 16, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "allValue": null, + "current": {}, + "datasource": null, + "hide": 2, + "includeAll": true, + "label": null, + "multi": false, + "name": "osd_servers", + "options": [], + "query": "label_values(ceph_disk_occupation, instance)", + "refresh": 1, + "regex": "([^.]*).*", + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "15s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Latency by Server", + "version": 12 +} diff --git a/ansible/roles/ceph-grafana/files/dashboards/cephmetrics/mds-performance.json b/ansible/roles/ceph-grafana/files/dashboards/cephmetrics/mds-performance.json new file mode 100644 index 0000000..6ef2390 --- /dev/null +++ b/ansible/roles/ceph-grafana/files/dashboards/cephmetrics/mds-performance.json @@ -0,0 +1,546 @@ +{ + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "5.0.4" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "5.0.0" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Local", + 
"version": "5.0.0" + }, + { + "type": "panel", + "id": "singlestat", + "name": "Singlestat", + "version": "5.0.0" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "id": null, + "iteration": 1525407331553, + "links": [], + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 8, + "panels": [], + "title": "MDS Overview", + "type": "row" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": null, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 0, + "y": 1 + }, + "id": 11, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(ceph_mds_metadata)", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "", + "title": "Active MDS Servers", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, 
+ "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": null, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 3, + "y": 1 + }, + "id": 6, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(ceph_fs_metadata)", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "", + "title": "Filesystems", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": null, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 6, + "y": 1 + }, + "id": 12, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + 
"prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(ceph_mds_sessions_session_count)", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "", + "title": "Clients", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 6 + }, + "id": 10, + "panels": [], + "title": "MDS Performance", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 7 + }, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(ceph_objecter_op_r{ceph_daemon=~\"($mds_servers).*\"})", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "MDS Reads", + "refId": "A" + }, + { + "expr": "sum(ceph_objecter_op_w{ceph_daemon=~\"($mds_servers).*\"})", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "MDS Writes", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "MDS Workload - $mds_servers", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": 
null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 7 + }, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "ceph_mds_server_handle_client_request{ceph_daemon=~\"($mds_servers).*\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ceph_daemon}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Client Request Load - $mds_servers", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "refresh": "15s", + "schemaVersion": 16, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "allValue": null, + "current": {}, + "datasource": null, + "hide": 0, + "includeAll": true, + "label": "MDS Server", + "multi": false, + "name": "mds_servers", + "options": [], + "query": "label_values(ceph_mds_inodes, ceph_daemon)", + "refresh": 1, + "regex": "", + 
"sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "15s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "MDS Performance", + "version": 3 +} diff --git a/ansible/roles/ceph-grafana/files/dashboards/cephmetrics/network-usage-by-node.json b/ansible/roles/ceph-grafana/files/dashboards/cephmetrics/network-usage-by-node.json new file mode 100644 index 0000000..7349f9f --- /dev/null +++ b/ansible/roles/ceph-grafana/files/dashboards/cephmetrics/network-usage-by-node.json @@ -0,0 +1,456 @@ +{ + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "5.0.4" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "5.0.0" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Local", + "version": "5.0.0" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "id": null, + "iteration": 1525134169600, + "links": [], + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 4, + "panels": [], + "repeat": null, + "title": "Aggregated Network Load", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 1 + }, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + 
"lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum (irate(node_network_receive_bytes{instance=~\"([[mon_servers]]).*\",device=~\"[[interfaces]].*\"}[30s])) + \nsum (irate(node_network_transmit_bytes{instance=~\"([[mon_servers]]).*\",device=~\"[[interfaces]].*\"}[30s]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "MONs", + "refId": "A" + }, + { + "expr": "sum (irate(node_network_receive_bytes{instance=~\"[[osd_servers]].*\",device=~\"[[interfaces]].*\"}[30s])) + \nsum (irate(node_network_transmit_bytes{instance=~\"[[osd_servers]].*\",device=~\"[[interfaces]].*\"}[30s]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "OSDs", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Cluster Network Load", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 9 + }, + "id": 5, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "fill": 1, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 10 + }, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": 
false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum (irate(node_network_receive_bytes{instance=~\"([[mon_servers]]).*\", device=~\"[[interfaces]].*\"}[30s])) by (instance) + \nsum (irate(node_network_transmit_bytes{instance=~\"([[mon_servers]]).*\", device=~\"[[interfaces]].*\"}[30s])) by (instance)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "MON Network Load", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "title": "MON Hosts", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 10 + }, + "id": 6, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "fill": 1, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 11 + }, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum \n (irate(node_network_receive_bytes{instance=~\"[[osd_servers]]\", device=~\"[[interfaces]].*\"}[30s]) + \n 
irate(node_network_transmit_bytes{instance=~\"[[osd_servers]]\", device=~\"[[interfaces]].*\"}[30s])) by(instance)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "OSD Hosts Network Load", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "title": "OSD Hosts", + "type": "row" + } + ], + "refresh": "15s", + "schemaVersion": 16, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "allValue": null, + "current": {}, + "datasource": null, + "hide": 2, + "includeAll": true, + "label": null, + "multi": false, + "name": "osd_servers", + "options": [], + "query": "label_values(ceph_disk_occupation, instance)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": {}, + "datasource": null, + "hide": 2, + "includeAll": true, + "label": null, + "multi": false, + "name": "only_osds", + "options": [], + "query": "label_values(ceph_server_metadata{services=\"osd\"}, hostname)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": {}, + "datasource": null, + "hide": 2, + "includeAll": true, + "label": null, + "multi": false, + "name": "mon_servers", + "options": [], + "query": "label_values(ceph_mon_quorum_status, ceph_daemon)", + "refresh": 1, + "regex": "/mon.(.*)/", + 
"sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "(eth|en|bond|mlx|ib)", + "value": "(eth|en|bond|mlx|ib)" + }, + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "interfaces", + "options": [ + { + "selected": true, + "text": "(eth|en|bond|mlx|ib)", + "value": "(eth|en|bond|mlx|ib)" + } + ], + "query": "(eth|en|bond|mlx|ib)", + "type": "custom" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "15s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Network Usage by Server", + "version": 19 +} diff --git a/ansible/roles/ceph-grafana/files/dashboards/cephmetrics/osd-node-detail.json b/ansible/roles/ceph-grafana/files/dashboards/cephmetrics/osd-node-detail.json new file mode 100644 index 0000000..2dc231b --- /dev/null +++ b/ansible/roles/ceph-grafana/files/dashboards/cephmetrics/osd-node-detail.json @@ -0,0 +1,1150 @@ +{ + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "5.0.4" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "5.0.0" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Local", + "version": "5.0.0" + }, + { + "type": "panel", + "id": "singlestat", + "name": "Singlestat", + "version": "5.0.0" + }, + { + "type": "panel", + "id": "table", + "name": "Table", + "version": "5.0.0" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "id": null, + "iteration": 
1526509711107, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "tags": [ + "overview" + ], + "title": "Shortcuts", + "type": "dashboards" + } + ], + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 11, + "panels": [], + "repeat": null, + "title": "'$osd_servers' OSD Overview", + "type": "row" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 0, + "y": 1 + }, + "height": "160", + "id": 1, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 4, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(ceph_disk_occupation{device=~\"($device_id)\", instance=~\"($osd_servers).*\"})", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 40, + "textEditor": true + } + ], + "thresholds": "", + "title": "OSDs", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "columns": [], + "datasource": null, + "description": "", + "fontSize": "100%", + "gridPos": { + "h": 8, + "w": 13, + "x": 4, + "y": 1 + }, + "height": "160", + 
"hideTimeOverride": false, + "id": 3, + "links": [], + "minSpan": 6, + "pageSize": 20, + "scroll": true, + "showHeader": true, + "sort": { + "col": 1, + "desc": false + }, + "styles": [ + { + "alias": "Hostname", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "aa_hostname", + "thresholds": [], + "type": "string", + "unit": "short" + }, + { + "alias": "OSD", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "ceph_daemon", + "thresholds": [], + "type": "string", + "unit": "short" + }, + { + "alias": "Device", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "device", + "thresholds": [], + "type": "string", + "unit": "short" + }, + { + "alias": "Device Type", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "device_class", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "Size", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Value", + "thresholds": [], + "type": "number", + "unit": "bytes" + }, + { + "alias": "Ceph Version", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "ceph_ver", + "thresholds": [], + "type": "string", + "unit": "short" + }, + { + "alias": "", + 
"colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "hidden", + "unit": "short" + } + ], + "targets": [ + { + "expr": "(label_replace(ceph_disk_occupation{instance=~\"($osd_servers).*\",device=~\"($device_id)\"},\"aa_hostname\",\"$1\",\"instance\",\"(.*)\") * \n on(ceph_daemon) group_left(aa_instance) ceph_osd_stat_bytes) *\n on(ceph_daemon) group_left(device_class,ceph_ver) label_replace(label_replace(ceph_osd_metadata,\"ceph_daemon\",\"osd.$1\",\"id\",\"(.*)\"),\"ceph_ver\",\"$1\",\"ceph_version\",\"ceph version (.*) (.*) (.*) (.*)\")", + "format": "table", + "hide": false, + "instant": true, + "intervalFactor": 1, + "refId": "C" + } + ], + "timeFrom": null, + "title": "Host OSD Breakdown", + "transform": "table", + "type": "table" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": null, + "decimals": 0, + "description": "Each OSD consists of a Journal/WAL partition and a data partition. 
The RAW Capacity shown is the sum of the data partitions across all OSDs on the selected OSD hosts.", + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 0, + "y": 5 + }, + "height": "160", + "id": 2, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": "", + "minSpan": 4, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(ceph_osd_stat_bytes and on (ceph_daemon) ceph_disk_occupation{device=~\"($device_id)\", instance=~\"($osd_servers).*\"})", + "format": "time_series", + "intervalFactor": 2, + "refId": "A", + "step": 40, + "textEditor": true + } + ], + "thresholds": "", + "title": "Raw Capacity", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 9 + }, + "id": 12, + "panels": [], + "repeat": null, + "title": "'$osd_servers' Performance Statistics", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 10 + }, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 12, 
+ "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max by (device) (\n irate(node_disk_io_time_ms[5m]) and on (instance, device) ceph_disk_occupation{instance=~\"($osd_servers).*\", device=~\"($device_id)\"}\n) / 10", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "{{device}}", + "refId": "A", + "step": 10, + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Disk utilisation", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "%Util", + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 10 + }, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 12, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max by (device) (\n irate(node_disk_reads_completed{device=~\"($device_id)\", instance=~\"($osd_servers).*\"}[5m]) +\n irate(node_disk_writes_completed{device=~\"($device_id)\", instance=~\"($osd_servers).*\"}[5m])\n)", + "format": "time_series", + 
"intervalFactor": 1, + "legendFormat": "{{device}}", + "refId": "A", + "step": 10, + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Disk IOPS", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "IOPS", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 17 + }, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 12, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max by (device) (\n irate(node_disk_write_time_ms{device=~ \"($device_id)\", instance=~\"($osd_servers).*\"}[5m])\n /\n clamp_min(irate(node_disk_writes_completed{device=~ \"($device_id)\", instance=~\"($osd_servers).*\"}[5m]), 0.001)\n+\n irate(node_disk_read_time_ms{device=~ \"($device_id)\", instance=~\"($osd_servers).*\"}[5m])\n /\n clamp_min(irate(node_disk_reads_completed{device=~ \"($device_id)\", instance=~\"($osd_servers).*\"}[5m]), 0.001)\n)", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "{{device}}", + "refId": "A", + "step": 10, + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Disk Latency", 
+ "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 17 + }, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 12, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max by (device) (\n irate(node_disk_bytes_read{device=~\"($device_id)\", instance=~\"($osd_servers).*\"}[5m]) + \n irate(node_disk_bytes_written{device=~\"($device_id)\", instance=~\"($osd_servers).*\"}[5m])\n)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{device}}", + "refId": "A", + "step": 10, + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Throughput by Disk", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + 
"aliasColors": { + "interrupt": "#447EBC", + "steal": "#6D1F62", + "system": "#890F02", + "user": "#3F6833", + "wait": "#C15C17" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "Shows the CPU breakdown. When multiple servers are selected, only the first host's cpu data is shown", + "fill": 3, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 24 + }, + "id": 9, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 12, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (mode) (\n irate(node_cpu{instance=~\"($osd_servers).*\", mode=~\"(irq|nice|softirq|steal|system|user|iowait)\"}[5m])\n) / scalar(\n sum(irate(node_cpu{instance=~\"($osd_servers).*\"}[5m]))\n) * 100", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{mode}}", + "refId": "A", + "step": 10, + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers CPU Utilisation", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "", + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + "Available": "#508642", + "Free": "#508642", + "Total": "#bf1b00", + "Used": "#bf1b00", + "total": "#bf1b00", + "used": "#0a50a1" + }, + "bars": false, + "dashLength": 10, + "dashes": 
false, + "datasource": null, + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 24 + }, + "id": 14, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "total", + "color": "#bf1b00", + "fill": 0, + "linewidth": 2, + "stack": false + } + ], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(node_memory_MemTotal{instance=~\"[[osd_servers]].*\"}) - (\n sum(node_memory_MemFree{instance=~\"[[osd_servers]].*\"}) + \n sum(node_memory_Cached{instance=~\"[[osd_servers]].*\"}) + \n sum(node_memory_Buffers{instance=~\"[[osd_servers]].*\"}) +\n sum(node_memory_Slab{instance=~\"[[osd_servers]].*\"})\n )\n \n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "used", + "refId": "D" + }, + { + "expr": "sum(node_memory_MemFree{instance=~\"[[osd_servers]].*\"}) ", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Free", + "refId": "A" + }, + { + "expr": "sum(node_memory_Cached{instance=~\"[[osd_servers]].*\"}) + \nsum(node_memory_Buffers{instance=~\"[[osd_servers]].*\"}) +\nsum(node_memory_Slab{instance=~\"[[osd_servers]].*\"}) \n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "buffers/cache", + "refId": "C" + }, + { + "expr": "sum(node_memory_MemTotal{instance=~\"[[osd_servers]].*\"})", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "total", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "RAM Usage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, 
+ "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 24 + }, + "id": 10, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 12, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (device) (irate(node_network_receive_bytes{instance=~\"($osd_servers).*\", device=~\"(eth|en|bond|ib|mlx).*\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{device}}.rx", + "refId": "A", + "step": 10, + "textEditor": true + }, + { + "expr": "sum by (device) (irate(node_network_transmit_bytes{instance=~\"($osd_servers).*\", device=~\"(eth|en|bond|ib|mlx).*\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{device}}.tx", + "refId": "B", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$osd_servers Network Load", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + 
] + } + ], + "refresh": "10s", + "schemaVersion": 16, + "style": "dark", + "tags": [ + "overview" + ], + "templating": { + "list": [ + { + "allValue": null, + "current": {}, + "datasource": null, + "hide": 0, + "includeAll": true, + "label": "OSD Host Name", + "multi": false, + "name": "osd_servers", + "options": [], + "query": "label_values(ceph_disk_occupation, instance)", + "refresh": 1, + "regex": "([^.]*).*", + "sort": 3, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": {}, + "datasource": null, + "hide": 0, + "includeAll": true, + "label": "Disk Name", + "multi": true, + "name": "device_id", + "options": [], + "query": "ceph_disk_occupation", + "refresh": 1, + "regex": "/device=\"([^\"]*)\"/", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "selected": true, + "text": "60", + "value": "60" + }, + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "max_devices", + "options": [ + { + "selected": true, + "text": "60", + "value": "60" + } + ], + "query": "60", + "type": "custom" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "OSD Node Detail", + "version": 15 +} diff --git a/ansible/roles/ceph-grafana/files/dashboards/cephmetrics/tests/__init__.py b/ansible/roles/ceph-grafana/files/dashboards/cephmetrics/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/ansible/roles/ceph-grafana/files/dashboards/cephmetrics/tests/test_mgr_dashboards.py b/ansible/roles/ceph-grafana/files/dashboards/cephmetrics/tests/test_mgr_dashboards.py new file mode 
100644 index 0000000..4d417f4 --- /dev/null +++ b/ansible/roles/ceph-grafana/files/dashboards/cephmetrics/tests/test_mgr_dashboards.py @@ -0,0 +1,66 @@ +import pytest + +from .util import TestDashboards, get_dashboards + + +def walk(obj, callback, parent_key=None, path=None): + if path is None: + path = '.' + if isinstance(obj, dict): + for key, value in obj.items(): + walk( + value, + callback, + parent_key=key, + path='{}["{}"]'.format(path, key), + ) + elif isinstance(obj, list): + for i in range(len(obj)): + walk( + obj[i], + callback, + parent_key=parent_key, + path='{}[{}]'.format(path, i), + ) + else: + callback(obj, parent_key, path) + + +class TestMgrDashboards(TestDashboards): + dashboards = get_dashboards() + + @pytest.mark.parametrize("name", dashboards.keys()) + def test_type(self, name): + assert name + obj = self.dashboards[name] + assert type(obj) is dict + + @pytest.mark.parametrize("name", dashboards.keys()) + def test_no_collectd(self, name): + def test(item, pkey, path): + if type(item) in (basestring, unicode): + assert 'collectd' not in item + walk(self.dashboards[name], test) + + @pytest.mark.parametrize("name", dashboards.keys()) + def test_no_ds_local(self, name): + def test(item, pkey, path): + if type(item) in (basestring, unicode): + assert '${DS_LOCAL}' not in item + walk(self.dashboards[name], test) + + @pytest.mark.parametrize("name", dashboards.keys()) + def test_no_influxdb_dstype(self, name): + def test(item, pkey, path): + if pkey == 'dsType' and type(item) in (basestring, unicode): + assert 'influxdb' not in item + walk(self.dashboards[name], test) + + @pytest.mark.parametrize("name", dashboards.keys()) + def test_no_influxdb_query(self, name): + def test(item, pkey, path): + if pkey == 'query': + assert 'SELECT' not in item + assert 'FROM' not in item + assert 'WHERE' not in item + walk(self.dashboards[name], test) diff --git a/ansible/roles/ceph-grafana/files/dashboards/cephmetrics/tests/util.py 
b/ansible/roles/ceph-grafana/files/dashboards/cephmetrics/tests/util.py new file mode 120000 index 0000000..47f38a1 --- /dev/null +++ b/ansible/roles/ceph-grafana/files/dashboards/cephmetrics/tests/util.py @@ -0,0 +1 @@ +../../tests/util.py \ No newline at end of file diff --git a/ansible/roles/ceph-grafana/files/dashboards/mgr-prometheus/alert-status.json b/ansible/roles/ceph-grafana/files/dashboards/mgr-prometheus/alert-status.json deleted file mode 100644 index 1069b43..0000000 --- a/ansible/roles/ceph-grafana/files/dashboards/mgr-prometheus/alert-status.json +++ /dev/null @@ -1,1716 +0,0 @@ -{ - "__requires": [ - { - "type": "panel", - "id": "alertlist", - "name": "Alert List", - "version": "5.0.0" - }, - { - "type": "grafana", - "id": "grafana", - "name": "Grafana", - "version": "5.0.4" - }, - { - "type": "panel", - "id": "graph", - "name": "Graph", - "version": "5.0.0" - }, - { - "type": "datasource", - "id": "prometheus", - "name": "Local", - "version": "5.0.0" - } - ], - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "editable": false, - "gnetId": null, - "graphTooltip": 0, - "id": null, - "iteration": 1526437197732, - "links": [], - "panels": [ - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 11, - "panels": [], - "repeat": null, - "title": "Active Alert List", - "type": "row" - }, - { - "gridPos": { - "h": 7, - "w": 24, - "x": 0, - "y": 1 - }, - "id": 1, - "limit": "20", - "links": [], - "onlyAlertsOnDashboard": true, - "show": "current", - "sortOrder": 3, - "stateFilter": [ - "alerting" - ], - "title": "Active Ceph Alert List", - "type": "alertlist" - }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 8 - }, - "id": 12, - "panels": [], - "repeat": null, - "title": "Health Checks", - "type": "row" - }, - 
{ - "alert": { - "conditions": [ - { - "evaluator": { - "params": [ - 0 - ], - "type": "gt" - }, - "operator": { - "type": "and" - }, - "query": { - "params": [ - "A", - "1m", - "now" - ] - }, - "reducer": { - "params": [], - "type": "max" - }, - "type": "query" - } - ], - "executionErrorState": "alerting", - "frequency": "10s", - "handler": 1, - "name": "Overall Ceph Health alert", - "noDataState": "no_data", - "notifications": [ - { - "id": 1 - } - ] - }, - "aliasColors": { - "Ceph Health": "#890F02", - "Ceph Health (0:OK, 4:Warning,8:Error)": "#DEDAF7", - "ceph health": "#890F02" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "description": "The chart plots the clusters health, over time. Health is depicted as a integer; 0, 1 or 2 where 0 is OK, 1 is WARN and 2 represents an ERROR state.", - "fill": 1, - "gridPos": { - "h": 7, - "w": 4, - "x": 0, - "y": 9 - }, - "hideTimeOverride": false, - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "maxDataPoints": "360", - "minSpan": 4, - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": true, - "targets": [ - { - "expr": "ceph_health_status", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Ceph Health", - "refId": "A", - "step": 20, - "textEditor": true - } - ], - "thresholds": [ - { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 0 - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Overall Ceph Health", - "tooltip": { - "shared": false, - "sort": 1, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": 
"short", - "label": "", - "logBase": 1, - "max": "2", - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "alert": { - "conditions": [ - { - "evaluator": { - "params": [ - 0 - ], - "type": "gt" - }, - "operator": { - "type": "and" - }, - "query": { - "params": [ - "A", - "1m", - "now" - ] - }, - "reducer": { - "params": [], - "type": "max" - }, - "type": "query" - } - ], - "executionErrorState": "alerting", - "frequency": "60s", - "handler": 1, - "name": "Disks Near Full alert", - "noDataState": "ok", - "notifications": [ - { - "id": 1 - } - ] - }, - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "description": "This shows how many disks are at or above 80% full. Performance may degrade beyond this threshold on filestore (XFS) backed OSD's.", - "fill": 1, - "gridPos": { - "h": 7, - "w": 4, - "x": 4, - "y": 9 - }, - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "minSpan": 4, - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "count((ceph_osd_stat_bytes_used / ceph_osd_stat_bytes) * 100 > 85)", - "format": "time_series", - "hide": false, - "instant": false, - "intervalFactor": 1, - "legendFormat": "", - "refId": "A", - "step": 40, - "textEditor": true - } - ], - "thresholds": [ - { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 0 - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Disks Near Full", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": 
"time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "alert": { - "conditions": [ - { - "evaluator": { - "params": [ - 0 - ], - "type": "gt" - }, - "operator": { - "type": "and" - }, - "query": { - "params": [ - "A", - "30s", - "now" - ] - }, - "reducer": { - "params": [], - "type": "max" - }, - "type": "query" - } - ], - "executionErrorState": "alerting", - "frequency": "10s", - "handler": 1, - "name": "OSDs Down alert", - "noDataState": "ok", - "notifications": [ - { - "id": 1 - } - ] - }, - "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, - "datasource": null, - "description": "Count of OSDs currently in a DOWN state", - "fill": 2, - "gridPos": { - "h": 7, - "w": 4, - "x": 8, - "y": 9 - }, - "hideTimeOverride": true, - "id": 4, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "hideZero": false, - "max": false, - "min": false, - "rightSide": false, - "show": false, - "total": false, - "values": false - }, - "lines": false, - "linewidth": 2, - "links": [], - "minSpan": 4, - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": true, - "targets": [ - { - "expr": "count(ceph_osd_metadata) - count(ceph_osd_up > 0.5)", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "down", - "refId": "A", - "step": 4, - "textEditor": true - } - ], - "thresholds": [ - { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 0 - } - ], - "timeFrom": "5m", - "timeShift": null, - "title": "OSDs Down", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": 
"graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "alert": { - "conditions": [ - { - "evaluator": { - "params": [ - 0 - ], - "type": "gt" - }, - "operator": { - "type": "and" - }, - "query": { - "params": [ - "A", - "30s", - "now" - ] - }, - "reducer": { - "params": [], - "type": "last" - }, - "type": "query" - } - ], - "executionErrorState": "alerting", - "frequency": "30s", - "handler": 1, - "name": "OSDs Hosts Down alert", - "noDataState": "ok", - "notifications": [ - { - "id": 1 - } - ] - }, - "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, - "datasource": null, - "description": "Count of OSD Hosts that have all daemons down", - "fill": 2, - "gridPos": { - "h": 7, - "w": 4, - "x": 12, - "y": 9 - }, - "hideTimeOverride": true, - "id": 14, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "hideZero": false, - "max": false, - "min": false, - "rightSide": false, - "show": false, - "total": false, - "values": false - }, - "lines": false, - "linewidth": 2, - "links": [], - "minSpan": 4, - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": true, - "targets": [ - { - "expr": "count(\n (count by(instance) (ceph_disk_occupation * \n on(ceph_daemon) group_right(instance) ceph_osd_up == 0) - \n count by(instance) (ceph_disk_occupation)) == 0)", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "down", - "refId": "A", - "step": 4, - "textEditor": true - } - ], - "thresholds": [ - { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", 
- "value": 0 - } - ], - "timeFrom": "5m", - "timeShift": null, - "title": "OSDs Hosts Down", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "alert": { - "conditions": [ - { - "evaluator": { - "params": [ - 0 - ], - "type": "gt" - }, - "operator": { - "type": "and" - }, - "query": { - "params": [ - "A", - "1m", - "now" - ] - }, - "reducer": { - "params": [], - "type": "last" - }, - "type": "query" - } - ], - "executionErrorState": "alerting", - "frequency": "60s", - "handler": 1, - "name": "PG's Stuck alert", - "noDataState": "ok", - "notifications": [ - { - "id": 1 - } - ] - }, - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "description": "This chart shows whether there are pg's in a stuck state, that need manual intervention to resolve.", - "fill": 2, - "gridPos": { - "h": 7, - "w": 4, - "x": 16, - "y": 9 - }, - "hideTimeOverride": true, - "id": 6, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "minSpan": 4, - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "max(ceph_osd_numpg) - scalar(ceph_pg_active)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "# pg's stuck inactive", - "refId": "A", - "step": 240, - "textEditor": true - } - ], - "thresholds": [ - { - "colorMode": "critical", - 
"fill": true, - "line": true, - "op": "gt", - "value": 0 - } - ], - "timeFrom": "6h", - "timeShift": null, - "title": "PG's Stuck", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - "total" - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "alert": { - "conditions": [ - { - "evaluator": { - "params": [ - 0 - ], - "type": "lt" - }, - "operator": { - "type": "and" - }, - "query": { - "params": [ - "A", - "5m", - "now" - ] - }, - "reducer": { - "params": [], - "type": "min" - }, - "type": "query" - } - ], - "executionErrorState": "alerting", - "frequency": "60s", - "handler": 1, - "name": "OSD Host Loss Check alert", - "noDataState": "no_data", - "notifications": [ - { - "id": 1 - } - ] - }, - "aliasColors": { - "Largest OSD Host": "#890F02" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "description": "This graph checks the cluster @ 90% full is enough to support the loss of the largest OSD host", - "fill": 1, - "gridPos": { - "h": 7, - "w": 4, - "x": 20, - "y": 9 - }, - "id": 7, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "minSpan": 4, - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "max(\n sum(ceph_osd_stat_bytes - ceph_osd_stat_bytes_used)\n) * 0.9 -\nmax(\n sum by (instance) (\n ceph_osd_stat_bytes + on (ceph_daemon) group_left (instance) 
(ceph_disk_occupation*0)\n )\n)", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "refId": "A", - "step": 40, - "textEditor": true - } - ], - "thresholds": [ - { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "lt", - "value": 0 - } - ], - "timeFrom": null, - "timeShift": null, - "title": "OSD Host Loss Check", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "decbytes", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "alert": { - "conditions": [ - { - "evaluator": { - "params": [ - 1000 - ], - "type": "gt" - }, - "operator": { - "type": "and" - }, - "query": { - "params": [ - "A", - "1m", - "now" - ] - }, - "reducer": { - "params": [], - "type": "max" - }, - "type": "query" - } - ], - "executionErrorState": "alerting", - "frequency": "30s", - "handler": 1, - "name": "Slow OSD responses alert", - "noDataState": "ok", - "notifications": [ - { - "id": 1 - } - ] - }, - "aliasColors": { - "Largest OSD Host": "#890F02" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "description": "Graph checking for OSD Latencies that are above 1s.", - "fill": 1, - "gridPos": { - "h": 7, - "w": 4, - "x": 0, - "y": 16 - }, - "hideTimeOverride": true, - "id": 8, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxDataPoints": "", - "minSpan": 4, - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, 
- "targets": [ - { - "expr": "(\n (\n irate(node_disk_read_time_ms[5m]) / clamp_min(irate(node_disk_reads_completed[5m]), 0.001) +\n irate(node_disk_write_time_ms[5m]) / clamp_min(irate(node_disk_writes_completed[5m]), 0.001)\n ) and on (instance, device) ceph_disk_occupation\n) >= 1000", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{instance}}.{{device}}", - "refId": "A", - "step": 40, - "textEditor": true - } - ], - "thresholds": [ - { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 1000 - } - ], - "timeFrom": "1h", - "timeShift": null, - "title": "Slow OSD responses", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "none", - "label": "ms", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "alert": { - "conditions": [ - { - "evaluator": { - "params": [ - 10 - ], - "type": "gt" - }, - "operator": { - "type": "and" - }, - "query": { - "params": [ - "A", - "5m", - "now" - ] - }, - "reducer": { - "params": [], - "type": "max" - }, - "type": "query" - } - ], - "executionErrorState": "alerting", - "frequency": "30s", - "handler": 1, - "name": "Network Errors alert", - "noDataState": "no_data", - "notifications": [ - { - "id": 1 - } - ] - }, - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "description": "Checks all interfaces for dropped/error packets, and alerts if more than 10 are seen in a 5m interval", - "fill": 1, - "gridPos": { - "h": 7, - "w": 4, - "x": 4, - "y": 16 - }, - "id": 9, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - 
"linewidth": 1, - "links": [], - "minSpan": 4, - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum by (instance, device) (\n irate(node_network_receive_drop{device=~\"(eth|en|bond|ib|mlx).*\"}[5m]) +\n irate(node_network_receive_errs{device=~\"(eth|en|bond|ib|mlx).*\"}[5m]) +\n irate(node_network_transmit_drop{device=~\"(eth|en|bond|ib|mlx).*\"}[5m]) +\n irate(node_network_transmit_errs{device=~\"(eth|en|bond|ib|mlx).*\"}[5m])\n) > 0", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}} / {{device}}", - "refId": "A", - "step": 40, - "textEditor": true - } - ], - "thresholds": [ - { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 10 - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Network Packet Drops/Errors", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "none", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "alert": { - "conditions": [ - { - "evaluator": { - "params": [ - 85 - ], - "type": "gt" - }, - "operator": { - "type": "and" - }, - "query": { - "params": [ - "A", - "5m", - "now" - ] - }, - "reducer": { - "params": [], - "type": "avg" - }, - "type": "query" - } - ], - "executionErrorState": "alerting", - "frequency": "60s", - "handler": 1, - "name": "Pool Capacity alert", - "noDataState": "no_data", - "notifications": [ - { - "id": 1 - } - ] - }, - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 5, - "gridPos": 
{ - "h": 7, - "w": 4, - "x": 8, - "y": 16 - }, - "id": 10, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "minSpan": 4, - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "ceph_pool_bytes_used / (ceph_pool_bytes_used + ceph_pool_max_avail) * 100 + on (pool_id) group_left (name) (ceph_pool_metadata*0)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{name}}", - "refId": "A", - "step": 40, - "textEditor": true - } - ], - "thresholds": [ - { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 85 - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Pool Capacity", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "percent", - "label": null, - "logBase": 1, - "max": "100", - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "alert": { - "conditions": [ - { - "evaluator": { - "params": [ - 0 - ], - "type": "gt" - }, - "operator": { - "type": "and" - }, - "query": { - "params": [ - "A", - "10s", - "now" - ] - }, - "reducer": { - "params": [], - "type": "last" - }, - "type": "query" - } - ], - "executionErrorState": "alerting", - "frequency": "30s", - "handler": 1, - "name": "MONs Down alert", - "noDataState": "ok", - "notifications": [ - { - "id": 1 - } - ] - }, - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "description": "Count of MONs 
currently not in quorum/down", - "fill": 2, - "gridPos": { - "h": 7, - "w": 4, - "x": 12, - "y": 16 - }, - "hideTimeOverride": true, - "id": 13, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "hideZero": false, - "max": false, - "min": false, - "rightSide": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "minSpan": 4, - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": true, - "targets": [ - { - "expr": "count(ceph_mon_quorum_status != 1)", - "format": "time_series", - "hide": false, - "instant": false, - "intervalFactor": 1, - "legendFormat": "down", - "refId": "A", - "step": 4, - "textEditor": true - } - ], - "thresholds": [ - { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 0 - } - ], - "timeFrom": "5m", - "timeShift": null, - "title": "MONs Down", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": "7", - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "alert": { - "conditions": [ - { - "evaluator": { - "params": [ - 85 - ], - "type": "gt" - }, - "operator": { - "type": "and" - }, - "query": { - "params": [ - "A", - "1h", - "now" - ] - }, - "reducer": { - "params": [], - "type": "avg" - }, - "type": "query" - } - ], - "executionErrorState": "alerting", - "frequency": "60s", - "handler": 1, - "name": "Cluster Capacity alert", - "noDataState": "no_data", - "notifications": [ - { - "id": 1 - } - ] - }, - "aliasColors": {}, - "bars": false, - "dashLength": 10, - 
"dashes": false, - "datasource": null, - "description": "This trigger raises a notification if the raw used crosses the 85% capacity threshold of the ceph cluster", - "fill": 1, - "gridPos": { - "h": 7, - "w": 4, - "x": 16, - "y": 16 - }, - "id": 5, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "minSpan": 4, - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(ceph_osd_stat_bytes_used) / sum(ceph_osd_stat_bytes) * 100 ", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "Raw Capacity Used %", - "refId": "A", - "step": 40, - "textEditor": true - } - ], - "thresholds": [ - { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 85 - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Cluster Capacity", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "percent", - "label": "", - "logBase": 1, - "max": "100", - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "alert": { - "conditions": [ - { - "evaluator": { - "params": [ - 0 - ], - "type": "gt" - }, - "operator": { - "type": "and" - }, - "query": { - "params": [ - "A", - "1m", - "now" - ] - }, - "reducer": { - "params": [], - "type": "last" - }, - "type": "query" - } - ], - "executionErrorState": "alerting", - "frequency": "60s", - "handler": 1, - "name": "OSDs with High PG Count", - "noDataState": "ok", - "notifications": [ - { - "id": 1 - } - ] 
- }, - "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, - "datasource": null, - "description": "Shows the number of PGs that have a pg count > 275.", - "fill": 1, - "gridPos": { - "h": 7, - "w": 4, - "x": 20, - "y": 16 - }, - "id": 16, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": false, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "count(ceph_osd_numpg > 275)", - "format": "time_series", - "instant": true, - "intervalFactor": 1, - "refId": "A" - } - ], - "thresholds": [ - { - "colorMode": "critical", - "fill": true, - "line": true, - "op": "gt", - "value": 0 - } - ], - "timeFrom": null, - "timeShift": null, - "title": "OSDs with High PG Count", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 0, - "format": "short", - "label": "#NUM OSDs with high PG Count", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "refresh": "15s", - "schemaVersion": 16, - "style": "dark", - "tags": [], - "templating": { - "list": [] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "15s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "browser", - "title": "Alert Status", - "version": 17 -} diff --git 
a/ansible/roles/ceph-grafana/files/dashboards/mgr-prometheus/ceph-at-a-glance.json b/ansible/roles/ceph-grafana/files/dashboards/mgr-prometheus/ceph-at-a-glance.json deleted file mode 100644 index 932166d..0000000 --- a/ansible/roles/ceph-grafana/files/dashboards/mgr-prometheus/ceph-at-a-glance.json +++ /dev/null @@ -1,3252 +0,0 @@ -{ - "__requires": [ - { - "type": "panel", - "id": "alertlist", - "name": "Alert List", - "version": "5.0.0" - }, - { - "type": "grafana", - "id": "grafana", - "name": "Grafana", - "version": "5.0.4" - }, - { - "type": "panel", - "id": "grafana-piechart-panel", - "name": "Pie Chart", - "version": "1.3.0" - }, - { - "type": "panel", - "id": "graph", - "name": "Graph", - "version": "5.0.0" - }, - { - "type": "datasource", - "id": "prometheus", - "name": "Local", - "version": "5.0.0" - }, - { - "type": "panel", - "id": "singlestat", - "name": "Singlestat", - "version": "5.0.0" - }, - { - "type": "panel", - "id": "vonage-status-panel", - "name": "Status Panel", - "version": "1.0.8" - } - ], - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "limit": 100, - "name": "Annotations & Alerts", - "showIn": 0, - "type": "dashboard" - } - ] - }, - "editable": false, - "gnetId": null, - "graphTooltip": 0, - "id": null, - "iteration": 1526962541471, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "overview" - ], - "targetBlank": true, - "title": "Shortcuts", - "type": "dashboards" - } - ], - "panels": [ - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": null, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 
2, - "w": 2, - "x": 0, - "y": 0 - }, - "height": "50px", - "id": 1, - "interval": null, - "links": [ - { - "dashUri": "db/ceph-cluster", - "dashboard": "Ceph Cluster", - "includeVars": true, - "keepTime": true, - "targetBlank": true, - "title": "Ceph Cluster", - "type": "dashboard" - } - ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "minSpan": 2, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "10%", - "prefix": "", - "prefixFontSize": "10%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "", - "format": "time_series", - "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": "", - "title": "", - "transparent": true, - "type": "singlestat", - "valueFontSize": "35%", - "valueMaps": [ - { - "op": "=", - "text": "Cluster", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": null, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 2, - "w": 2, - "x": 2, - "y": 0 - }, - "height": "50px", - "id": 2, - "interval": null, - "links": [ - { - "dashUri": "db/ceph-pools", - "dashboard": "Ceph Pools", - "includeVars": true, - "keepTime": true, - "targetBlank": true, - "title": "Ceph Pools", - "type": "dashboard" - } - ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 
100, - "minSpan": 2, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "10%", - "prefix": "", - "prefixFontSize": "10%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "", - "format": "time_series", - "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": "", - "title": "", - "transparent": true, - "type": "singlestat", - "valueFontSize": "35%", - "valueMaps": [ - { - "op": "=", - "text": "Pools", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": null, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 2, - "w": 2, - "x": 4, - "y": 0 - }, - "height": "50px", - "id": 3, - "interval": null, - "links": [ - { - "dashUri": "db/ceph-osd-information", - "dashboard": "Ceph OSD Information", - "includeVars": true, - "keepTime": true, - "targetBlank": true, - "title": "Ceph OSD Information", - "type": "dashboard" - } - ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "minSpan": 2, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "10%", - "prefix": "", - "prefixFontSize": "10%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": 
"", - "format": "time_series", - "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": "", - "title": "", - "transparent": true, - "type": "singlestat", - "valueFontSize": "35%", - "valueMaps": [ - { - "op": "=", - "text": "OSDs", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": null, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 2, - "w": 2, - "x": 6, - "y": 0 - }, - "height": "50px", - "id": 40, - "interval": null, - "links": [ - { - "dashUri": "db/ceph-rgw-workload", - "dashboard": "Ceph RGW Workload", - "includeVars": false, - "keepTime": true, - "targetBlank": true, - "title": "Ceph RGW Workload", - "type": "dashboard" - } - ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "minSpan": 2, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "10%", - "prefix": "", - "prefixFontSize": "10%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "", - "format": "time_series", - "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": "", - "title": "", - "transparent": true, - "type": "singlestat", - "valueFontSize": "35%", - "valueMaps": [ - { - "op": "=", - "text": "S3/Swift", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 
129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": null, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 2, - "w": 2, - "x": 8, - "y": 0 - }, - "height": "50px", - "id": 5, - "interval": null, - "links": [ - { - "dashUri": "db/ceph-backend-storage", - "dashboard": "Ceph Backend Storage", - "includeVars": true, - "keepTime": true, - "targetBlank": true, - "title": "OSD Host Performance", - "type": "dashboard" - } - ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "minSpan": 2, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "10%", - "prefix": "", - "prefixFontSize": "10%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "", - "format": "time_series", - "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": "", - "title": "", - "transparent": true, - "type": "singlestat", - "valueFontSize": "35%", - "valueMaps": [ - { - "op": "=", - "text": "OSD Hosts", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": null, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 2, - "w": 2, - "x": 10, - "y": 0 - }, - "height": "50px", - "id": 6, - "interval": null, - "links": [ - { - "dashUri": "db/network-usage-by-server", - "dashboard": "Network Usage by 
Server", - "includeVars": true, - "keepTime": true, - "targetBlank": true, - "title": "Network Usage by Server", - "type": "dashboard" - } - ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "minSpan": 2, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "10%", - "prefix": "", - "prefixFontSize": "10%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "", - "format": "time_series", - "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": "", - "title": "", - "transparent": true, - "type": "singlestat", - "valueFontSize": "35%", - "valueMaps": [ - { - "op": "=", - "text": "Network", - "value": "null" - } - ], - "valueName": "current" - }, - { - "gridPos": { - "h": 4, - "w": 6, - "x": 18, - "y": 0 - }, - "id": 8, - "limit": 10, - "links": [ - { - "dashUri": "db/alert-status", - "dashboard": "Alert Status", - "targetBlank": true, - "title": "Alert Status", - "type": "dashboard" - } - ], - "minSpan": 4, - "onlyAlertsOnDashboard": false, - "show": "current", - "sortOrder": 3, - "stateFilter": [ - "alerting" - ], - "title": "Active Alerts", - "type": "alertlist" - }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 4 - }, - "id": 35, - "panels": [], - "repeat": null, - "title": "At a Glance", - "type": "row" - }, - { - "cacheTimeout": null, - "colorBackground": true, - "colorValue": false, - "colors": [ - "rgba(1, 167, 1, 1)", - "rgba(255,165,0, 1)", - "rgba(255, 0, 0, 1)" - ], - "datasource": null, - "description": "Shows the overall health of the ceph cluster", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - 
"thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 6, - "w": 2, - "x": 0, - "y": 5 - }, - "hideTimeOverride": true, - "id": 9, - "interval": null, - "links": [ - { - "dashUri": "db/ceph-health", - "dashboard": "Ceph Health", - "includeVars": true, - "keepTime": false, - "targetBlank": true, - "title": "Ceph Health", - "type": "dashboard" - } - ], - "mappingType": 2, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": "", - "minSpan": 2, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "0", - "text": "OK", - "to": "0" - }, - { - "from": "1", - "text": "WARN", - "to": "4" - }, - { - "from": "5", - "text": "ERROR", - "to": "99" - }, - { - "from": "-10", - "text": "NODATA", - "to": "0" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "ceph_health_status", - "format": "time_series", - "intervalFactor": 2, - "refId": "A", - "textEditor": true - } - ], - "thresholds": "1,5", - "timeFrom": "1m", - "timeShift": null, - "title": "Health", - "type": "singlestat", - "valueFontSize": "50%", - "valueMaps": [ - { - "op": "=", - "text": "OK", - "value": "0" - }, - { - "op": "=", - "text": "WARN", - "value": "4" - }, - { - "op": "=", - "text": "ERROR", - "value": "8" - } - ], - "valueName": "current" - }, - { - "clusterName": "MONs", - "colorMode": "Panel", - "colors": { - "crit": "rgba(245, 54, 54, 0.9)", - "disable": "rgba(128, 128, 128, 0.9)", - "ok": "rgb(1,167,1)", - "warn": "rgba(237, 129, 40, 0.9)" - }, - "cornerRadius": 0, - "datasource": null, - "displayName": "MONs", - "flipCard": false, - "flipTime": 5, - "fontFormat": "Regular", - "gridPos": { - "h": 6, - "w": 2, - "x": 2, - "y": 5 - }, - 
"hideTimeOverride": true, - "id": 10, - "isAutoScrollOnOverflow": false, - "isGrayOnNoData": true, - "isHideAlertsOnDisable": false, - "isIgnoreOKColors": false, - "links": [], - "minSpan": 2, - "namePrefix": "", - "targets": [ - { - "aggregation": "Last", - "alias": "total", - "displayType": "Regular", - "expr": "count(ceph_mon_quorum_status)", - "format": "time_series", - "instant": true, - "intervalFactor": 1, - "legendFormat": "total", - "refId": "D", - "textEditor": true, - "valueHandler": "Text Only" - }, - { - "aggregation": "Last", - "alias": "quorum", - "displayAliasType": "Always", - "displayType": "Regular", - "displayValueWithAlias": "When Alias Displayed", - "expr": "count(ceph_mon_quorum_status == 1)", - "format": "time_series", - "instant": true, - "intervalFactor": 1, - "legendFormat": "quorum", - "refId": "E", - "textEditor": true, - "valueHandler": "String Threshold", - "warn": "1" - }, - { - "aggregation": "Last", - "alias": "down", - "crit": 2, - "decimals": 2, - "displayAliasType": "Always", - "displayType": "Regular", - "displayValueWithAlias": "When Alias Displayed", - "expr": "count(ceph_mon_quorum_status != 1)", - "format": "time_series", - "hide": false, - "instant": true, - "intervalFactor": 1, - "legendFormat": "down", - "refId": "A", - "textEditor": true, - "units": "none", - "valueHandler": "Number Threshold", - "warn": 1 - } - ], - "timeFrom": "1m", - "timeShift": null, - "title": "", - "type": "vonage-status-panel" - }, - { - "clusterName": "OSDs", - "colorMode": "Panel", - "colors": { - "crit": "rgba(245, 54, 54, 0.9)", - "disable": "rgba(128, 128, 128, 0.9)", - "ok": "rgb(1,167,1)", - "warn": "rgba(237, 129, 40, 0.9)" - }, - "cornerRadius": 0, - "datasource": null, - "displayName": "OSDs", - "flipCard": false, - "flipTime": 5, - "fontFormat": "Regular", - "gridPos": { - "h": 6, - "w": 2, - "x": 4, - "y": 5 - }, - "hideTimeOverride": true, - "id": 11, - "isAutoScrollOnOverflow": false, - "isGrayOnNoData": false, - 
"isHideAlertsOnDisable": false, - "isIgnoreOKColors": false, - "links": [ - { - "dashUri": "db/ceph-osd-information", - "dashboard": "Ceph OSD Information", - "targetBlank": true, - "title": "Ceph OSD Information", - "type": "dashboard" - } - ], - "minSpan": 2, - "namePrefix": "", - "targets": [ - { - "aggregation": "Last", - "alias": "total", - "displayType": "Regular", - "expr": "count(ceph_osd_up)", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "total", - "refId": "E", - "textEditor": true, - "valueHandler": "Text Only" - }, - { - "aggregation": "Last", - "alias": "in", - "displayType": "Regular", - "expr": "count(ceph_osd_up == 1)", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "up", - "refId": "F", - "textEditor": true, - "valueHandler": "Text Only" - }, - { - "aggregation": "Last", - "alias": "up", - "displayType": "Regular", - "expr": "count(ceph_osd_in == 1)", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "in", - "refId": "A", - "textEditor": true, - "valueHandler": "Text Only" - }, - { - "aggregation": "Last", - "alias": "out", - "displayType": "Regular", - "expr": "count(ceph_osd_in == 0)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "out", - "refId": "C", - "textEditor": true, - "valueHandler": "Text Only" - }, - { - "aggregation": "Last", - "alias": "down", - "crit": 5, - "decimals": 2, - "displayAliasType": "Always", - "displayType": "Regular", - "displayValueWithAlias": "When Alias Displayed", - "expr": "count(ceph_osd_up == 0)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "down", - "refId": "B", - "textEditor": true, - "units": "none", - "valueHandler": "Number Threshold", - "warn": 1 - } - ], - "timeFrom": "1m", - "timeShift": null, - "title": "", - "type": "vonage-status-panel" - }, - { - "clusterName": "OSD Hosts", - "colorMode": "Panel", - "colors": { - "crit": "rgba(245, 54, 54, 
0.9)", - "disable": "rgba(128, 128, 128, 0.9)", - "ok": "rgb(1,167,1)", - "warn": "rgba(237, 129, 40, 0.9)" - }, - "cornerRadius": 0, - "datasource": null, - "displayName": "OSD Hosts", - "flipCard": false, - "flipTime": 5, - "fontFormat": "Regular", - "gridPos": { - "h": 6, - "w": 2, - "x": 6, - "y": 5 - }, - "hideTimeOverride": true, - "id": 12, - "isAutoScrollOnOverflow": false, - "isGrayOnNoData": false, - "isHideAlertsOnDisable": false, - "isIgnoreOKColors": false, - "links": [ - { - "dashUri": "db/ceph-health", - "dashboard": "Ceph Health", - "targetBlank": true, - "title": "Ceph Health", - "type": "dashboard" - } - ], - "minSpan": 2, - "targets": [ - { - "aggregation": "Last", - "alias": "total", - "displayType": "Regular", - "expr": "count(\n count(\n ceph_disk_occupation\n ) by (instance)\n)", - "format": "time_series", - "instant": true, - "intervalFactor": 1, - "legendFormat": "total", - "refId": "A", - "textEditor": true, - "valueHandler": "Text Only" - }, - { - "aggregation": "Last", - "alias": "up", - "displayType": "Regular", - "expr": "count(\n count(\n ceph_disk_occupation\n ) by (instance)) -\ncount(\n (count by(instance) (ceph_disk_occupation * \n on(ceph_daemon) group_right(instance) ceph_osd_up == 0) - \n count by(instance) (ceph_disk_occupation)) == 0)", - "format": "time_series", - "instant": true, - "intervalFactor": 1, - "legendFormat": "up", - "refId": "B", - "textEditor": true, - "valueHandler": "Text Only" - }, - { - "aggregation": "Last", - "alias": "down", - "crit": 2, - "decimals": 2, - "displayAliasType": "Always", - "displayType": "Regular", - "displayValueWithAlias": "When Alias Displayed", - "expr": "count(\n (count by(instance) (ceph_disk_occupation * \n on(ceph_daemon) group_right(instance) ceph_osd_up == 0) - \n count by(instance) (ceph_disk_occupation)) == 0)", - "format": "time_series", - "hide": false, - "instant": true, - "intervalFactor": 1, - "legendFormat": "down", - "refId": "C", - "textEditor": true, - "units": "none", 
- "url": "dashboard/db/ceph-health", - "valueHandler": "Number Threshold", - "warn": 1 - } - ], - "timeFrom": "30s", - "timeShift": null, - "title": "", - "type": "vonage-status-panel" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": null, - "decimals": 0, - "description": "Number of RGW daemons active", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 6, - "w": 2, - "x": 8, - "y": 5 - }, - "id": 39, - "interval": null, - "links": [ - { - "dashUri": "db/ceph-rgw-workload", - "dashboard": "Ceph RGW Workload", - "targetBlank": true, - "title": "Ceph RGW Workload", - "type": "dashboard" - } - ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "count(ceph_rgw_metadata)", - "format": "time_series", - "intervalFactor": 1, - "refId": "A" - } - ], - "thresholds": "", - "title": "RGWs", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": null, - "decimals": 0, - "description": "Number of MDS daemons active", - "format": "none", - "gauge": { - 
"maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 6, - "w": 2, - "x": 10, - "y": 5 - }, - "id": 41, - "interval": null, - "links": [ - { - "dashUri": "db/mds-performance", - "dashboard": "MDS Performance", - "targetBlank": true, - "title": "MDS Performance", - "type": "dashboard" - } - ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "count(ceph_mds_metadata)", - "format": "time_series", - "intervalFactor": 1, - "refId": "A" - } - ], - "thresholds": "", - "title": "MDS", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(1, 167, 1, 1)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": null, - "decimals": 0, - "format": "short", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": true, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 6, - "w": 4, - "x": 12, - "y": 5 - }, - "hideTimeOverride": true, - "id": 15, - "interval": null, - "links": [ - { - "dashUri": "db/ceph-cluster", - "dashboard": "Ceph Cluster", - "params": "panelId=3&fullscreen&orgId=1", - "targetBlank": true, - "title": "Cluster Capacity Information", - "type": "dashboard" - } - ], - "mappingType": 1, - "mappingTypes": [ - { - "name": 
"value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "minSpan": 4, - "nullPointMode": "connected", - "nullText": null, - "postfix": "%", - "postfixFontSize": "40%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "sum(ceph_osd_stat_bytes_used) / sum(ceph_osd_stat_bytes) * 100", - "format": "time_series", - "hide": false, - "instant": true, - "intervalFactor": 2, - "refId": "A", - "textEditor": true - } - ], - "thresholds": "70,90", - "timeFrom": "1m", - "timeShift": null, - "title": "Capacity Utilization", - "type": "singlestat", - "valueFontSize": "70%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": null, - "decimals": 1, - "description": "Shows the growth rate based on osd usage over the past $growth_window.", - "format": "bytes", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 6, - "w": 2, - "x": 16, - "y": 5 - }, - "id": 16, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "minSpan": 2, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": 
"rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "topk(1, ceph_cluster_total_used_bytes offset 1d) - ignoring (instance,job) topk(1, ceph_cluster_total_used_bytes offset 7d)", - "format": "time_series", - "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": "", - "title": "$growth_window Growth Rate", - "type": "singlestat", - "valueFontSize": "70%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": null, - "decimals": 0, - "description": "Shows the estimated number of weeks left, based on consumption over the past $growth_window.", - "format": "locale", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 6, - "w": 2, - "x": 18, - "y": 5 - }, - "id": 17, - "interval": null, - "links": [], - "mappingType": 2, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "minSpan": 2, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - }, - { - "from": "-99999999999999999999999", - "text": "No Growth", - "to": "0" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "(ceph_cluster_total_bytes - ceph_cluster_total_used_bytes) /\n scalar(topk(1, ceph_cluster_total_used_bytes offset 1d) - ignoring (instance,job) topk(1, 
ceph_cluster_total_used_bytes offset 7d))\n", - "format": "time_series", - "hide": false, - "instant": true, - "intervalFactor": 1, - "refId": "A", - "textEditor": true - } - ], - "thresholds": "", - "title": "Weeks Remaining", - "type": "singlestat", - "valueFontSize": "70%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "aliasColors": { - "Unknown": "#bf1b00", - "active + clean": "#01a701", - "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_pg_active_clean": "#01a701", - "collectd.obj-mon-1.storage.lab.cephmetrics.gauge.ceph.mon.num_pg_peering": "#ffa500", - "peering": "#0A50A1" - }, - "breakPoint": "50%", - "cacheTimeout": null, - "combine": { - "label": "Others", - "threshold": "" - }, - "datasource": null, - "fontSize": "100%", - "format": "none", - "gridPos": { - "h": 6, - "w": 4, - "x": 20, - "y": 5 - }, - "height": "", - "hideTimeOverride": true, - "id": 18, - "interval": null, - "legend": { - "percentage": false, - "show": true, - "values": true - }, - "legendType": "Under graph", - "links": [ - { - "dashUri": "db/ceph-cluster", - "dashboard": "Ceph Cluster", - "includeVars": false, - "keepTime": false, - "targetBlank": true, - "title": "Ceph Cluster Information", - "type": "dashboard" - } - ], - "maxDataPoints": "1", - "minSpan": 4, - "nullPointMode": "connected", - "pieType": "pie", - "strokeWidth": "", - "targets": [ - { - "expr": "ceph_pg_active", - "format": "time_series", - "hide": false, - "instant": true, - "intervalFactor": 1, - "legendFormat": "Active", - "refId": "B", - "textEditor": true - }, - { - "expr": "ceph_pg_degraded", - "format": "time_series", - "hide": false, - "instant": true, - "intervalFactor": 1, - "legendFormat": "Degraded", - "refId": "C", - "textEditor": true - }, - { - "expr": "ceph_pg_peering", - "format": "time_series", - "hide": false, - "instant": true, - "intervalFactor": 1, - "legendFormat": "Peering", - "refId": "D", - "textEditor": true - 
}, - { - "expr": "ceph_pg_unknown", - "format": "time_series", - "instant": true, - "intervalFactor": 1, - "legendFormat": "Unknown", - "refId": "E" - } - ], - "timeFrom": "1m", - "timeShift": null, - "title": "Placement Group Status", - "type": "grafana-piechart-panel", - "valueName": "current" - }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 11 - }, - "id": 36, - "panels": [], - "repeat": null, - "title": "Performance", - "type": "row" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(251,251,251, 0.97)", - "rgba(255,165,0, 0.89)", - "rgba(255, 0, 0, 1)" - ], - "datasource": null, - "decimals": 1, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 6, - "w": 4, - "x": 0, - "y": 12 - }, - "id": 20, - "interval": null, - "links": [ - { - "dashUri": "db/ceph-pools", - "dashboard": "Ceph Pools", - "includeVars": true, - "keepTime": true, - "targetBlank": true, - "title": "Ceph Pools", - "type": "dashboard" - } - ], - "mappingType": 2, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": "90", - "minSpan": 2, - "nullPointMode": "connected", - "nullText": null, - "postfix": "/s", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": true - }, - "tableColumn": "", - "targets": [ - { - "expr": "sum(irate(ceph_osd_recovery_ops[1m]))", - "format": "time_series", - "intervalFactor": 2, - "refId": "A", - "textEditor": true - } - ], - "thresholds": "1,2", - "title": "Recovery/Backfill Ops", - "type": "singlestat", - "valueFontSize": "50%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": 
"current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(251,251,251, 0.97)", - "rgba(255,0,0,1)", - "rgba(255, 0, 0, 1)" - ], - "datasource": null, - "description": "This panel indicate whether scrub/deep scrub is running within the cluster. NB. If either of these features are turned off, the cluster will enter a WARN state. Click on the panel or the link below to look at cluster information in more detail", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 6, - "w": 2, - "x": 4, - "y": 12 - }, - "id": 19, - "interval": null, - "links": [ - { - "dashUri": "db/ceph-cluster", - "dashboard": "Ceph Cluster", - "includeVars": false, - "keepTime": false, - "targetBlank": true, - "title": "Ceph Cluster", - "type": "dashboard" - } - ], - "mappingType": 2, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": "", - "minSpan": 2, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "0", - "text": "INACTIVE", - "to": "0" - }, - { - "from": "1", - "text": "ACTIVE", - "to": "99999" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "ceph_pg_scrubbing{job=\"ceph\"}", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "refId": "A", - "textEditor": true - } - ], - "thresholds": "1", - "title": "Scrub", - "type": "singlestat", - "valueFontSize": "50%", - "valueMaps": [ - { - "op": "=", - "text": "INACTIVE", - "value": "0" - }, - { - "op": "=", - "text": "ACTIVE", - "value": "1" - }, - { - "op": "=", - "text": "DISABLED", - "value": "2" - }, - { - 
"op": "=", - "text": "", - "value": "" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": null, - "decimals": 0, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 6, - "w": 4, - "x": 6, - "y": 12 - }, - "id": 21, - "interval": null, - "links": [ - { - "dashUri": "db/ceph-pools", - "dashboard": "Ceph Pools", - "includeVars": true, - "keepTime": true, - "targetBlank": true, - "title": "Ceph Pools", - "type": "dashboard" - } - ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": "90", - "minSpan": 4, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": true - }, - "tableColumn": "", - "targets": [ - { - "expr": "sum(\n rate(ceph_pool_wr[$__interval])\n)\n+ \nsum(\n rate(ceph_pool_rd[$__interval])\n)", - "format": "time_series", - "groupBy": [], - "hide": false, - "intervalFactor": 1, - "legendFormat": "IOPS", - "policy": "default", - "rawQuery": false, - "refId": "A", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - } - ] - ], - "tags": [], - "textEditor": true - } - ], - "thresholds": "", - "title": "Client IOPS", - "type": "singlestat", - "valueFontSize": "100%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": 
false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": null, - "decimals": 1, - "format": "decbytes", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 6, - "w": 4, - "x": 10, - "y": 12 - }, - "id": 22, - "interval": null, - "links": [ - { - "dashUri": "db/ceph-pools", - "dashboard": "Ceph Pools", - "includeVars": true, - "keepTime": true, - "targetBlank": true, - "title": "Ceph Pools", - "type": "dashboard" - } - ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": "90", - "minSpan": 4, - "nullPointMode": "connected", - "nullText": null, - "postfix": "/s", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": true - }, - "tableColumn": "", - "targets": [ - { - "expr": "sum(rate(ceph_pool_wr_bytes[$__interval]) + rate(ceph_pool_rd_bytes[$__interval]))", - "format": "time_series", - "groupBy": [], - "hide": false, - "intervalFactor": 1, - "policy": "default", - "rawQuery": true, - "refId": "A", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - } - ] - ], - "tags": [], - "textEditor": true - } - ], - "thresholds": "", - "title": "Client Throughput", - "type": "singlestat", - "valueFontSize": "70%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - 
"datasource": null, - "decimals": 0, - "format": "short", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 6, - "w": 2, - "x": 14, - "y": 12 - }, - "id": 23, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "minSpan": 2, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "count(ceph_pool_metadata)", - "format": "time_series", - "groupBy": [], - "intervalFactor": 2, - "policy": "default", - "refId": "A", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - } - ] - ], - "tags": [], - "textEditor": true - } - ], - "thresholds": "", - "title": "Pools", - "type": "singlestat", - "valueFontSize": "70%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "aliasColors": { - "Reads": "#01a701", - "Writes": "#82B5D8" - }, - "breakPoint": "50%", - "cacheTimeout": null, - "combine": { - "label": "Others", - "threshold": 0 - }, - "datasource": null, - "description": "Shows the read/write threshold of client IOPS serviced by the ceph cluster", - "fontSize": "80%", - "format": "none", - "gridPos": { - "h": 6, - "w": 4, - "x": 16, - "y": 12 - }, - "height": "230", - "id": 25, - "interval": null, - "legend": { - "percentage": false, - "show": false, - "values": false - }, - "legendType": "Under graph", - "links": [], - "maxDataPoints": "90", - "minSpan": 4, - "nullPointMode": 
"connected", - "pieType": "pie", - "strokeWidth": 1, - "targets": [ - { - "expr": "round(sum(irate(ceph_pool_rd[30s])))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "reads", - "refId": "A", - "textEditor": true - }, - { - "expr": "round(sum(irate(ceph_pool_wr[30s])))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "writes", - "refId": "B", - "textEditor": true - } - ], - "title": "Client Read/Write Ratio", - "type": "grafana-piechart-panel", - "valueName": "current" - }, - { - "aliasColors": { - "95%ile Commit Latency": "#447EBC", - "Apply Latency Max": "#890F02", - "Commit Latency": "#447EBC", - "apply": "#508642", - "commit": "#0a50a1" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "description": "Shows the OSD apply and commit latency at the $percentile%ile across the cluster over the past 15 minutes", - "fill": 0, - "gridPos": { - "h": 6, - "w": 4, - "x": 20, - "y": 12 - }, - "hideTimeOverride": true, - "id": 26, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [ - { - "dashUri": "db/ceph-osd-information", - "dashboard": "Ceph OSD Information", - "includeVars": true, - "keepTime": true, - "targetBlank": true, - "title": "Ceph OSD Information", - "type": "dashboard" - } - ], - "minSpan": 4, - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "quantile($percentile / 100, ceph_osd_commit_latency_ms)", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "commit", - "refId": "A", - "textEditor": true - }, - { - "expr": "quantile($percentile / 100, ceph_osd_apply_latency_ms)", - "format": "time_series", - 
"intervalFactor": 1, - "legendFormat": "apply", - "refId": "B", - "textEditor": true - } - ], - "thresholds": [], - "timeFrom": "15m", - "timeShift": null, - "title": "OSD Apply vs Commit Latency", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": false, - "values": [] - }, - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": null, - "decimals": 0, - "description": "CPU usage is presented based on the $percentile%ile across all OSD hosts", - "format": "short", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 6, - "w": 4, - "x": 0, - "y": 18 - }, - "id": 27, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": "90", - "minSpan": 4, - "nullPointMode": "connected", - "nullText": null, - "postfix": " %", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": true - }, - "tableColumn": "", - "targets": [ - { - "expr": "quantile(\n $percentile / 100, (\n avg (\n sum (\n irate(node_cpu{mode=~\"(system|user|irq|nice)\", instance=~'[[osd_servers_raw]].*'}[1m]))\n by (instance,cpu)\n ) by (instance)\n ) * 100\n)", - "format": 
"time_series", - "hide": false, - "intervalFactor": 1, - "refId": "A", - "textEditor": true - } - ], - "thresholds": "70,90", - "title": "OSD Hosts CPU Busy", - "type": "singlestat", - "valueFontSize": "100%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": null, - "decimals": 0, - "description": "RAM Usage shows the $percentile%ile of RAM used across all OSD hosts", - "format": "short", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 6, - "w": 2, - "x": 4, - "y": 18 - }, - "id": 31, - "interval": null, - "links": [ - { - "dashUri": "db/ceph-backend-storage", - "dashboard": "Ceph Backend Storage", - "includeVars": true, - "keepTime": true, - "targetBlank": true, - "title": "Ceph OSD Host Performance", - "type": "dashboard" - } - ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": "", - "minSpan": 2, - "nullPointMode": "connected", - "nullText": null, - "postfix": " %", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(46, 161, 15, 0)", - "full": false, - "lineColor": "rgb(164, 139, 4)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "quantile(\n $percentile/100, \n (\n node_memory_MemTotal{job='node'} - node_memory_MemFree{job='node'} - node_memory_Buffers{job='node'} - node_memory_Cached{job='node'}\n ) / node_memory_MemTotal{job='node'} * 100\n)", - "format": "time_series", - "groupBy": [], - "hide": false, - "intervalFactor": 2, - 
"policy": "default", - "rawQuery": true, - "refId": "A", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - } - ] - ], - "tags": [], - "textEditor": true - } - ], - "thresholds": "", - "title": "RAM Util.", - "type": "singlestat", - "valueFontSize": "70%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": null, - "decimals": 0, - "description": "Total IOPS from all OSDs in the cluster", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 6, - "w": 4, - "x": 6, - "y": 18 - }, - "id": 28, - "interval": null, - "links": [ - { - "dashUri": "db/ceph-backend-storage", - "dashboard": "Ceph Backend Storage", - "includeVars": true, - "keepTime": true, - "targetBlank": true, - "title": "Ceph OSD Host Performance", - "type": "dashboard" - } - ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": "90", - "minSpan": 4, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": true - }, - "tableColumn": "", - "targets": [ - { - "expr": "sum(\n sum(\n rate(\n node_disk_reads_completed[$__interval]\n )\n ) by (instance,device)\n + ignoring(ceph_daemon,job) group_right(instance) ceph_disk_occupation\n)\n+\nsum(\n sum(\n rate(\n node_disk_writes_completed[$__interval]\n )\n ) 
by (instance,device)\n + ignoring(ceph_daemon,job) group_right(instance) ceph_disk_occupation\n)", - "format": "time_series", - "groupBy": [], - "hide": false, - "intervalFactor": 1, - "policy": "default", - "rawQuery": true, - "refId": "A", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - } - ] - ], - "tags": [], - "textEditor": true - } - ], - "thresholds": "", - "title": "Disk IOPS", - "type": "singlestat", - "valueFontSize": "100%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": null, - "decimals": 1, - "format": "decbytes", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 6, - "w": 4, - "x": 10, - "y": 18 - }, - "id": 29, - "interval": null, - "links": [ - { - "dashUri": "db/ceph-backend-storage", - "dashboard": "Ceph Backend Storage", - "includeVars": true, - "keepTime": true, - "targetBlank": true, - "title": "Ceph OSD Host Performance", - "type": "dashboard" - } - ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": "90", - "minSpan": 4, - "nullPointMode": "connected", - "nullText": null, - "postfix": "/s", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": true - }, - "tableColumn": "", - "targets": [ - { - "expr": "# should only include OSD hosts\nsum(\n sum(\n rate(\n node_disk_bytes_read[$__interval]\n )\n ) by 
(instance,device)\n + ignoring(ceph_daemon,job) group_right(instance) ceph_disk_occupation\n)\n+\nsum(\n sum(\n rate(\n node_disk_bytes_written[$__interval]\n )\n ) by (instance,device)\n + ignoring(ceph_daemon,job) group_right(instance) ceph_disk_occupation\n)", - "format": "time_series", - "groupBy": [], - "hide": false, - "intervalFactor": 1, - "policy": "default", - "rawQuery": true, - "refId": "A", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - } - ] - ], - "tags": [], - "textEditor": true - }, - { - "expr": "# will include non-OSD hosts\nsum(\n rate(\n node_disk_bytes_read[$__interval]\n ) + \n rate(\n node_disk_bytes_written[$__interval]\n )\n)", - "format": "time_series", - "groupBy": [], - "hide": true, - "intervalFactor": 1, - "policy": "default", - "rawQuery": true, - "refId": "B", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - } - ] - ], - "tags": [], - "textEditor": true - } - ], - "thresholds": "", - "title": "Disk Throughput", - "type": "singlestat", - "valueFontSize": "70%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": null, - "decimals": 0, - "description": "The count of the number of disks in the cluster that are over $disk_full_threshold% full.", - "format": "short", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 6, - "w": 2, - "x": 14, - "y": 18 - }, - "id": 30, - "interval": null, - "links": [ - { - "dashUri": "db/ceph-backend-storage", - "dashboard": "Ceph Backend Storage", - "includeVars": true, - "keepTime": true, - "targetBlank": true, - "title": "Ceph OSD Host 
Performance", - "type": "dashboard" - } - ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "minSpan": 2, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "count(\n (ceph_osd_stat_bytes_used / ceph_osd_stat_bytes) > ($disk_full_threshold / 100)\n)", - "format": "time_series", - "intervalFactor": 2, - "refId": "A", - "textEditor": true - } - ], - "thresholds": "", - "title": "Nearly Full Disks", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "0", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(1, 167, 1,1)", - "rgba(255,165,0,1)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": null, - "decimals": 0, - "description": "This panel shows the 5th %ile disk latency, indicating that 95% of the OSDs are delivering this latency or higher", - "format": "short", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 6, - "w": 4, - "x": 16, - "y": 18 - }, - "id": 32, - "interval": null, - "links": [ - { - "dashUri": "db/latency-by-server", - "dashboard": "Latency by Server", - "includeVars": true, - "targetBlank": true, - "title": "Latency by Server", - "type": "dashboard" - } - ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": "90", - "minSpan": 4, - 
"nullPointMode": "connected", - "nullText": null, - "postfix": " ms", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": true - }, - "tableColumn": "", - "targets": [ - { - "expr": "quantile(\n 5 / 100,\n (irate(node_disk_read_time_ms[1m]) + irate(node_disk_write_time_ms[1m]) / \n (irate(node_disk_reads_completed[1m]) + irate(node_disk_writes_completed[1m])) \n +\n ignoring(ceph_daemon,job) ceph_disk_occupation))", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "refId": "C" - }, - { - "expr": "max(\n irate(node_disk_read_time_ms[30s]) / irate(node_disk_reads_completed[30s])\n +\n irate(node_disk_write_time_ms[30s]) / irate(node_disk_writes_completed[30s])\n + ignoring(ceph_daemon,job) ceph_disk_occupation\n)", - "format": "time_series", - "hide": true, - "intervalFactor": 2, - "refId": "B" - }, - { - "expr": "sum(\n (irate(node_disk_read_time_ms[1m]) + irate(node_disk_write_time_ms[1m]) / \n (irate(node_disk_reads_completed[1m]) + irate(node_disk_writes_completed[1m])) \n +\n ignoring(ceph_daemon,job) ceph_disk_occupation)) / count(ceph_osd_up)", - "format": "time_series", - "hide": true, - "intervalFactor": 1, - "refId": "A" - } - ], - "thresholds": "20,60", - "title": "Disk Latency", - "type": "singlestat", - "valueFontSize": "100%", - "valueMaps": [ - { - "op": "=", - "text": "0", - "value": "null" - } - ], - "valueName": "current" - }, - { - "aliasColors": { - "average": "#0a50a1", - "average %util": "#1f78c1" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "description": "Chart shows the disk utilization over the past 15 mins expressed as an average across all OSDs, and at the $percentile%ile.", - "fill": 1, - "gridPos": { - "h": 6, - "w": 4, - "x": 20, - "y": 18 - }, - 
"hideTimeOverride": true, - "id": 33, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [ - { - "dashUri": "db/disk-busy-by-server", - "dashboard": "Disk Busy by Server", - "includeVars": true, - "targetBlank": true, - "title": "Disk Busy by Server", - "type": "dashboard" - } - ], - "minSpan": 4, - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "95%ile", - "fill": 0 - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "avg (\n max by (device) (\n irate(node_disk_io_time_ms[1m]) \n and on (instance, device) ceph_disk_occupation\n ) / 10\n)", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "average", - "refId": "A", - "textEditor": false - }, - { - "expr": "quantile(\n $percentile/100, \n (\n max by (device) (\n irate(node_disk_io_time_ms[1m]) \n and on (instance, device) ceph_disk_occupation\n ) / 10\n )\n)", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "$percentile%ile", - "refId": "B" - } - ], - "thresholds": [], - "timeFrom": "15m", - "timeShift": null, - "title": "Disk Utilization", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "percent", - "label": "", - "logBase": 1, - "max": "100", - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - } - ], - "refresh": "15s", - "schemaVersion": 16, - "style": "dark", - "tags": [ - "overview" - ], - "templating": { - "list": [ - { - "allValue": null, - "current": { - "selected": true, - "text": "95", - "value": "95" - }, - "hide": 0, 
- "includeAll": false, - "label": "Percentile", - "multi": false, - "name": "percentile", - "options": [ - { - "selected": false, - "text": "80", - "value": "80" - }, - { - "selected": false, - "text": "85", - "value": "85" - }, - { - "selected": false, - "text": "90", - "value": "90" - }, - { - "selected": true, - "text": "95", - "value": "95" - }, - { - "selected": false, - "text": "98", - "value": "98" - } - ], - "query": "80,85,90,95,98", - "type": "custom" - }, - { - "allValue": null, - "current": { - "text": "85", - "value": "85" - }, - "hide": 2, - "includeAll": false, - "label": null, - "multi": false, - "name": "disk_full_threshold", - "options": [ - { - "selected": true, - "text": "85", - "value": "85" - } - ], - "query": "85", - "type": "custom" - }, - { - "allValue": null, - "current": { - "text": "7d", - "value": "7d" - }, - "hide": 2, - "includeAll": false, - "label": null, - "multi": false, - "name": "growth_window", - "options": [ - { - "selected": true, - "text": "7d", - "value": "7d" - } - ], - "query": "7d", - "type": "custom" - }, - { - "allValue": "", - "current": {}, - "datasource": null, - "hide": 2, - "includeAll": true, - "label": "OSD Host", - "multi": false, - "name": "osd_servers", - "options": [], - "query": "label_values(ceph_disk_occupation, instance)", - "refresh": 1, - "regex": "([^.]*).*", - "sort": 1, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": "", - "current": {}, - "datasource": null, - "hide": 2, - "includeAll": true, - "label": "OSD Host", - "multi": false, - "name": "osd_servers_raw", - "options": [], - "query": "label_values(ceph_disk_occupation, instance)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "15s", - "30s", - "1m", - "5m", - 
"15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "browser", - "title": "Ceph - At A Glance", - "version": 43 -} diff --git a/ansible/roles/ceph-grafana/files/dashboards/mgr-prometheus/ceph-backend-storage.json b/ansible/roles/ceph-grafana/files/dashboards/mgr-prometheus/ceph-backend-storage.json deleted file mode 100644 index 2f0474e..0000000 --- a/ansible/roles/ceph-grafana/files/dashboards/mgr-prometheus/ceph-backend-storage.json +++ /dev/null @@ -1,1268 +0,0 @@ -{ - "__requires": [ - { - "type": "grafana", - "id": "grafana", - "name": "Grafana", - "version": "5.0.4" - }, - { - "type": "datasource", - "id": "prometheus", - "name": "Local", - "version": "5.0.0" - }, - { - "type": "panel", - "id": "singlestat", - "name": "Singlestat", - "version": "5.0.0" - }, - { - "type": "panel", - "id": "table", - "name": "Table", - "version": "5.0.0" - } - ], - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "editable": false, - "gnetId": null, - "graphTooltip": 0, - "id": null, - "iteration": 1525149605368, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "overview" - ], - "targetBlank": true, - "title": "Shortcuts", - "type": "dashboards" - } - ], - "panels": [ - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 13, - "panels": [], - "repeat": null, - "title": "Disk/OSD Host Summary", - "type": "row" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": null, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, 
- "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 7, - "w": 2, - "x": 0, - "y": 1 - }, - "id": 1, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "minSpan": 2, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "count(ceph_osd_up < 0.5)", - "format": "time_series", - "intervalFactor": 2, - "refId": "A", - "step": 60 - } - ], - "thresholds": "", - "title": "OSDs down", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "0", - "value": "null" - } - ], - "valueName": "avg" - }, - { - "columns": [ - { - "text": "Current", - "value": "current" - } - ], - "datasource": null, - "fontSize": "100%", - "gridPos": { - "h": 7, - "w": 4, - "x": 2, - "y": 1 - }, - "id": 2, - "links": [], - "minSpan": 4, - "pageSize": null, - "scroll": true, - "showHeader": true, - "sort": { - "col": 0, - "desc": true - }, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "date" - }, - { - "alias": "% Full", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "pattern": "Host and Disk", - "thresholds": [], - "type": "number", - "unit": "short" - }, - { - "alias": "% Full", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": 
"YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "pattern": "Current", - "thresholds": [], - "type": "number", - "unit": "none" - }, - { - "alias": "OSD Id", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "Metric", - "thresholds": [], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "decimals": 2, - "pattern": "/.*/", - "thresholds": [], - "type": "number", - "unit": "short" - } - ], - "targets": [ - { - "expr": "ceph_osd_stat_bytes_used / ceph_osd_stat_bytes * 100 > $disk_full_threshold", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{ceph_daemon}}", - "refId": "A", - "step": 40, - "textEditor": true - } - ], - "title": "Disks Near Full", - "transform": "timeseries_aggregations", - "type": "table" - }, - { - "columns": [], - "datasource": null, - "fontSize": "100%", - "gridPos": { - "h": 7, - "w": 12, - "x": 6, - "y": 1 - }, - "id": 17, - "links": [], - "pageSize": null, - "scroll": true, - "showHeader": true, - "sort": { - "col": 1, - "desc": false - }, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "OSD Host", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "instance", - "thresholds": [], - "type": "string", - "unit": "short" - }, - { - "alias": "Total Capacity", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 1, - "pattern": "Value #A", - "thresholds": [], - "type": "number", - "unit": "bytes" - }, - 
{ - "alias": "# Drives", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "pattern": "Value #B", - "thresholds": [], - "type": "number", - "unit": "none" - }, - { - "alias": "", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "decimals": 2, - "pattern": "/.*/", - "thresholds": [], - "type": "hidden", - "unit": "short" - } - ], - "targets": [ - { - "expr": "sum by (instance) (\n ceph_disk_occupation{instance=~\"($osd_servers)\"}*0 + on (ceph_daemon) group_right(instance) ceph_osd_stat_bytes\n)", - "format": "table", - "instant": true, - "intervalFactor": 1, - "refId": "A" - }, - { - "expr": "count by (instance) (\n ceph_disk_occupation{instance=~\"($osd_servers)\"}\n)", - "format": "table", - "instant": true, - "intervalFactor": 1, - "refId": "B" - } - ], - "title": "OSD Host Capacity Summary", - "transform": "table", - "type": "table" - }, - { - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 8 - }, - "id": 14, - "panels": [ - { - "cards": { - "cardPadding": null, - "cardRound": null - }, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateRdYlGn", - "exponent": 0.5, - "mode": "spectrum" - }, - "dataFormat": "timeseries", - "datasource": null, - "description": "The heatmap categorizes disk utilization into discrete buckets (e.g util 0-5) and shows the frequency of the number of disks that fall within that range as a color. The color chosen depends on the number of disks in the 'bucket', ranging from green (low) to red (high). 
Hover over a colored block to show the number of disks at a given util% for that time interval (20secs).", - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 9 - }, - "heatmap": {}, - "highlightCards": true, - "id": 5, - "legend": { - "show": false - }, - "links": [], - "minSpan": 12, - "targets": [ - { - "expr": "irate(node_disk_io_time_ms{instance=~\"[[osd_servers]]\"}[1m]) / 10 and on (instance, device) ceph_disk_occupation", - "format": "time_series", - "intervalFactor": 2, - "refId": "A", - "step": 10, - "textEditor": true - } - ], - "title": "Disk Drive Utilization Heatmap - $osd_servers", - "tooltip": { - "show": true, - "showHistogram": false - }, - "type": "heatmap", - "xAxis": { - "show": true - }, - "xBucketNumber": 180, - "xBucketSize": "", - "yAxis": { - "decimals": null, - "format": "short", - "logBase": 1, - "max": "100", - "min": "0", - "show": true, - "splitFactor": null - }, - "yBucketNumber": null, - "yBucketSize": 5 - }, - { - "aliasColors": { - "Read Throughput": "#629E51", - "Write Throughput": "#E0752D" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 1, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 9 - }, - "id": 6, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "minSpan": 12, - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum(\n irate(node_disk_bytes_read{instance=~\"($osd_servers)\"}[5m]) and on (instance, device) ceph_disk_occupation\n)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Read throughtput", - "refId": "A", - "step": 10, - "textEditor": true - }, - { - "expr": "sum(\n irate(node_disk_bytes_written{instance=~\"($osd_servers)\"}[5m]) 
and on (instance, device) ceph_disk_occupation\n)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Write throughtput", - "refId": "B", - "step": 10, - "textEditor": true - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Disk Throughput - $osd_servers", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "decbytes", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": { - "Read Latency": "#629E51", - "Write Latency": "#E0752D" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 1, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 17 - }, - "id": 7, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - { - "dashUri": "db/latency-by-server", - "dashboard": "Latency by Server", - "includeVars": true, - "keepTime": true, - "targetBlank": true, - "title": "Latency by Server", - "type": "dashboard" - } - ], - "minSpan": 12, - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "quantile($percentile/100.0, (\n irate(node_disk_read_time_ms[5m]) / clamp_min(irate(node_disk_reads_completed[5m]), 0.001)\n and on (instance, device) ceph_disk_occupation{instance=~\"($osd_servers)\"}\n))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Read latency", - "refId": "A", - "step": 10, - "textEditor": true - }, - { - 
"expr": "quantile($percentile/100.0, (\n irate(node_disk_write_time_ms[5m]) / clamp_min(irate(node_disk_writes_completed[5m]), 0.001)\n and on (instance, device) ceph_disk_occupation{instance=~\"($osd_servers)\"}\n))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Write latency", - "refId": "B", - "step": 10 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Disk Latency - $osd_servers OSDs @ $percentile%ile", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": { - "disk busy %": "#3F6833" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 1, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 17 - }, - "id": 8, - "legend": { - "avg": false, - "current": true, - "max": false, - "min": false, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [ - { - "dashUri": "db/disk-busy-by-server", - "dashboard": "Disk Busy by Server", - "includeVars": true, - "keepTime": true, - "targetBlank": true, - "title": "Disk Busy by Server", - "type": "dashboard" - } - ], - "minSpan": 12, - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "quantile($percentile/100, (\n max by (device) (\n irate(node_disk_io_time_ms[5m]) and on (instance, device) ceph_disk_occupation{instance=~\"($osd_servers)\"}\n ) / 10\n))", - "format": "time_series", - "intervalFactor": 2, - 
"legendFormat": "disk busy %", - "refId": "A", - "step": 10, - "textEditor": true - } - ], - "thresholds": [ - { - "colorMode": "custom", - "fill": false, - "line": true, - "lineColor": "rgba(178, 0, 0, 0.29)", - "op": "gt", - "value": 80 - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Disk Utilization - $osd_servers OSDs at $percentile%ile", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": "", - "logBase": 1, - "max": "100", - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { - "IOPS/spindle": "#3F6833" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 1, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 25 - }, - "id": 9, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "minSpan": 12, - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "quantile($percentile/100.0, (\n avg by (device) (\n irate(node_disk_reads_completed[5m]) and on (instance, device) ceph_disk_occupation{instance=~\"($osd_servers)\"}\n ) +\n avg by (device) (\n irate(node_disk_writes_completed[5m]) and on (instance, device) ceph_disk_occupation{instance=~\"($osd_servers)\"}\n )\n))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "IOPS/spindle", - "refId": "A", - "step": 10, - "textEditor": true - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "IOPS per 
Disk @ $percentile%ile - $osd_servers OSDs", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": { - "IOPS": "#3F6833" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 1, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 25 - }, - "id": 10, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - { - "dashUri": "db/iops-by-server", - "dashboard": "IOPS by Server", - "includeVars": true, - "keepTime": true, - "targetBlank": true, - "title": "IOPS by Server", - "type": "dashboard" - } - ], - "minSpan": 12, - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "quantile($percentile/100.0, (\n sum(\n irate(node_disk_reads_completed[5m]) and on (instance, device) ceph_disk_occupation{instance=~\"($osd_servers)\"}\n ) +\n sum(\n irate(node_disk_writes_completed[5m]) and on (instance, device) ceph_disk_occupation{instance=~\"($osd_servers)\"}\n )\n))", - "format": "time_series", - "intervalFactor": 2, - "refId": "A", - "step": 10, - "textEditor": true - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Total Disk IOPS - $osd_servers OSDs", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": 
null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "title": "Disk/OSD Load Summary", - "type": "row" - }, - { - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 9 - }, - "id": 15, - "panels": [ - { - "aliasColors": { - "CPU Busy": "#447EBC", - "CPU Busy @ 95%ile": "#890F02", - "Cluster-wide CPU Busy @ 95%ile": "#890F02", - "Max CPU Busy": "#BF1B00", - "Max CPU Busy - all OSD Hosts": "#BF1B00" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 3, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 10 - }, - "id": 11, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "minSpan": 12, - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "Cluster-wide CPU Busy @ 95%ile", - "fill": 0 - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "quantile($percentile / 100.0, (\n sum by (instance) (\n irate(node_cpu{mode=~\"(irq|nice|system|user)\"}[5m])\n ) / sum by (instance) (\n irate(node_cpu[5m])\n )\n) * 100)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Cluster-wide CPU Busy @ $percentile%ile", - "refId": "A", - "step": 10, - "textEditor": true - }, - { - "expr": "avg(\n sum by (instance) (\n irate(node_cpu{instance=~\"($osd_servers)\", mode=~\"(irq|nice|system|user)\"}[5m])\n ) / sum by (instance) (\n irate(node_cpu{instance=~\"($osd_servers)\"}[5m])\n )\n) * 100", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": 
"Average OSD Host(s) CPU Busy", - "refId": "B", - "step": 10, - "textEditor": true - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "CPU Utilization - $osd_servers", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": "", - "logBase": 1, - "max": "100", - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": { - "Network load (rx+tx)": "#3F6833" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 1, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 10 - }, - "id": 12, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - { - "dashUri": "db/network-usage-by-server", - "dashboard": "Network Usage by Server", - "includeVars": true, - "keepTime": true, - "targetBlank": true, - "title": "Network Usage by Server", - "type": "dashboard" - } - ], - "minSpan": 12, - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum (irate(node_network_receive_bytes{instance=~\"($osd_servers)\", device=~\"(eth|en|bond|ib|mlx).*\"}[5m])) + \nsum (irate(node_network_transmit_bytes{instance=~\"($osd_servers)\", device=~\"(eth|en|bond|ib|mlx).*\"}[5m]))", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "Network load (rx+tx)", - "refId": "A", - "step": 10 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Network Load - $osd_servers", - 
"tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - } - ], - "repeat": null, - "title": "OSD Host CPU and Network Load", - "type": "row" - } - ], - "refresh": "10s", - "schemaVersion": 16, - "style": "dark", - "tags": [], - "templating": { - "list": [ - { - "allValue": null, - "current": { - "selected": true, - "text": "95", - "value": "95" - }, - "hide": 0, - "includeAll": false, - "label": "Percentile", - "multi": false, - "name": "percentile", - "options": [ - { - "selected": false, - "text": "80", - "value": "80" - }, - { - "selected": false, - "text": "85", - "value": "85" - }, - { - "selected": false, - "text": "90", - "value": "90" - }, - { - "selected": true, - "text": "95", - "value": "95" - }, - { - "selected": false, - "text": "98", - "value": "98" - } - ], - "query": "80,85,90,95,98", - "type": "custom" - }, - { - "allValue": null, - "current": {}, - "datasource": null, - "hide": 0, - "includeAll": true, - "label": "OSD Hostname", - "multi": true, - "name": "osd_servers", - "options": [], - "query": "ceph_disk_occupation", - "refresh": 1, - "regex": "/instance=\"([^\"]*)\"/", - "sort": 3, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "selected": true, - "text": "85", - "value": "85" - }, - "hide": 2, - "includeAll": false, - "label": null, - "multi": false, - "name": "disk_full_threshold", - "options": [ - { - "selected": true, - "text": "85", - "value": "85" - } - ], - "query": "85", - "type": "custom" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - 
"refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "browser", - "title": "Ceph Backend Storage", - "version": 14 -} diff --git a/ansible/roles/ceph-grafana/files/dashboards/mgr-prometheus/ceph-cluster.json b/ansible/roles/ceph-grafana/files/dashboards/mgr-prometheus/ceph-cluster.json deleted file mode 100644 index b84c75c..0000000 --- a/ansible/roles/ceph-grafana/files/dashboards/mgr-prometheus/ceph-cluster.json +++ /dev/null @@ -1,2588 +0,0 @@ -{ - "__requires": [ - { - "type": "grafana", - "id": "grafana", - "name": "Grafana", - "version": "5.0.4" - }, - { - "type": "datasource", - "id": "prometheus", - "name": "Local", - "version": "5.0.0" - }, - { - "type": "panel", - "id": "singlestat", - "name": "Singlestat", - "version": "5.0.0" - }, - { - "type": "panel", - "id": "text", - "name": "Text", - "version": "5.0.0" - } - ], - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "editable": false, - "gnetId": null, - "graphTooltip": 0, - "id": null, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "overview" - ], - "targetBlank": true, - "title": "Shortcuts", - "type": "dashboards" - } - ], - "panels": [ - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 29, - "panels": [], - "repeat": null, - "title": "Cluster Configuration", - "type": "row" - }, - { - "content": "", - "gridPos": { - "h": 3, - "w": 2, - "x": 0, - "y": 1 - }, - "id": 1, - "links": [], - "minSpan": 2, - "mode": "markdown", - "title": "", - "transparent": true, - "type": "text" - }, - { - "cacheTimeout": null, - "colorBackground": false, - 
"colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": null, - "decimals": 0, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 3, - "w": 2, - "x": 2, - "y": 1 - }, - "id": 2, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "minSpan": 2, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "count(ceph_mon_metadata)", - "format": "time_series", - "groupBy": [], - "instant": true, - "intervalFactor": 1, - "policy": "default", - "refId": "A", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - } - ] - ], - "step": 60, - "tags": [], - "textEditor": true - } - ], - "thresholds": "", - "title": "MONs", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": null, - "decimals": 0, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 3, - "w": 2, - "x": 4, - "y": 1 - }, - "id": 3, - "interval": null, - "links": [], 
- "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": "", - "minSpan": 2, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "count(sum by (instance) (ceph_disk_occupation))", - "format": "time_series", - "intervalFactor": 2, - "refId": "A", - "step": 60, - "textEditor": true - } - ], - "thresholds": "", - "title": "OSD Hosts", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": null, - "decimals": 0, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 3, - "w": 2, - "x": 6, - "y": 1 - }, - "id": 4, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "minSpan": 2, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - 
"expr": "count(ceph_mds_metadata)", - "format": "time_series", - "groupBy": [], - "instant": true, - "intervalFactor": 1, - "policy": "default", - "refId": "A", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - } - ] - ], - "step": 60, - "tags": [], - "textEditor": true - } - ], - "thresholds": "", - "title": "MDS", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": null, - "decimals": 0, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 3, - "w": 2, - "x": 8, - "y": 1 - }, - "id": 5, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": "", - "minSpan": 2, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "count(ceph_rgw_metadata)", - "format": "time_series", - "instant": true, - "intervalFactor": 1, - "refId": "A", - "textEditor": true - } - ], - "thresholds": "", - "title": "RGW Hosts", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "0", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - 
"colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": null, - "decimals": 0, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 3, - "w": 2, - "x": 10, - "y": 1 - }, - "id": 6, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": "", - "minSpan": 2, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "max(ceph_iscsi_gateway_tpg_total)", - "format": "time_series", - "intervalFactor": 2, - "refId": "A", - "textEditor": true - } - ], - "thresholds": "", - "title": "iSCSI Hosts", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": null, - "decimals": 0, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 3, - "w": 4, - "x": 12, - "y": 1 - }, - "id": 7, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "minSpan": 4, - "nullPointMode": "connected", - "nullText": null, - 
"postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "count(max by (id) (ceph_osd_metadata))", - "format": "time_series", - "groupBy": [], - "intervalFactor": 2, - "legendFormat": "", - "policy": "default", - "refId": "A", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - } - ] - ], - "step": 60, - "tags": [], - "textEditor": true - } - ], - "thresholds": "", - "title": "OSDs", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": null, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 3, - "w": 2, - "x": 16, - "y": 1 - }, - "id": 8, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "minSpan": 2, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "count(ceph_pool_metadata)", - "format": "time_series", - 
"groupBy": [], - "intervalFactor": 2, - "policy": "default", - "refId": "A", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - } - ] - ], - "step": 60, - "tags": [], - "textEditor": true - } - ], - "thresholds": "", - "title": "Pools", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": null, - "decimals": 1, - "format": "bytes", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 3, - "w": 4, - "x": 18, - "y": 1 - }, - "id": 9, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "minSpan": 4, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "ceph_cluster_total_bytes - ceph_cluster_total_used_bytes", - "format": "time_series", - "instant": true, - "intervalFactor": 1, - "refId": "A", - "step": 60, - "textEditor": true - } - ], - "thresholds": "", - "title": "Unused Capacity", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 4 - }, - 
"id": 30, - "panels": [], - "repeat": null, - "title": "Cluster Flags", - "type": "row" - }, - { - "content": "", - "gridPos": { - "h": 3, - "w": 4, - "x": 0, - "y": 5 - }, - "id": 10, - "links": [], - "minSpan": 4, - "mode": "markdown", - "title": "", - "transparent": true, - "type": "text" - }, - { - "content": "

Cluster Flags:

", - "gridPos": { - "h": 3, - "w": 2, - "x": 4, - "y": 5 - }, - "height": "95", - "id": 11, - "links": [], - "minSpan": 2, - "mode": "html", - "title": "", - "transparent": true, - "type": "text" - }, - { - "cacheTimeout": null, - "colorBackground": true, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": null, - "description": "Scrub activity takes place daily basis across the OSD's and performs object size and attribute checks. Scrub activity can be controlled with the \"ceph osd scrub\" command.", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 3, - "w": 2, - "x": 6, - "y": 5 - }, - "height": "95", - "hideTimeOverride": true, - "id": 12, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": "", - "minSpan": 2, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "2*scalar(ceph_osd_flag_noscrub) + scalar(ceph_pg_scrubbing >bool 0)", - "format": "time_series", - "intervalFactor": 2, - "refId": "A", - "step": 2, - "textEditor": true - } - ], - "thresholds": "1,2", - "timeFrom": "1m", - "timeShift": null, - "title": "SCRUB", - "type": "singlestat", - "valueFontSize": "40%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - }, - { - "op": "=", - "text": "ENABLED", - "value": "0" - }, - { - "op": "=", - "text": "ACTIVE", - "value": "1" - }, - { - "op": 
"=", - "text": "DISABLED", - "value": "2" - }, - { - "op": "=", - "text": "DISABLED", - "value": "3" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": true, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": null, - "description": "Deep scrub acts on the objects within placement groups (PGs). Objects are read, and checksum's compared to protect against silent bit-rot events. Although a weekly activity, in large clusters is normal to see deep-scrub active on a daily basis. Scrub activity can be controlled with the \"ceph osd scrub\" command.", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 3, - "w": 2, - "x": 8, - "y": 5 - }, - "height": "95", - "hideTimeOverride": true, - "id": 13, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": "", - "minSpan": 2, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "2*scalar(ceph_osd_flag_nodeep_scrub) + scalar(ceph_pg_deep >bool 0)", - "format": "time_series", - "intervalFactor": 2, - "refId": "A", - "step": 2, - "textEditor": true - } - ], - "thresholds": "1,2", - "timeFrom": "1m", - "title": "DEEP", - "type": "singlestat", - "valueFontSize": "40%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - }, - { - "op": "=", - "text": "ENABLED", - "value": "0" - }, - { - "op": 
"=", - "text": "ACTIVE", - "value": "1" - }, - { - "op": "=", - "text": "DISABLED", - "value": "2" - }, - { - "op": "=", - "text": "DISABLED", - "value": "3" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": true, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": null, - "description": "With the recovery flag enabled, disruption in the cluster will result in data being recreated to freespace within the cluster to honor the pool's protection schema. Under some circumstances, you may use the norecover flag to prevent automatic recovery.", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 3, - "w": 2, - "x": 10, - "y": 5 - }, - "height": "95", - "hideTimeOverride": true, - "id": 14, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": "", - "minSpan": 2, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "2*scalar(ceph_osd_flag_norecover) + scalar(ceph_pg_recovering >bool 0)", - "format": "time_series", - "intervalFactor": 2, - "refId": "A", - "step": 2, - "textEditor": true - } - ], - "thresholds": "1,2", - "timeFrom": "1m", - "title": "RECOVERY", - "type": "singlestat", - "valueFontSize": "40%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - }, - { - "op": "=", - "text": "ENABLED", - "value": "0" - }, - { - 
"op": "=", - "text": "ACTIVE", - "value": "1" - }, - { - "op": "=", - "text": "DISABLED", - "value": "2" - }, - { - "op": "=", - "text": "DISABLED", - "value": "3" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": true, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": null, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 3, - "w": 2, - "x": 12, - "y": 5 - }, - "height": "95", - "hideTimeOverride": true, - "id": 15, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": "", - "minSpan": 2, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "2*scalar(ceph_osd_flag_nobackfill) + scalar(ceph_pg_backfilling >bool 0)", - "format": "time_series", - "intervalFactor": 2, - "refId": "A", - "step": 2, - "textEditor": true - }, - { - "expr": "", - "format": "time_series", - "intervalFactor": 1, - "refId": "B" - } - ], - "thresholds": "1,2", - "timeFrom": "1m", - "title": "BACKFILL", - "type": "singlestat", - "valueFontSize": "40%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - }, - { - "op": "=", - "text": "ENABLED", - "value": "0" - }, - { - "op": "=", - "text": "ACTIVE", - "value": "1" - }, - { - "op": "=", - "text": "DISABLED", - "value": "2" - }, - { - "op": "=", - "text": "DISABLED", - "value": "3" - } - ], - 
"valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": true, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": null, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 3, - "w": 2, - "x": 14, - "y": 5 - }, - "height": "95", - "hideTimeOverride": true, - "id": 16, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": "", - "minSpan": 2, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "2*scalar(ceph_osd_flag_norebalance)", - "format": "time_series", - "intervalFactor": 2, - "refId": "A", - "step": 2, - "textEditor": true - } - ], - "thresholds": "1,2", - "timeFrom": "1m", - "title": "REBALANCE", - "type": "singlestat", - "valueFontSize": "40%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - }, - { - "op": "=", - "text": "ENABLED", - "value": "0" - }, - { - "op": "=", - "text": "ACTIVE", - "value": "1" - }, - { - "op": "=", - "text": "DISABLED", - "value": "2" - }, - { - "op": "=", - "text": "DISABLED", - "value": "3" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": true, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": null, - "description": "The OUT flag setting allows the mon's to mark OSD's 
as out of the configuration when they stop sending hearbeats to the mon's. By marking them OUT, recovery takes place. However, for planned maintenance you can set the cluster to noout to disable this behavior.", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 3, - "w": 2, - "x": 16, - "y": 5 - }, - "height": "95", - "hideTimeOverride": true, - "id": 17, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": "", - "minSpan": 2, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "2*scalar(ceph_osd_flag_noout)", - "format": "time_series", - "intervalFactor": 2, - "refId": "A", - "step": 2, - "textEditor": true - } - ], - "thresholds": "1,2", - "timeFrom": "1m", - "title": "OUT", - "type": "singlestat", - "valueFontSize": "40%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - }, - { - "op": "=", - "text": "ENABLED", - "value": "0" - }, - { - "op": "=", - "text": "ACTIVE", - "value": "1" - }, - { - "op": "=", - "text": "DISABLED", - "value": "2" - }, - { - "op": "=", - "text": "DISABLED", - "value": "3" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": true, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": null, - "description": "The down feature allows OSD's to mark their peers as DOWN when they are not reachable. 
However, if there is a poor network or planned outages, you may want to set this flag to nodown to prevent OSD's flapping between up/down states", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 3, - "w": 2, - "x": 18, - "y": 5 - }, - "height": "95", - "hideTimeOverride": true, - "id": 18, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": "", - "minSpan": 2, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "2*scalar(ceph_osd_flag_nodown) + scalar(ceph_pg_down >bool 0)", - "format": "time_series", - "intervalFactor": 2, - "refId": "A", - "step": 2, - "textEditor": true - } - ], - "thresholds": "1,2", - "timeFrom": "1m", - "title": "DOWN", - "type": "singlestat", - "valueFontSize": "40%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - }, - { - "op": "=", - "text": "ENABLED", - "value": "0" - }, - { - "op": "=", - "text": "ACTIVE", - "value": "1" - }, - { - "op": "=", - "text": "DISABLED", - "value": "2" - }, - { - "op": "=", - "text": "DISABLED", - "value": "3" - } - ], - "valueName": "current" - }, - { - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 8 - }, - "id": 31, - "panels": [ - { - "aliasColors": { - "Raw": "#3F6833", - "Used": "#E0752D" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "description": "Shows the Capacity within the cluster over the past 7 days", - 
"fill": 1, - "gridPos": { - "h": 7, - "w": 8, - "x": 0, - "y": 9 - }, - "hideTimeOverride": true, - "id": 19, - "legend": { - "avg": false, - "current": true, - "max": false, - "min": false, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "minSpan": 12, - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "alias": "Used", - "expr": "scalar(ceph_cluster_total_used_bytes)", - "format": "time_series", - "groupBy": [], - "intervalFactor": 2, - "legendFormat": "Used", - "policy": "default", - "refId": "B", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - } - ] - ], - "step": 3600, - "tags": [], - "textEditor": true - }, - { - "alias": "Raw Capacity", - "expr": "scalar(ceph_cluster_total_bytes)", - "format": "time_series", - "groupBy": [], - "intervalFactor": 2, - "legendFormat": "Raw", - "policy": "default", - "refId": "A", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - } - ] - ], - "step": 3600, - "tags": [], - "textEditor": true - } - ], - "thresholds": [], - "timeFrom": "7d", - "timeShift": null, - "title": "Cluster Capacity - Past 7 Days", - "tooltip": { - "shared": true, - "sort": 1, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { - "Raw": "#3F6833", - "Used": "#E0752D" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "decimals": 2, 
- "description": "Shows the Capacity within each pool over the past 7 days", - "fill": 1, - "gridPos": { - "h": 7, - "w": 8, - "x": 8, - "y": 9 - }, - "hideTimeOverride": true, - "id": 20, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "minSpan": 12, - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "alias": "Used", - "expr": "max by (name) (\n ceph_pool_bytes_used / (ceph_pool_bytes_used + ceph_pool_max_avail) * 100 + on (pool_id) group_left (name) ceph_pool_metadata\n)", - "format": "time_series", - "groupBy": [], - "intervalFactor": 2, - "legendFormat": "{{name}}", - "policy": "default", - "refId": "A", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - } - ] - ], - "step": 3600, - "tags": [], - "textEditor": true - } - ], - "thresholds": [], - "timeFrom": "7d", - "timeShift": null, - "title": "Pool Capacity - Past 7 Days", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "percent", - "label": "", - "logBase": 1, - "max": "100", - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { - "Raw": "#3F6833", - "Used": "#E0752D" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "decimals": 2, - "description": "Shows the Capacity within the cluster over the past 7 days", - "fill": 1, - "gridPos": { - "h": 7, - "w": 8, - "x": 16, - "y": 9 - }, - 
"hideTimeOverride": true, - "id": 21, - "legend": { - "avg": false, - "current": true, - "max": false, - "min": false, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "minSpan": 12, - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "alias": "Used", - "expr": "scalar(ceph_cluster_total_objects)", - "format": "time_series", - "groupBy": [], - "intervalFactor": 2, - "legendFormat": "RADOS Objects", - "policy": "default", - "refId": "B", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - } - ] - ], - "step": 3600, - "tags": [], - "textEditor": true - } - ], - "thresholds": [], - "timeFrom": "7d", - "timeShift": null, - "title": "RADOS Object History - Past 7 Days", - "tooltip": { - "shared": true, - "sort": 1, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": "RADOS Object Count", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, - "datasource": null, - "description": "Each bar indicates the number of OSD's that have a PG count in a specific range as shown on the x axis.", - "fill": 1, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 16 - }, - "id": 37, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "hideEmpty": false, - "hideZero": false, - "max": false, - "min": false, - "rightSide": false, - "show": false, - "total": false, - "values": false - }, - "lines": false, - "linewidth": 1, - 
"links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "ceph_osd_numpg\n", - "format": "time_series", - "instant": true, - "intervalFactor": 1, - "legendFormat": "PGs per OSD", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Distribution of PGs per OSD", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": 20, - "mode": "histogram", - "name": null, - "show": true, - "values": [ - "total" - ] - }, - "yaxes": [ - { - "decimals": 0, - "format": "short", - "label": "# of OSDs", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "columns": [], - "datasource": null, - "description": "This table shows all OSDs sorted by their PG Count. 
The PG count is color coded.", - "fontSize": "100%", - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 16 - }, - "id": 35, - "links": [], - "pageSize": null, - "scroll": true, - "showHeader": true, - "sort": { - "col": 5, - "desc": true - }, - "styles": [ - { - "alias": "OSD", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "ceph_daemon", - "thresholds": [], - "type": "string", - "unit": "short" - }, - { - "alias": "Device", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "device", - "thresholds": [], - "type": "string", - "unit": "short" - }, - { - "alias": "Host", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "instance", - "thresholds": [], - "type": "number", - "unit": "short" - }, - { - "alias": "# PGs", - "colorMode": "value", - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "pattern": "Value", - "thresholds": [ - "200", - "250" - ], - "type": "number", - "unit": "none" - }, - { - "alias": "", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "decimals": 2, - "pattern": "/.*/", - "thresholds": [], - "type": "hidden", - "unit": "short" - } - ], - "targets": [ - { - "expr": "ceph_osd_numpg * on(ceph_daemon) group_left(instance,device) ceph_disk_occupation", - "format": "table", - "instant": true, - "intervalFactor": 1, - "refId": "A" - } - ], - "title": "OSDs by PG Counts", - "transform": "table", - "type": "table" - } - ], - "repeat": 
null, - "title": "Cluster Capacity", - "type": "row" - }, - { - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 9 - }, - "id": 32, - "panels": [ - { - "columns": [], - "datasource": "Local", - "fontSize": "100%", - "gridPos": { - "h": 7, - "w": 4, - "x": 0, - "y": 10 - }, - "hideTimeOverride": true, - "id": 22, - "links": [], - "minSpan": 4, - "pageSize": 10, - "scroll": true, - "showHeader": true, - "sort": { - "col": 0, - "desc": true - }, - "styles": [ - { - "alias": "Host", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "ceph_daemon", - "thresholds": [], - "type": "string", - "unit": "short" - }, - { - "alias": "Version", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "short_vers", - "thresholds": [], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "decimals": 2, - "pattern": "/.*/", - "thresholds": [], - "type": "hidden", - "unit": "short" - } - ], - "targets": [ - { - "expr": "label_replace(ceph_mon_metadata,\"short_vers\",\"$1\",\"ceph_version\",\"ceph version(.*) (.*) (.*) (.*)\")", - "format": "table", - "hide": false, - "instant": true, - "intervalFactor": 1, - "legendFormat": "", - "refId": "D" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "MONs", - "transform": "table", - "type": "table" - }, - { - "columns": [ - { - "text": "Current", - "value": "current" - } - ], - "datasource": null, - "fontSize": "100%", - "gridPos": { - "h": 7, - "w": 4, - "x": 4, - "y": 10 - }, - "hideTimeOverride": true, - "id": 24, - "links": [], - "minSpan": 4, - "pageSize": 10, - "scroll": true, - 
"showHeader": true, - "sort": { - "col": 6, - "desc": false - }, - "styles": [ - { - "alias": "OSD", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "pattern": "ceph_daemon", - "thresholds": [], - "type": "number", - "unit": "none" - }, - { - "alias": "Version", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "short_vers", - "thresholds": [], - "type": "string", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "decimals": 2, - "pattern": "/.*/", - "thresholds": [], - "type": "hidden", - "unit": "short" - } - ], - "targets": [ - { - "expr": "label_replace(ceph_osd_metadata,\"short_vers\",\"$1\",\"ceph_version\",\"ceph version(.*) (.*) (.*) (.*)\")", - "format": "table", - "instant": true, - "intervalFactor": 1, - "legendFormat": "{{id}} - {{short_vers}}", - "refId": "A" - } - ], - "timeFrom": "2m", - "timeShift": null, - "title": "OSDs", - "transform": "table", - "type": "table" - }, - { - "columns": [ - { - "text": "Current", - "value": "current" - } - ], - "datasource": null, - "fontSize": "100%", - "gridPos": { - "h": 7, - "w": 4, - "x": 8, - "y": 10 - }, - "hideTimeOverride": true, - "id": 26, - "links": [], - "minSpan": 4, - "pageSize": 10, - "scroll": true, - "showHeader": true, - "sort": { - "col": 0, - "desc": true - }, - "styles": [ - { - "alias": "RGW Host", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "ceph_daemon", - "thresholds": [], - "type": "string", - "unit": "short" - }, - { - "alias": "Version", - 
"colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "short_vers", - "thresholds": [], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "decimals": 2, - "pattern": "/.*/", - "thresholds": [], - "type": "hidden", - "unit": "short" - } - ], - "targets": [ - { - "expr": "label_replace(ceph_rgw_metadata,\"short_vers\",\"$1\",\"ceph_version\",\"ceph version(.*) (.*) (.*) (.*)\")", - "format": "table", - "instant": true, - "intervalFactor": 1, - "legendFormat": "", - "refId": "A", - "textEditor": true - } - ], - "timeFrom": "2m", - "title": "RGWs", - "transform": "table", - "type": "table" - }, - { - "columns": [ - { - "text": "Current", - "value": "current" - } - ], - "datasource": null, - "fontSize": "100%", - "gridPos": { - "h": 7, - "w": 4, - "x": 12, - "y": 10 - }, - "hideTimeOverride": true, - "id": 33, - "links": [], - "minSpan": 4, - "pageSize": 10, - "scroll": true, - "showHeader": true, - "sort": { - "col": 0, - "desc": true - }, - "styles": [ - { - "alias": "MDS Host", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "ceph_daemon", - "thresholds": [], - "type": "string", - "unit": "short" - }, - { - "alias": "Version", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "short_vers", - "thresholds": [], - "type": "string", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "decimals": 
2, - "pattern": "/.*/", - "thresholds": [], - "type": "hidden", - "unit": "short" - } - ], - "targets": [ - { - "expr": "label_replace(ceph_mds_metadata,\"short_vers\",\"$1\",\"ceph_version\",\"ceph version(.*) (.*) (.*) (.*)\")", - "format": "table", - "instant": true, - "intervalFactor": 1, - "legendFormat": "{{id}} - {{short_vers}}", - "refId": "A", - "textEditor": true - } - ], - "timeFrom": "2m", - "title": "MDSs", - "transform": "table", - "type": "table" - }, - { - "content": "", - "gridPos": { - "h": 7, - "w": 2, - "x": 16, - "y": 10 - }, - "id": 27, - "links": [], - "minSpan": 2, - "mode": "markdown", - "title": "", - "transparent": true, - "type": "text" - } - ], - "repeat": null, - "title": "Ceph Daemon Version Information", - "type": "row" - } - ], - "refresh": false, - "schemaVersion": 16, - "style": "dark", - "tags": [ - "overview" - ], - "templating": { - "list": [] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "browser", - "title": "Ceph Cluster", - "version": 12 -} diff --git a/ansible/roles/ceph-grafana/files/dashboards/mgr-prometheus/ceph-health.json b/ansible/roles/ceph-grafana/files/dashboards/mgr-prometheus/ceph-health.json deleted file mode 100644 index e50b2bc..0000000 --- a/ansible/roles/ceph-grafana/files/dashboards/mgr-prometheus/ceph-health.json +++ /dev/null @@ -1,2343 +0,0 @@ -{ - "__requires": [ - { - "type": "grafana", - "id": "grafana", - "name": "Grafana", - "version": "5.0.4" - }, - { - "type": "datasource", - "id": "prometheus", - "name": "Local", - "version": "5.0.0" - }, - { - "type": "panel", - "id": "singlestat", - "name": "Singlestat", - "version": "5.0.0" - } - ], - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - 
"hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "editable": false, - "gnetId": null, - "graphTooltip": 0, - "id": null, - "iteration": 1526964924155, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "overview" - ], - "targetBlank": true, - "title": "Shortcuts", - "type": "dashboards" - } - ], - "panels": [ - { - "cacheTimeout": null, - "colorBackground": true, - "colorValue": false, - "colors": [ - "rgba(1, 167, 1, 1)", - "rgba(255,165,0, 1)", - "rgba(255, 0, 0, 1)" - ], - "datasource": null, - "description": "Shows the overall health of the ceph cluster.", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 3, - "w": 24, - "x": 0, - "y": 0 - }, - "height": "70", - "hideTimeOverride": true, - "id": 1, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": "", - "minSpan": 2, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "0", - "text": "HEALTH OK", - "to": "1" - }, - { - "from": "1", - "text": "HEALTH WARNING", - "to": "4" - }, - { - "from": "5", - "text": "HEALTH ERROR", - "to": "99" - }, - { - "from": "-10", - "text": "NODATA", - "to": "0" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "ceph_health_status", - "format": "time_series", - "intervalFactor": 2, - "refId": "A", - "step": 2, - "textEditor": true - } - ], - "thresholds": "1,5", - "timeFrom": "1m", - "timeShift": null, - "title": "", - 
"type": "singlestat", - "valueFontSize": "50%", - "valueMaps": [ - { - "op": "=", - "text": "HEALTH OK", - "value": "0" - }, - { - "op": "=", - "text": "HEALTH WARN", - "value": "1" - }, - { - "op": "=", - "text": "HEALTH ERROR", - "value": "2" - } - ], - "valueName": "current" - }, - { - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 3 - }, - "id": 22, - "panels": [ - { - "aliasColors": { - "Ceph Health": "#0a50a1" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "description": "The chart plots the clusters health, over time. The colored bands show 3 distinct areas; green (OK), yellow(WARN) and red(ERROR). The plot line in blue is this clusters current health, so you can see over time how long the cluster spends in an OK, WARN or ERROR state", - "fill": 0, - "gridPos": { - "h": 9, - "w": 24, - "x": 0, - "y": 4 - }, - "height": "350", - "hideTimeOverride": true, - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "maxDataPoints": "", - "minSpan": 6, - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": true, - "targets": [ - { - "expr": "ceph_health_status ", - "format": "time_series", - "instant": false, - "intervalFactor": 2, - "legendFormat": "Ceph Health", - "refId": "A", - "step": 600, - "textEditor": true - } - ], - "thresholds": [ - { - "colorMode": "custom", - "fill": true, - "fillColor": "#9ac48a", - "line": false, - "op": "lt", - "value": 0.1 - }, - { - "colorMode": "custom", - "fill": true, - "fillColor": "rgba(244, 213, 152, 0.58)", - "line": false, - "op": "lt", - "value": 1.1 - }, - { - "colorMode": "custom", - "fill": true, - "fillColor": "rgba(163, 0, 0, 0.3)", - "line": false, - "op": "gt", - "value": 1.1 
- } - ], - "timeFrom": "3d", - "timeShift": null, - "title": "Health History - Last 3 days", - "tooltip": { - "shared": false, - "sort": 1, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": "", - "logBase": 1, - "max": "2", - "min": "-0.5", - "show": false - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "title": "Cluster Health History", - "type": "row" - }, - { - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 4 - }, - "id": 23, - "panels": [ - { - "content": "

MONs

", - "gridPos": { - "h": 5, - "w": 2, - "x": 0, - "y": 5 - }, - "id": 6, - "links": [], - "minSpan": 2, - "mode": "html", - "title": "", - "transparent": true, - "type": "text" - }, - { - "cacheTimeout": null, - "colorBackground": true, - "colorValue": false, - "colors": [ - "#d44a3a", - "rgba(237, 129, 40, 0.89)", - "#299c46" - ], - "datasource": null, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 2, - "w": 3, - "x": 2, - "y": 5 - }, - "id": 28, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "repeat": "mon_servers", - "repeatDirection": "v", - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "label_replace(ceph_mon_quorum_status{ceph_daemon=~\"[[mon_servers]]\"},\"mon_host\",\"$2\",\"ceph_daemon\",\"(.*)_(.*)\")", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": "0.5,1", - "title": "$mon_servers", - "type": "singlestat", - "valueFontSize": "50%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - }, - { - "op": "=", - "text": "OK", - "value": "1" - }, - { - "op": "=", - "text": "DOWN", - "value": "0" - } - ], - "valueName": "current" - }, - { - "content": "

Cluster Flags:

", - "description": "Show cluster flags that determine automatic maintenance and recovery operations", - "gridPos": { - "h": 3, - "w": 2, - "x": 8, - "y": 5 - }, - "id": 31, - "links": [], - "minSpan": 2, - "mode": "html", - "title": "", - "transparent": true, - "type": "text" - }, - { - "cacheTimeout": null, - "colorBackground": true, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": null, - "description": "Scrub activity takes place daily basis across the OSD's and performs object size and attribute checks. Scrub activity can be controlled with the \"ceph osd scrub\" command.", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 3, - "w": 2, - "x": 10, - "y": 5 - }, - "height": "95", - "hideTimeOverride": true, - "id": 7, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": "", - "minSpan": 2, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "2*scalar(ceph_osd_flag_noscrub) + scalar(ceph_pg_scrubbing >bool 0)", - "format": "time_series", - "intervalFactor": 2, - "refId": "A", - "step": 2, - "textEditor": true - } - ], - "thresholds": "1,2", - "timeFrom": "1m", - "title": "SCRUB", - "type": "singlestat", - "valueFontSize": "40%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - }, - { - "op": "=", - "text": "ENABLED", - "value": "0" - }, - { - 
"op": "=", - "text": "ACTIVE", - "value": "1" - }, - { - "op": "=", - "text": "DISABLED", - "value": "2" - }, - { - "op": "=", - "text": "DISABLED", - "value": "3" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": true, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": null, - "description": "Deep scrub acts on the objects within placement groups (PGs). Objects are read, and checksum's compared to protect against silent bit-rot events. Although a weekly activity, in large clusters is normal to see deep-scrub active on a daily basis. Scrub activity can be controlled with the \"ceph osd scrub\" command.", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 3, - "w": 2, - "x": 12, - "y": 5 - }, - "height": "95", - "hideTimeOverride": true, - "id": 8, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": "", - "minSpan": 2, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "2*scalar(ceph_osd_flag_nodeep_scrub) + scalar(ceph_pg_deep >bool 0)", - "format": "time_series", - "intervalFactor": 2, - "refId": "A", - "step": 2, - "textEditor": true - } - ], - "thresholds": "1,2", - "timeFrom": "1m", - "title": "DEEP", - "type": "singlestat", - "valueFontSize": "40%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - }, - { - 
"op": "=", - "text": "ENABLED", - "value": "0" - }, - { - "op": "=", - "text": "ACTIVE", - "value": "1" - }, - { - "op": "=", - "text": "DISABLED", - "value": "2" - }, - { - "op": "=", - "text": "DISABLED", - "value": "3" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": true, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": null, - "description": "The OUT flag setting allows the mon's to mark OSD's as out of the configuration when they stop sending hearbeats to the mon's. By marking them OUT, recovery takes place. However, for planned maintenance you can set the cluster to noout to disable this behavior.", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 3, - "w": 2, - "x": 14, - "y": 5 - }, - "height": "95", - "hideTimeOverride": true, - "id": 12, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": "", - "minSpan": 2, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "2*scalar(ceph_osd_flag_noout)", - "format": "time_series", - "intervalFactor": 2, - "refId": "A", - "step": 2, - "textEditor": true - } - ], - "thresholds": "1,2", - "timeFrom": "1m", - "timeShift": null, - "title": "OUT", - "type": "singlestat", - "valueFontSize": "40%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - }, - { - "op": "=", - 
"text": "ENABLED", - "value": "0" - }, - { - "op": "=", - "text": "ACTIVE", - "value": "1" - }, - { - "op": "=", - "text": "DISABLED", - "value": "2" - }, - { - "op": "=", - "text": "DISABLED", - "value": "3" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": true, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": null, - "description": "The down feature allows OSD's to mark their peers as DOWN when they are not reachable. However, if there is a poor network or planned outages, you may want to set this flag to nodown to prevent OSD's flapping between up/down states", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 3, - "w": 2, - "x": 16, - "y": 5 - }, - "height": "95", - "hideTimeOverride": true, - "id": 13, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": "", - "minSpan": 2, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "2*scalar(ceph_osd_flag_nodown) + scalar(ceph_pg_down >bool 0)", - "format": "time_series", - "intervalFactor": 2, - "refId": "A", - "step": 2, - "textEditor": true - } - ], - "thresholds": "1,2", - "timeFrom": "1m", - "timeShift": null, - "title": "DOWN", - "type": "singlestat", - "valueFontSize": "40%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - }, - { - "op": "=", - "text": 
"ENABLED", - "value": "0" - }, - { - "op": "=", - "text": "ACTIVE", - "value": "1" - }, - { - "op": "=", - "text": "DISABLED", - "value": "2" - }, - { - "op": "=", - "text": "DISABLED", - "value": "3" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": true, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": null, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 3, - "w": 2, - "x": 18, - "y": 5 - }, - "height": "95", - "hideTimeOverride": true, - "id": 34, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": "", - "minSpan": 2, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "2*scalar(ceph_osd_flag_nobackfill) + scalar(ceph_pg_backfilling >bool 0)", - "format": "time_series", - "intervalFactor": 2, - "refId": "A", - "step": 2, - "textEditor": true - }, - { - "expr": "", - "format": "time_series", - "intervalFactor": 1, - "refId": "B" - } - ], - "thresholds": "1,2", - "timeFrom": "1m", - "title": "BACKFILL", - "type": "singlestat", - "valueFontSize": "40%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - }, - { - "op": "=", - "text": "ENABLED", - "value": "0" - }, - { - "op": "=", - "text": "ACTIVE", - "value": "1" - }, - { - "op": "=", - "text": "DISABLED", - "value": "2" - }, - { - "op": "=", - "text": 
"DISABLED", - "value": "3" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": true, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": null, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 3, - "w": 2, - "x": 20, - "y": 5 - }, - "height": "95", - "hideTimeOverride": true, - "id": 11, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": "", - "minSpan": 2, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "2*scalar(ceph_osd_flag_norebalance)", - "format": "time_series", - "intervalFactor": 2, - "refId": "A", - "step": 2, - "textEditor": true - } - ], - "thresholds": "1,2", - "timeFrom": "1m", - "title": "REBALANCE", - "type": "singlestat", - "valueFontSize": "40%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - }, - { - "op": "=", - "text": "ENABLED", - "value": "0" - }, - { - "op": "=", - "text": "ACTIVE", - "value": "1" - }, - { - "op": "=", - "text": "DISABLED", - "value": "2" - }, - { - "op": "=", - "text": "DISABLED", - "value": "3" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": true, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": null, - "description": "With the 
recovery flag enabled, disruption in the cluster will result in data being recreated to freespace within the cluster to honor the pool's protection schema. Under some circumstances, you may use the norecover flag to prevent automatic recovery.", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 3, - "w": 2, - "x": 22, - "y": 5 - }, - "height": "95", - "hideTimeOverride": true, - "id": 9, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": "", - "minSpan": 2, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "2*scalar(ceph_osd_flag_norecover) + scalar(ceph_pg_recovering >bool 0)", - "format": "time_series", - "intervalFactor": 2, - "refId": "A", - "step": 2, - "textEditor": true - } - ], - "thresholds": "1,2", - "timeFrom": "1m", - "title": "RECOVERY", - "type": "singlestat", - "valueFontSize": "40%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - }, - { - "op": "=", - "text": "ENABLED", - "value": "0" - }, - { - "op": "=", - "text": "ACTIVE", - "value": "1" - }, - { - "op": "=", - "text": "DISABLED", - "value": "2" - }, - { - "op": "=", - "text": "DISABLED", - "value": "3" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": true, - "colorValue": false, - "colors": [ - "#d44a3a", - "rgba(237, 129, 40, 0.89)", - "#299c46" - ], - "datasource": null, - "format": "none", - "gauge": { - "maxValue": 100, - 
"minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 2, - "w": 3, - "x": 2, - "y": 7 - }, - "id": 69, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "repeat": null, - "repeatDirection": "v", - "repeatIteration": 1526963039202, - "repeatPanelId": 28, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "label_replace(ceph_mon_quorum_status{ceph_daemon=~\"[[mon_servers]]\"},\"mon_host\",\"$2\",\"ceph_daemon\",\"(.*)_(.*)\")", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": "0.5,1", - "title": "$mon_servers", - "type": "singlestat", - "valueFontSize": "50%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - }, - { - "op": "=", - "text": "OK", - "value": "1" - }, - { - "op": "=", - "text": "DOWN", - "value": "0" - } - ], - "valueName": "current" - }, - { - "content": "

OSD
Hosts
Down

", - "description": "Shows which OSD hosts have all defined osd daemons in a down state", - "gridPos": { - "h": 4, - "w": 2, - "x": 8, - "y": 8 - }, - "id": 54, - "links": [], - "minSpan": 2, - "mode": "html", - "title": "", - "transparent": true, - "type": "text" - }, - { - "columns": [], - "datasource": null, - "fontSize": "100%", - "gridPos": { - "h": 4, - "w": 10, - "x": 10, - "y": 8 - }, - "id": 58, - "links": [], - "pageSize": null, - "scroll": true, - "showHeader": true, - "sort": { - "col": 2, - "desc": false - }, - "styles": [ - { - "alias": "Hostname", - "colorMode": "row", - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "instance", - "thresholds": [ - "" - ], - "type": "string", - "unit": "short" - }, - { - "alias": "OSDs Active", - "colorMode": "row", - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "pattern": "Value", - "thresholds": [ - "1" - ], - "type": "hidden", - "unit": "none" - }, - { - "alias": "", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [], - "type": "hidden", - "unit": "short" - } - ], - "targets": [ - { - "expr": "(count by(instance) (ceph_disk_occupation * on(ceph_daemon) group_right(instance) ceph_osd_up == 0) - count by(instance) (ceph_disk_occupation))", - "format": "table", - "hide": false, - "instant": true, - "intervalFactor": 1, - "legendFormat": "", - "refId": "A" - } - ], - "title": "", - "transform": "table", - "transparent": false, - "type": "table" - }, - { - "cacheTimeout": null, - "colorBackground": true, - "colorValue": false, - "colors": [ - "#d44a3a", - "rgba(237, 129, 40, 0.89)", - "#299c46" - ], - "datasource": 
null, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 2, - "w": 3, - "x": 2, - "y": 9 - }, - "id": 70, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "repeat": null, - "repeatDirection": "v", - "repeatIteration": 1526963039202, - "repeatPanelId": 28, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "label_replace(ceph_mon_quorum_status{ceph_daemon=~\"[[mon_servers]]\"},\"mon_host\",\"$2\",\"ceph_daemon\",\"(.*)_(.*)\")", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "", - "refId": "A" - } - ], - "thresholds": "0.5,1", - "title": "$mon_servers", - "type": "singlestat", - "valueFontSize": "50%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - }, - { - "op": "=", - "text": "OK", - "value": "1" - }, - { - "op": "=", - "text": "DOWN", - "value": "0" - } - ], - "valueName": "current" - } - ], - "repeat": null, - "title": "Cluster State", - "type": "row" - }, - { - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 5 - }, - "id": 64, - "panels": [ - { - "columns": [ - { - "text": "Current", - "value": "current" - } - ], - "datasource": null, - "filterNull": false, - "fontSize": "100%", - "gridPos": { - "h": 7, - "w": 5, - "x": 0, - "y": 6 - }, - "id": 18, - "links": [], - "minSpan": 6, - "pageSize": null, - "scroll": true, - "showHeader": true, - "sort": { - "col": null, - "desc": false - 
}, - "styles": [ - { - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "date" - }, - { - "alias": "Object State", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "Metric", - "thresholds": [], - "type": "number", - "unit": "short" - }, - { - "alias": "Count", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "pattern": "Current", - "thresholds": [], - "type": "number", - "unit": "none" - }, - { - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "decimals": 0, - "pattern": "/.*/", - "thresholds": [], - "type": "number", - "unit": "none" - } - ], - "targets": [ - { - "alias": "Objects", - "expr": "ceph_cluster_total_objects", - "format": "time_series", - "groupBy": [ - { - "params": [ - "$__interval" - ], - "type": "time" - }, - { - "params": [ - "null" - ], - "type": "fill" - } - ], - "intervalFactor": 1, - "legendFormat": "Total Objects", - "policy": "default", - "refId": "A", - "resultFormat": "time_series", - "select": [ - [ - { - "params": [ - "value" - ], - "type": "field" - }, - { - "params": [], - "type": "mean" - } - ] - ], - "step": 20, - "tags": [], - "textEditor": true - }, - { - "expr": "", - "format": "table", - "intervalFactor": 1, - "legendFormat": "Objects misplaced", - "refId": "B" - }, - { - "expr": "", - "format": "table", - "intervalFactor": 1, - "legendFormat": "Objects degraded", - "refId": "C" - }, - { - "expr": "", - "format": "table", - "intervalFactor": 1, - "legendFormat": "Objects unfound", - "refId": "D" - } - ], - "title": "Object Summary", - "transform": "timeseries_aggregations", - "type": "table" - }, - { - "columns": [ - { - "text": "Avg", - "value": "avg" - } - 
], - "datasource": null, - "filterNull": false, - "fontSize": "100%", - "gridPos": { - "h": 7, - "w": 5, - "x": 5, - "y": 6 - }, - "id": 20, - "links": [], - "minSpan": 6, - "pageSize": null, - "scroll": true, - "showHeader": true, - "sort": { - "col": null, - "desc": false - }, - "styles": [ - { - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "date" - }, - { - "alias": "PG State", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "Metric", - "thresholds": [], - "type": "number", - "unit": "short" - }, - { - "alias": "Count", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "pattern": "Current", - "thresholds": [], - "type": "number", - "unit": "none" - }, - { - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "decimals": 0, - "pattern": "/.*/", - "thresholds": [], - "type": "number", - "unit": "none" - } - ], - "targets": [ - { - "expr": "ceph_pg_total", - "format": "time_series", - "instant": true, - "intervalFactor": 1, - "legendFormat": "PGs", - "refId": "A", - "step": 20 - }, - { - "expr": "ceph_pg_active", - "format": "time_series", - "hide": false, - "instant": true, - "intervalFactor": 1, - "legendFormat": "PGs Active", - "refId": "B", - "step": 20 - }, - { - "expr": "ceph_pg_clean", - "format": "time_series", - "hide": false, - "instant": true, - "intervalFactor": 1, - "legendFormat": "PGs Active+clean", - "refId": "C", - "step": 20 - }, - { - "expr": "ceph_pg_peering", - "format": "time_series", - "hide": false, - "instant": true, - "intervalFactor": 1, - "legendFormat": "PGs Peering", - "refId": "D", - "step": 20 - }, - { - "expr": "ceph_pg_unknown", - "format": "time_series", - 
"hide": false, - "instant": true, - "intervalFactor": 1, - "legendFormat": "PG's Unknown", - "refId": "E" - } - ], - "title": "PG Summary", - "transform": "timeseries_aggregations", - "type": "table" - } - ], - "title": "RADOS Information", - "type": "row" - }, - { - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 6 - }, - "id": 24, - "panels": [ - { - "columns": [ - { - "text": "Current", - "value": "current" - } - ], - "datasource": null, - "fontSize": "100%", - "gridPos": { - "h": 7, - "w": 3, - "x": 0, - "y": 14 - }, - "id": 14, - "links": [], - "minSpan": 2, - "pageSize": null, - "scroll": true, - "showHeader": true, - "sort": { - "col": 0, - "desc": true - }, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "date" - }, - { - "alias": "% Full", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "pattern": "Current", - "thresholds": [], - "type": "hidden", - "unit": "none" - }, - { - "alias": "Host.OSD Id", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "Metric", - "thresholds": [], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "decimals": 2, - "pattern": "/.*/", - "thresholds": [], - "type": "number", - "unit": "short" - } - ], - "targets": [ - { - "expr": "ceph_osd_stat_bytes_used / ceph_osd_stat_bytes * 100 > $disk_full_threshold", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{ceph_daemon}}", - "refId": "A", - "step": 60, - "textEditor": true - } - ], - "title": "Disks Near Full", - "transform": "timeseries_aggregations", - "type": 
"table" - }, - { - "columns": [ - { - "text": "Current", - "value": "current" - } - ], - "datasource": null, - "fontSize": "100%", - "gridPos": { - "h": 7, - "w": 6, - "x": 3, - "y": 14 - }, - "hideTimeOverride": true, - "id": 15, - "links": [], - "minSpan": 2, - "pageSize": 100, - "scroll": true, - "showHeader": true, - "sort": { - "col": 1, - "desc": false - }, - "styles": [ - { - "alias": "OSD ID", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "pattern": "_id", - "thresholds": [], - "type": "number", - "unit": "none" - }, - { - "alias": "Hostname", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "osd_host", - "thresholds": [], - "type": "string", - "unit": "short" - }, - { - "alias": "Device", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "device", - "thresholds": [], - "type": "string", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [], - "type": "hidden", - "unit": "short" - } - ], - "targets": [ - { - "expr": "label_replace(ceph_osd_up,\"_id\",\"$1\",\"ceph_daemon\",\"osd.(.*)\") * on(ceph_daemon) \n group_left(osd_host,device) label_replace(ceph_disk_occupation,\"osd_host\",\"$1\",\"instance\",\"(.+?)\\\\.(.*)\") < 0.5", - "format": "table", - "hide": false, - "instant": true, - "intervalFactor": 2, - "legendFormat": "", - "refId": "B", - "step": 2, - "textEditor": true - } - ], - "timeFrom": "1m", - "timeShift": null, - 
"title": "OSD's Down", - "transform": "table", - "type": "table" - }, - { - "columns": [ - { - "text": "Current", - "value": "current" - } - ], - "datasource": null, - "fontSize": "100%", - "gridPos": { - "h": 7, - "w": 6, - "x": 9, - "y": 14 - }, - "hideTimeOverride": true, - "id": 16, - "links": [], - "minSpan": 2, - "pageSize": null, - "scroll": true, - "showHeader": true, - "sort": { - "col": 1, - "desc": false - }, - "styles": [ - { - "alias": "OSD ID", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "_id", - "thresholds": [], - "type": "number", - "unit": "short" - }, - { - "alias": "Hostname", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "osd_host", - "thresholds": [], - "type": "string", - "unit": "short" - }, - { - "alias": "Device", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "device", - "thresholds": [], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [], - "type": "hidden", - "unit": "short" - } - ], - "targets": [ - { - "expr": "label_replace(ceph_osd_in,\"_id\",\"$1\",\"ceph_daemon\",\"osd.(.*)\") * on(ceph_daemon) \n group_left(osd_host,device) label_replace(ceph_disk_occupation,\"osd_host\",\"$1\",\"instance\",\"(.+?)\\\\.(.*)\") < 0.5", - "format": "table", - "hide": false, - "instant": true, - "intervalFactor": 1, - "refId": "B" - } - ], - "timeFrom": "5m", - "timeShift": null, - 
"title": "OSDs Out", - "transform": "table", - "type": "table" - }, - { - "columns": [], - "datasource": null, - "description": "This table shows all OSDs with > 275 PG's", - "fontSize": "100%", - "gridPos": { - "h": 7, - "w": 9, - "x": 15, - "y": 14 - }, - "id": 68, - "links": [], - "pageSize": null, - "scroll": true, - "showHeader": true, - "sort": { - "col": 5, - "desc": true - }, - "styles": [ - { - "alias": "OSD", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "ceph_daemon", - "thresholds": [], - "type": "string", - "unit": "short" - }, - { - "alias": "Device", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "device", - "thresholds": [], - "type": "string", - "unit": "short" - }, - { - "alias": "Host", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "instance", - "thresholds": [], - "type": "number", - "unit": "short" - }, - { - "alias": "# PGs", - "colorMode": "row", - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "pattern": "Value", - "thresholds": [ - "200", - "250" - ], - "type": "number", - "unit": "none" - }, - { - "alias": "", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "decimals": 2, - "pattern": "/.*/", - "thresholds": [], - "type": "hidden", - "unit": "short" - } - ], - "targets": [ - { - "expr": "(ceph_osd_numpg > 275) * on(ceph_daemon) group_left(instance,device) ceph_disk_occupation", - "format": "table", - "instant": 
true, - "intervalFactor": 1, - "refId": "A" - } - ], - "title": "OSDs with High PG Counts", - "transform": "table", - "type": "table" - }, - { - "content": "", - "gridPos": { - "h": 7, - "w": 2, - "x": 6, - "y": 21 - }, - "id": 17, - "links": [], - "minSpan": 2, - "mode": "html", - "title": "", - "transparent": true, - "type": "text" - }, - { - "content": "", - "gridPos": { - "h": 7, - "w": 2, - "x": 16, - "y": 21 - }, - "id": 19, - "links": [], - "minSpan": 2, - "mode": "markdown", - "title": "", - "transparent": true, - "type": "text" - } - ], - "repeat": null, - "title": "Storage Information", - "type": "row" - } - ], - "refresh": "10s", - "schemaVersion": 16, - "style": "dark", - "tags": [ - "overview" - ], - "templating": { - "list": [ - { - "allValue": null, - "current": {}, - "datasource": null, - "hide": 2, - "includeAll": false, - "label": null, - "multi": false, - "name": "osd_servers", - "options": [], - "query": "ceph_disk_occupation", - "refresh": 1, - "regex": "/instance=\"([^\"]*)\"/", - "sort": 3, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "selected": true, - "text": "85", - "value": "85" - }, - "hide": 2, - "includeAll": false, - "label": null, - "multi": false, - "name": "disk_full_threshold", - "options": [ - { - "selected": true, - "text": "85", - "value": "85" - } - ], - "query": "85", - "type": "custom" - }, - { - "allValue": null, - "current": {}, - "datasource": null, - "hide": 2, - "includeAll": true, - "label": null, - "multi": false, - "name": "mon_servers", - "options": [], - "query": "label_values(ceph_mon_quorum_status, ceph_daemon)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "15s", - "30s", - "1m", - "5m", - "15m", - 
"30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "browser", - "title": "Ceph Health", - "version": 43 -} diff --git a/ansible/roles/ceph-grafana/files/dashboards/mgr-prometheus/ceph-osd-information.json b/ansible/roles/ceph-grafana/files/dashboards/mgr-prometheus/ceph-osd-information.json deleted file mode 100644 index b0cb210..0000000 --- a/ansible/roles/ceph-grafana/files/dashboards/mgr-prometheus/ceph-osd-information.json +++ /dev/null @@ -1,2021 +0,0 @@ -{ - "__requires": [ - { - "type": "grafana", - "id": "grafana", - "name": "Grafana", - "version": "5.0.0" - }, - { - "type": "panel", - "id": "grafana-piechart-panel", - "name": "Pie Chart", - "version": "1.3.3" - }, - { - "type": "datasource", - "id": "prometheus", - "name": "Local", - "version": "5.0.0" - }, - { - "type": "panel", - "id": "singlestat", - "name": "Singlestat", - "version": "5.0.0" - }, - { - "type": "panel", - "id": "table", - "name": "Table", - "version": "5.0.0" - } - ], - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "editable": false, - "gnetId": null, - "graphTooltip": 0, - "id": null, - "iteration": 1531263612973, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "tags": [ - "overview" - ], - "title": "Shortcuts", - "type": "dashboards" - } - ], - "panels": [ - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 24, - "panels": [], - "repeat": null, - "title": "OSD Summary", - "type": "row" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": null, - "format": "none", - "gauge": { - "maxValue": 100, - 
"minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 6, - "w": 2, - "x": 0, - "y": 1 - }, - "hideTimeOverride": true, - "id": 1, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": "", - "minSpan": 2, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "count(max by (ceph_daemon) (ceph_osd_metadata))", - "format": "time_series", - "intervalFactor": 2, - "refId": "A", - "step": 2, - "textEditor": true - } - ], - "thresholds": "", - "timeFrom": "1m", - "title": "OSDs", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "avg" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": null, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 6, - "w": 2, - "x": 2, - "y": 1 - }, - "hideTimeOverride": true, - "id": 2, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "minSpan": 2, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - 
"rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "count(ceph_osd_up > 0.5)", - "format": "time_series", - "intervalFactor": 2, - "refId": "A", - "step": 2, - "textEditor": true - } - ], - "thresholds": "", - "timeFrom": "1m", - "title": "OSDs UP", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "avg" - }, - { - "cacheTimeout": null, - "colorBackground": true, - "colorValue": false, - "colors": [ - "rgba(251,251,251,0.97)", - "rgba(255,165,0, 1)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": null, - "decimals": 0, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 6, - "w": 2, - "x": 4, - "y": 1 - }, - "hideTimeOverride": true, - "id": 3, - "interval": null, - "links": [ - { - "dashUri": "db/ceph-health", - "dashboard": "Ceph Health", - "targetBlank": true, - "title": "Ceph Health", - "type": "dashboard" - } - ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "minSpan": 2, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "count(ceph_osd_up < 0.5)", - "format": "time_series", - "intervalFactor": 2, - "refId": "A", - "step": 2 - } - ], - "thresholds": "1,3", - "timeFrom": "1m", - 
"title": "OSDs DOWN", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "0", - "value": "null" - } - ], - "valueName": "current" - }, - { - "columns": [ - { - "text": "Current", - "value": "current" - } - ], - "datasource": null, - "fontSize": "100%", - "gridPos": { - "h": 6, - "w": 6, - "x": 6, - "y": 1 - }, - "hideTimeOverride": true, - "id": 5, - "links": [], - "maxDataPoints": "", - "minSpan": 4, - "pageSize": 50, - "scroll": true, - "showHeader": true, - "sort": { - "col": 4, - "desc": false - }, - "styles": [ - { - "alias": "Hostname", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "instance", - "thresholds": [], - "type": "string", - "unit": "short" - }, - { - "alias": "OSD", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "ceph_osd", - "thresholds": [], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "device", - "thresholds": [], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "Value", - "thresholds": [], - "type": "number", - "unit": "bytes" - }, - { - "alias": "", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "decimals": 2, - "pattern": "/.*/", - "thresholds": [], - "type": "hidden", - "unit": "short" - } - ], - "targets": [ - { - 
"expr": "sum by (instance, ceph_daemon) (\n ceph_disk_occupation*0 + on (ceph_daemon) group_right(instance,device,osd_id) ceph_osd_stat_bytes\n)", - "format": "time_series", - "hide": true, - "instant": true, - "intervalFactor": 1, - "legendFormat": "", - "refId": "A", - "step": 2, - "textEditor": true - }, - { - "expr": "label_replace(ceph_disk_occupation,\"ceph_osd\",\"$1\",\"ceph_daemon\",\"osd.(.*)\") * on(ceph_osd) group_right(instance,device) \nlabel_replace(ceph_osd_stat_bytes{ceph_daemon=~\"osd.[[osd_id]]\"},\"ceph_osd\",\"$1\",\"ceph_daemon\",\"osd.(.*)\")", - "format": "table", - "hide": false, - "instant": true, - "intervalFactor": 1, - "refId": "B" - } - ], - "timeFrom": "1m", - "timeShift": null, - "title": "OSD Size", - "transform": "table", - "type": "table" - }, - { - "aliasColors": {}, - "breakPoint": "50%", - "cacheTimeout": null, - "combine": { - "label": "Others", - "threshold": 0 - }, - "datasource": null, - "fontSize": "80%", - "format": "none", - "gridPos": { - "h": 6, - "w": 4, - "x": 12, - "y": 1 - }, - "id": 4, - "interval": null, - "legend": { - "show": true, - "values": true - }, - "legendType": "Under graph", - "links": [], - "maxDataPoints": 3, - "nullPointMode": "connected", - "pieType": "pie", - "strokeWidth": 1, - "targets": [ - { - "expr": "count by(device_class) (ceph_osd_metadata)", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{device_class}}", - "refId": "A" - } - ], - "title": "OSD Types Summary", - "type": "grafana-piechart-panel", - "valueName": "current" - }, - { - "aliasColors": {}, - "breakPoint": "50%", - "cacheTimeout": null, - "combine": { - "label": "Others", - "threshold": "0.05" - }, - "datasource": null, - "description": "The pie chart shows the various OSD sizes used within the cluster", - "fontSize": "80%", - "format": "none", - "gridPos": { - "h": 6, - "w": 4, - "x": 16, - "y": 1 - }, - "height": "220", - "hideTimeOverride": true, - "id": 27, - "interval": null, - "legend": { - "header": 
"", - "percentage": false, - "show": true, - "sideWidth": null, - "sortDesc": true, - "values": true - }, - "legendType": "Under graph", - "links": [], - "maxDataPoints": "", - "minSpan": 6, - "nullPointMode": "connected", - "pieType": "pie", - "strokeWidth": "1", - "targets": [ - { - "expr": "count(ceph_osd_stat_bytes < 1099511627776)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "<1 TB", - "refId": "A", - "step": 2 - }, - { - "expr": "count(ceph_osd_stat_bytes >= 1099511627776 < 2199023255552)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "<2 TB", - "refId": "B", - "step": 2 - }, - { - "expr": "count(ceph_osd_stat_bytes >= 2199023255552 < 3298534883328)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "<3TB", - "refId": "C", - "step": 2 - }, - { - "expr": "count(ceph_osd_stat_bytes >= 3298534883328 < 4398046511104)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "<4TB", - "refId": "D", - "step": 2 - }, - { - "expr": "count(ceph_osd_stat_bytes >= 4398046511104 < 6597069766656)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "<6TB", - "refId": "E", - "step": 2 - }, - { - "expr": "count(ceph_osd_stat_bytes >= 6597069766656 < 8796093022208)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "<8TB", - "refId": "F", - "step": 2 - }, - { - "expr": "count(ceph_osd_stat_bytes >= 8796093022208 < 10995116277760)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "<10TB", - "refId": "G", - "step": 2 - }, - { - "expr": "count(ceph_osd_stat_bytes >= 10995116277760 < 13194139533312)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "<12TB", - "refId": "H", - "step": 2 - }, - { - "expr": "count(ceph_osd_stat_bytes >= 13194139533312)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "12TB+", - "refId": "I", - "step": 2 - } - ], - "timeFrom": "2m", - "timeShift": null, - "title": "OSD Size 
Summary", - "type": "grafana-piechart-panel", - "valueName": "current" - }, - { - "aliasColors": { - "Non-Encrypted": "#E5AC0E" - }, - "breakPoint": "50%", - "cacheTimeout": null, - "combine": { - "label": "Others", - "threshold": 0 - }, - "datasource": null, - "fontSize": "80%", - "format": "none", - "gridPos": { - "h": 6, - "w": 4, - "x": 20, - "y": 1 - }, - "height": "200px", - "hideTimeOverride": true, - "id": 7, - "interval": null, - "legend": { - "percentage": false, - "show": true, - "values": true - }, - "legendType": "Under graph", - "links": [], - "maxDataPoints": "1", - "minSpan": 4, - "nullPointMode": "connected", - "pieType": "pie", - "strokeWidth": 1, - "targets": [ - { - "expr": "count(ceph_bluefs_wal_total_bytes)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "bluestore", - "refId": "A", - "step": 240 - }, - { - "expr": "count(ceph_osd_metadata) - count(ceph_bluefs_wal_total_bytes)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "filestore", - "refId": "B", - "step": 240 - }, - { - "expr": "absent(ceph_bluefs_wal_total_bytes)*count(ceph_osd_metadata)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "filestore", - "refId": "C", - "step": 240 - } - ], - "timeFrom": "2m", - "timeShift": null, - "title": "OSD Objectstore Types", - "type": "grafana-piechart-panel", - "valueName": "current" - }, - { - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 7 - }, - "id": 25, - "panels": [ - { - "content": "

Ceph Filestore I/O Process

\n

\nA write request is first committed to a journal using direct-io (apply). Once this write is complete, the data is persisted to HDD by a second 'buffered' write operation (commit). The commit operation is basically a measure of the time taken to perform a syncfs call to flush dirty pages to disk, and is therefore not a time associated with any specific client-initiated operation.

The tables on the right show commit and apply latencies for all OSDs, or use the pull down above to focus on a specific OSD.\n", - "gridPos": { - "h": 5, - "w": 10, - "x": 0, - "y": 8 - }, - "height": "300", - "id": 8, - "links": [], - "minSpan": 6, - "mode": "html", - "title": "", - "transparent": true, - "type": "text" - }, - { - "columns": [ - { - "text": "Current", - "value": "current" - } - ], - "datasource": "Local", - "description": "Filestore OSDs", - "fontSize": "100%", - "gridPos": { - "h": 8, - "w": 2, - "x": 10, - "y": 8 - }, - "height": "310", - "hideTimeOverride": true, - "id": 9, - "links": [], - "minSpan": 2, - "pageSize": 0, - "scroll": true, - "showHeader": true, - "sort": { - "col": 0, - "desc": false - }, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "date" - }, - { - "alias": "OSD ID", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "Metric", - "thresholds": [], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "decimals": 2, - "pattern": "/.*/", - "thresholds": [], - "type": "hidden", - "unit": "short" - } - ], - "targets": [ - { - "expr": "label_replace(\n ceph_disk_occupation{ceph_daemon=~\"osd.($filestore_osd_id)\"},\n \"osd_num\",\n \"$1\",\n \"ceph_daemon\",\n \"osd.(.*)\"\n)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{osd_num}}", - "refId": "A", - "step": 2, - "textEditor": true - } - ], - "timeFrom": "2m", - "timeShift": null, - "title": "", - "transform": "timeseries_aggregations", - "type": "table" - }, - { - "columns": [], - "datasource": "Local", - "description": "Apply latency covers the time taken to commit to the journal and complete the transaction", - 
"fontSize": "100%", - "gridPos": { - "h": 8, - "w": 5, - "x": 12, - "y": 8 - }, - "height": "310", - "hideTimeOverride": true, - "id": 12, - "links": [], - "minSpan": 4, - "pageSize": 0, - "scroll": true, - "showHeader": true, - "sort": { - "col": 6, - "desc": true - }, - "styles": [ - { - "alias": "OSD ID", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "osd_num", - "thresholds": [], - "type": "number", - "unit": "short" - }, - { - "alias": "Apply Latency", - "colorMode": "row", - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "Value", - "thresholds": [ - "50", - "150" - ], - "type": "number", - "unit": "ms" - }, - { - "alias": "", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "decimals": 2, - "pattern": "/.*/", - "thresholds": [], - "type": "hidden", - "unit": "short" - } - ], - "targets": [ - { - "expr": "label_replace(\n topk($max_devices,\n ceph_osd_apply_latency_ms{ceph_daemon=~\"osd.[[osd_id]]\"} + on (ceph_daemon) (ceph_filestore_journal_latency_count * 0)\n ),\n \"osd_num\",\n \"$1\",\n \"ceph_daemon\",\n \"osd.(.*)\"\n)", - "format": "table", - "instant": true, - "intervalFactor": 1, - "legendFormat": "{{ceph_daemon}}", - "refId": "A", - "textEditor": true - } - ], - "timeFrom": "2m", - "title": "", - "transform": "table", - "type": "table" - }, - { - "columns": [ - { - "text": "Current", - "value": "current" - } - ], - "datasource": "Local", - "description": "Commit latency is the time taken for writes to be flushed to disk as part of async kernel activity", - "fontSize": "100%", - "gridPos": { - "h": 8, - "w": 5, - "x": 17, - "y": 8 - }, - "height": "310", - "hideTimeOverride": true, - "id": 13, - 
"links": [], - "minSpan": 4, - "pageSize": 0, - "scroll": false, - "showHeader": true, - "sort": { - "col": 6, - "desc": true - }, - "styles": [ - { - "alias": "OSD ID", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "osd_num", - "thresholds": [], - "type": "number", - "unit": "short" - }, - { - "alias": "Commit Latency", - "colorMode": "row", - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "Value", - "thresholds": [ - "50", - "150" - ], - "type": "number", - "unit": "ms" - }, - { - "alias": "", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "decimals": 2, - "pattern": "/.*/", - "thresholds": [], - "type": "hidden", - "unit": "short" - } - ], - "targets": [ - { - "expr": "label_replace(\n topk($max_devices,\n ceph_osd_commit_latency_ms{ceph_daemon=~\"osd.[[osd_id]]\"} + on (ceph_daemon) (ceph_filestore_journal_latency_count * 0)\n ),\n \"osd_num\",\n \"$1\",\n \"ceph_daemon\",\n \"osd.(.*)\"\n )", - "format": "table", - "instant": true, - "intervalFactor": 1, - "legendFormat": "{{ceph_daemon}}", - "refId": "A", - "textEditor": true - } - ], - "timeFrom": "2m", - "title": "", - "transform": "table", - "type": "table" - }, - { - "aliasColors": { - "95%ile Commit Latency": "#447EBC", - "Apply Latency Max": "#890F02" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "Local", - "description": "Shows the latency for a given OSD, allowing you to compare a specific OSD against the $percentile%ile graph. 
Note that when the \"OSD Id\" pull-down shows **ALL**, the graph will be empty to avoid the chart being unreadable.", - "fill": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 16 - }, - "height": "300px", - "id": 14, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "maxDataPoints": "", - "minSpan": 12, - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "Apply Latency Max", - "fill": 0 - }, - { - "alias": "95%ile Apply Latency", - "fill": 2 - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "avg(\n ceph_osd_commit_latency_ms{ceph_daemon=~\"osd.([[osd_id]])\"} + on (ceph_daemon) (ceph_filestore_journal_latency_count * 0)\n)", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "commit", - "refId": "B", - "textEditor": true - }, - { - "expr": "avg(\n ceph_osd_apply_latency_ms{ceph_daemon=~\"osd.([[osd_id]])\"} + on (ceph_daemon) (ceph_filestore_journal_latency_count * 0)\n)", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "apply", - "refId": "A", - "textEditor": true - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Filestore Latency for OSD '$osd_id'", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - 
"datasource": "Local", - "fill": 1, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 16 - }, - "height": "300px", - "id": 15, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "minSpan": 12, - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "quantile(\n $percentile/100,\n ceph_osd_commit_latency_ms + on (ceph_daemon) (ceph_filestore_journal_latency_count * 0)\n)", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "commit", - "refId": "B", - "textEditor": true - }, - { - "expr": "quantile(\n $percentile/100,\n ceph_osd_apply_latency_ms + on (ceph_daemon) (ceph_filestore_journal_latency_count * 0)\n)", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "apply", - "refId": "A", - "textEditor": true - }, - { - "expr": "quantile(\n $percentile/100,\n rate(ceph_filestore_journal_latency_sum[$__interval]) / \n (rate(ceph_filestore_journal_latency_count[$__interval]) != 0)\n)", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "journal", - "refId": "C", - "textEditor": true - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Filestore IO Summary - all OSD's @ $percentile%ile", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "title": "Filestore OSD 
Latencies", - "type": "row" - }, - { - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 8 - }, - "id": 26, - "panels": [ - { - "content": "

Ceph Bluestore I/O Process

\n

\nUnlike filestore, bluestore does not suffer from a double-write penalty (i.e., a write to the journal followed by a write to the HDD). With bluestore, once a write is scheduled (submit and throttle latencies), it is done directly to the disk (AIO wait), and then the metadata relating to the object is changed (kv_latency). Writes are not considered complete until the kv store is updated.

The tables on the right focus on the top 10 Bluestore OSDs with the highest latencies.\n", - "gridPos": { - "h": 8, - "w": 6, - "x": 0, - "y": 9 - }, - "height": "300", - "id": 16, - "links": [], - "minSpan": 6, - "mode": "html", - "title": "", - "type": "text" - }, - { - "columns": [], - "datasource": "Local", - "description": "Bluestore OSDs", - "fontSize": "100%", - "gridPos": { - "h": 8, - "w": 2, - "x": 6, - "y": 9 - }, - "height": "310", - "hideTimeOverride": true, - "id": 17, - "links": [], - "minSpan": 2, - "pageSize": null, - "scroll": true, - "showHeader": true, - "sort": { - "col": 0, - "desc": true - }, - "styles": [ - { - "alias": "OSD ID", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "decimals": 2, - "pattern": "/.*/", - "thresholds": [], - "type": "number", - "unit": "short" - } - ], - "targets": [ - { - "expr": "label_replace(\n ceph_disk_occupation{ceph_daemon=~\"osd.($bluestore_osd_id)\"},\n \"osd_num\",\n \"$1\",\n \"ceph_daemon\",\n \"osd.(.*)\"\n)", - "format": "time_series", - "instant": true, - "intervalFactor": 2, - "legendFormat": "{{osd_num}}", - "refId": "A", - "textEditor": true - } - ], - "timeFrom": "2m", - "timeShift": null, - "title": "", - "transform": "timeseries_aggregations", - "type": "table" - }, - { - "columns": [], - "datasource": "Local", - "description": "Time spent preparing the request (transaction)", - "fontSize": "100%", - "gridPos": { - "h": 8, - "w": 4, - "x": 8, - "y": 9 - }, - "height": "310", - "hideTimeOverride": true, - "id": 18, - "links": [], - "minSpan": 4, - "pageSize": 0, - "scroll": true, - "showHeader": true, - "sort": { - "col": null, - "desc": false - }, - "styles": [ - { - "alias": "OSD ID", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "pattern": "osd_num", - "thresholds": [], - 
"type": "string", - "unit": "s" - }, - { - "alias": "Submit Latency", - "colorMode": "row", - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "Value", - "thresholds": [ - ".001", - ".003" - ], - "type": "number", - "unit": "s" - }, - { - "alias": "", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "decimals": 2, - "pattern": "/.*/", - "thresholds": [], - "type": "hidden", - "unit": "short" - } - ], - "targets": [ - { - "expr": "label_replace(\n (\n topk($max_devices,\n irate(ceph_bluestore_submit_lat_sum{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) / \n (irate(ceph_bluestore_submit_lat_count{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) != 0)\n )\n ),\n \"osd_num\",\n \"$1\",\n \"ceph_daemon\",\n \"osd.(.*)\"\n)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "refId": "A", - "textEditor": true - } - ], - "timeFrom": "2m", - "title": "", - "transform": "table", - "type": "table" - }, - { - "columns": [], - "datasource": "Local", - "description": "Time requests wait due to throttling or busy conditions", - "fontSize": "100%", - "gridPos": { - "h": 8, - "w": 4, - "x": 12, - "y": 9 - }, - "height": "310", - "hideTimeOverride": true, - "id": 19, - "links": [], - "minSpan": 4, - "pageSize": 0, - "scroll": true, - "showHeader": true, - "sort": { - "col": 4, - "desc": false - }, - "styles": [ - { - "alias": "OSD ID", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "pattern": "osd_num", - "thresholds": [], - "type": "string", - "unit": "short" - }, - { - "alias": "Throttle Latency", - "colorMode": "row", - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - 
"dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "Value", - "thresholds": [ - ".001", - ".003" - ], - "type": "number", - "unit": "s" - }, - { - "alias": "", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "decimals": 2, - "pattern": "/.*/", - "thresholds": [], - "type": "hidden", - "unit": "short" - } - ], - "targets": [ - { - "expr": "label_replace(\n (\n topk($max_devices,\n irate(ceph_bluestore_throttle_lat_sum{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) / \n (irate(ceph_bluestore_throttle_lat_count{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) != 0)\n )\n ),\n \"osd_num\",\n \"$1\",\n \"ceph_daemon\",\n \"osd.(.*)\"\n)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "refId": "A", - "textEditor": true - } - ], - "timeFrom": "2m", - "title": "", - "transform": "table", - "type": "table" - }, - { - "columns": [], - "datasource": "Local", - "description": "Time spent waiting for the physical I/O request to complete", - "fontSize": "100%", - "gridPos": { - "h": 8, - "w": 4, - "x": 16, - "y": 9 - }, - "height": "310", - "hideTimeOverride": true, - "id": 20, - "links": [], - "minSpan": 4, - "pageSize": 0, - "scroll": true, - "showHeader": true, - "sort": { - "col": 5, - "desc": false - }, - "styles": [ - { - "alias": "OSD ID", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "pattern": "osd_num", - "thresholds": [], - "type": "string", - "unit": "short" - }, - { - "alias": "AIO Wait Time", - "colorMode": "row", - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "Value", - "thresholds": [ - ".020", - ".050" - ], - "type": "number", - "unit": "s" - }, - { - "alias": "", - "colorMode": null, - 
"colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "decimals": 2, - "pattern": "/.*/", - "thresholds": [], - "type": "hidden", - "unit": "short" - } - ], - "targets": [ - { - "expr": "label_replace(\n (\n topk($max_devices,\n irate(ceph_bluestore_state_aio_wait_lat_sum{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) / \n (irate(ceph_bluestore_state_aio_wait_lat_count{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) != 0)\n )\n ),\n \"osd_num\",\n \"$1\",\n \"ceph_daemon\",\n \"osd.(.*)\"\n)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "refId": "A", - "textEditor": true - } - ], - "timeFrom": "2m", - "title": "", - "transform": "table", - "type": "table" - }, - { - "columns": [], - "datasource": "Local", - "description": "Time spent waiting for rocksdb (metadata store) to commit meta data", - "fontSize": "100%", - "gridPos": { - "h": 8, - "w": 4, - "x": 20, - "y": 9 - }, - "height": "310", - "hideTimeOverride": true, - "id": 21, - "links": [], - "minSpan": 4, - "pageSize": 0, - "scroll": true, - "showHeader": true, - "sort": { - "col": null, - "desc": false - }, - "styles": [ - { - "alias": "OSD ID", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "osd_num", - "thresholds": [], - "type": "string", - "unit": "short" - }, - { - "alias": "KV Latency", - "colorMode": "row", - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "Value", - "thresholds": [ - ".020", - ".050" - ], - "type": "number", - "unit": "s" - }, - { - "alias": "", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "decimals": 2, - "pattern": "/.*/", - "thresholds": [], - "type": "hidden", - 
"unit": "short" - } - ], - "targets": [ - { - "expr": "label_replace(\n (\n topk($max_devices,\n irate(ceph_bluestore_kv_lat_sum{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) / \n (irate(ceph_bluestore_kv_lat_count{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) != 0)\n )\n ),\n \"osd_num\",\n \"$1\",\n \"ceph_daemon\",\n \"osd.(.*)\"\n)", - "format": "table", - "instant": true, - "intervalFactor": 2, - "refId": "A", - "textEditor": true - } - ], - "timeFrom": "2m", - "title": "", - "transform": "table", - "type": "table" - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "Local", - "fill": 1, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 17 - }, - "height": "300", - "id": 22, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "minSpan": 12, - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "avg(\n irate(ceph_bluestore_submit_lat_sum{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) / \n (irate(ceph_bluestore_submit_lat_count{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) != 0)\n)", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "Submit", - "refId": "A", - "textEditor": true - }, - { - "expr": "avg(\n irate(ceph_bluestore_throttle_lat_sum{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) / \n (irate(ceph_bluestore_throttle_lat_count{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) != 0)\n)", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "Throttle", - "refId": "B", - "textEditor": true - }, - { - "expr": "avg(\n irate(ceph_bluestore_state_aio_wait_lat_sum{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) / \n 
(irate(ceph_bluestore_state_aio_wait_lat_count{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) != 0)\n)", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "AIO Wait", - "refId": "C", - "textEditor": true - }, - { - "expr": "avg(\n irate(ceph_bluestore_kv_lat_sum{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) / \n (irate(ceph_bluestore_kv_lat_count{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) != 0)\n)", - "format": "time_series", - "hide": false, - "intervalFactor": 2, - "legendFormat": "KV Latency", - "refId": "D", - "textEditor": true - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Bluestore Latency for OSD '$osd_id'", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "Local", - "description": "This charts shows the $percentile%ile latencies across all OSDs, which indicates overall performance, but does not represent any specific OSD", - "fill": 1, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 17 - }, - "height": "300px", - "id": 23, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "minSpan": 12, - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "quantile(\n $percentile/100,\n 
irate(ceph_bluestore_submit_lat_sum[$__interval]) / \n (irate(ceph_bluestore_submit_lat_count[$__interval]) != 0)\n)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Submit", - "refId": "A", - "textEditor": true - }, - { - "expr": "quantile(\n $percentile/100,\n irate(ceph_bluestore_throttle_lat_sum[$__interval]) / \n (irate(ceph_bluestore_throttle_lat_count[$__interval]) != 0)\n)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Throttle", - "refId": "B", - "textEditor": true - }, - { - "expr": "quantile(\n $percentile/100,\n irate(ceph_bluestore_state_aio_wait_lat_sum[$__interval]) / \n (irate(ceph_bluestore_state_aio_wait_lat_count[$__interval]) != 0)\n)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "AIO Wait", - "refId": "C", - "textEditor": true - }, - { - "expr": "quantile(\n $percentile/100,\n irate(ceph_bluestore_kv_lat_sum[$__interval]) / \n (irate(ceph_bluestore_kv_lat_count[$__interval]) != 0)\n)", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "KV Latency", - "refId": "D", - "textEditor": true - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "BlueStore IO Summary - all OSD's @ $percentile%ile", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "title": "Bluestore OSD Latencies", - "type": "row" - } - ], - "refresh": "15s", - "schemaVersion": 16, - "style": "dark", - "tags": [ - "overview" - ], - "templating": { - "list": [ - { - "allValue": null, - "current": { - "selected": true, - "text": "95", - "value": "95" - }, - "hide": 0, - 
"includeAll": false, - "label": null, - "multi": false, - "name": "percentile", - "options": [ - { - "selected": true, - "text": "95", - "value": "95" - }, - { - "selected": false, - "text": "96", - "value": "96" - }, - { - "selected": false, - "text": "97", - "value": "97" - }, - { - "selected": false, - "text": "98", - "value": "98" - }, - { - "selected": false, - "text": "99", - "value": "99" - } - ], - "query": "95,96,97,98,99", - "type": "custom" - }, - { - "allValue": null, - "current": { - "selected": true, - "text": "10", - "value": "10" - }, - "hide": 2, - "includeAll": false, - "label": null, - "multi": false, - "name": "max_devices", - "options": [ - { - "selected": true, - "text": "10", - "value": "10" - } - ], - "query": "10", - "type": "custom" - }, - { - "allValue": ".*", - "current": {}, - "datasource": null, - "hide": 0, - "includeAll": true, - "label": "OSD Id", - "multi": false, - "name": "osd_id", - "options": [], - "query": "label_values(ceph_osd_metadata, ceph_daemon)", - "refresh": 1, - "regex": "/osd\\.(.*)/", - "sort": 3, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": "", - "current": {}, - "datasource": null, - "hide": 2, - "includeAll": true, - "label": null, - "multi": false, - "name": "bluestore_osd_id", - "options": [], - "query": "label_values(ceph_bluefs_bytes_written_wal, ceph_daemon)", - "refresh": 2, - "regex": "/osd\\.(.*)/", - "sort": 3, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": "", - "current": {}, - "datasource": null, - "hide": 2, - "includeAll": true, - "label": null, - "multi": false, - "name": "filestore_osd_id", - "options": [], - "query": "label_values(ceph_filestore_journal_latency_count, ceph_daemon)", - "refresh": 2, - "regex": "/osd\\.(.*)/", - "sort": 3, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { 
- "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "15s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "browser", - "title": "Ceph OSD Information", - "version": 31 -} diff --git a/ansible/roles/ceph-grafana/files/dashboards/mgr-prometheus/ceph-pools.json b/ansible/roles/ceph-grafana/files/dashboards/mgr-prometheus/ceph-pools.json deleted file mode 100644 index 4f3473b..0000000 --- a/ansible/roles/ceph-grafana/files/dashboards/mgr-prometheus/ceph-pools.json +++ /dev/null @@ -1,932 +0,0 @@ -{ - "__requires": [ - { - "type": "grafana", - "id": "grafana", - "name": "Grafana", - "version": "5.0.4" - }, - { - "type": "panel", - "id": "graph", - "name": "Graph", - "version": "5.0.0" - }, - { - "type": "datasource", - "id": "prometheus", - "name": "Local", - "version": "5.0.0" - }, - { - "type": "panel", - "id": "table", - "name": "Table", - "version": "5.0.0" - } - ], - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "editable": false, - "gnetId": null, - "graphTooltip": 0, - "id": null, - "iteration": 1526263024209, - "links": [], - "panels": [ - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 14, - "panels": [], - "repeat": null, - "title": "Pool Overview", - "type": "row" - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 5, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 1 - }, - "id": 1, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "minSpan": 
12, - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "(label_replace((rate(ceph_pool_rd{pool_id=~\"[[pool_id]]\"}[1m]) + rate(ceph_pool_wr{pool_id=~\"[[pool_id]]\"}[1m])),\"id\", \"$1\", \"pool_id\", \"(.*)\") + on(pool_id) group_left(instance,name) ceph_pool_metadata{pool_id=~\"[[pool_id]]\"}) ", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "{{name}}", - "refId": "F" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Client IOPS by Pool", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "none", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 5, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 1 - }, - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 2, - "links": [], - "minSpan": 12, - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "(label_replace((rate(ceph_pool_rd_bytes{pool_id=~\"[[pool_id]]\"}[1m]) + rate(ceph_pool_wr_bytes{pool_id=~\"[[pool_id]]\"}[1m])),\"id\", \"$1\", \"pool_id\", \"(.*)\") + on(pool_id) group_left(instance,name) 
ceph_pool_metadata{pool_id=~\"[[pool_id]]\"}) ", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{name}}", - "refId": "A", - "textEditor": true - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Client Throughput by Pool", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "decbytes", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 8 - }, - "id": 15, - "panels": [], - "repeat": null, - "title": "Top 5's", - "type": "row" - }, - { - "columns": [ - { - "text": "Current", - "value": "current" - } - ], - "datasource": null, - "fontSize": "100%", - "gridPos": { - "h": 7, - "w": 8, - "x": 0, - "y": 9 - }, - "id": 3, - "links": [], - "minSpan": 12, - "pageSize": null, - "scroll": true, - "showHeader": true, - "sort": { - "col": 6, - "desc": true - }, - "styles": [ - { - "alias": "", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "Time", - "thresholds": [], - "type": "hidden", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "id", - "thresholds": [], - "type": "hidden", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, 
- "pattern": "instance", - "thresholds": [], - "type": "hidden", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "job", - "thresholds": [], - "type": "hidden", - "unit": "short" - }, - { - "alias": "Pool Name", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "name", - "thresholds": [], - "type": "number", - "unit": "short" - }, - { - "alias": "Pool ID", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "pool_id", - "thresholds": [], - "type": "number", - "unit": "short" - }, - { - "alias": "IOPS (R+W)", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "pattern": "Value", - "thresholds": [], - "type": "number", - "unit": "none" - } - ], - "targets": [ - { - "expr": "topk(5,(label_replace((irate(ceph_pool_rd{pool_id=~\"[[pool_id]]\"}[1m]) + irate(ceph_pool_wr{pool_id=~\"[[pool_id]]\"}[1m])),\"id\", \"$1\", \"pool_id\", \"(.*)\") + on(pool_id) group_left(instance,name) ceph_pool_metadata{pool_id=~\"[[pool_id]]\"}) )", - "format": "table", - "instant": true, - "intervalFactor": 2, - "refId": "A", - "textEditor": true - } - ], - "title": "Top 5 Pools by Client IOPS", - "transform": "table", - "type": "table" - }, - { - "columns": [ - { - "text": "Current", - "value": "current" - } - ], - "datasource": null, - "fontSize": "100%", - "gridPos": { - "h": 7, - "w": 8, - "x": 8, - "y": 9 - }, - "id": 4, - "links": [], - "minSpan": 12, - "pageSize": null, - "scroll": true, 
- "showHeader": true, - "sort": { - "col": 6, - "desc": true - }, - "styles": [ - { - "alias": "Time", - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "pattern": "Time", - "type": "hidden" - }, - { - "alias": "", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "id", - "thresholds": [], - "type": "hidden", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "instance", - "thresholds": [], - "type": "hidden", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "job", - "thresholds": [], - "type": "hidden", - "unit": "short" - }, - { - "alias": "Pool Name", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "name", - "thresholds": [], - "type": "number", - "unit": "short" - }, - { - "alias": "Pool ID", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "pool_id", - "thresholds": [], - "type": "number", - "unit": "short" - }, - { - "alias": "Throughput", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "Value", - "thresholds": [], - "type": "number", - "unit": "decbytes" - } - ], - "targets": [ - { - "expr": 
"(label_replace((irate(ceph_pool_rd_bytes{pool_id=~\"[[pool_id]]\"}[1m]) + irate(ceph_pool_wr_bytes{pool_id=~\"[[pool_id]]\"}[1m])),\"id\", \"$1\", \"pool_id\", \"(.*)\") + on(pool_id) group_left(instance,name) ceph_pool_metadata{pool_id=~\"[[pool_id]]\"}) ", - "format": "table", - "instant": true, - "intervalFactor": 2, - "refId": "A", - "textEditor": true - } - ], - "title": "Top 5 Pools by Throughput", - "transform": "table", - "type": "table" - }, - { - "columns": [], - "datasource": null, - "fontSize": "100%", - "gridPos": { - "h": 7, - "w": 8, - "x": 16, - "y": 9 - }, - "id": 5, - "links": [], - "minSpan": 8, - "pageSize": null, - "scroll": true, - "showHeader": true, - "sort": { - "col": 5, - "desc": true - }, - "styles": [ - { - "alias": "", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "Time", - "thresholds": [], - "type": "hidden", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "instance", - "thresholds": [], - "type": "hidden", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "job", - "thresholds": [], - "type": "hidden", - "unit": "short" - }, - { - "alias": "Pool Name", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "name", - "thresholds": [], - "type": "string", - "unit": "short" - }, - { - "alias": "Pool ID", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 
40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "pool_id", - "thresholds": [], - "type": "number", - "unit": "short" - }, - { - "alias": "Capacity Used", - "colorMode": "value", - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "Value", - "thresholds": [ - "70", - "85" - ], - "type": "number", - "unit": "percentunit" - } - ], - "targets": [ - { - "expr": "topk(5,((ceph_pool_bytes_used / (ceph_pool_bytes_used + ceph_pool_max_avail)) * on(pool_id) group_left(name) ceph_pool_metadata))", - "format": "table", - "hide": false, - "instant": true, - "intervalFactor": 1, - "legendFormat": "", - "refId": "D" - } - ], - "title": "Top 5 Pools By Capacity Used", - "transform": "table", - "type": "table" - }, - { - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 16 - }, - "id": 16, - "panels": [ - { - "aliasColors": { - "read_op_per_sec": "#3F6833", - "write_op_per_sec": "#E5AC0E" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 1, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 17 - }, - "id": 6, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "minSpan": 12, - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "(label_replace(irate(ceph_pool_rd[1m]),\"id\",\"$1\",\"pool_id\",\"(.*)\") * on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~\"[[pool_name]]\"})", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "reads", - "refId": "B" - }, - { - "expr": 
"(label_replace(irate(ceph_pool_wr[1m]),\"id\",\"$1\",\"pool_id\",\"(.*)\") * on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~\"[[pool_name]]\"})", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "writes", - "refId": "C" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Pool '$pool_name' Client IOPS", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": "", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": { - "read_op_per_sec": "#3F6833", - "write_op_per_sec": "#E5AC0E" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 1, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 17 - }, - "id": 7, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "minSpan": 12, - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "(label_replace(irate(ceph_pool_rd_bytes[1m]),\"id\",\"$1\",\"pool_id\",\"(.*)\") + on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~\"[[pool_name]]\"})", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "reads", - "refId": "A" - }, - { - "expr": "(label_replace(irate(ceph_pool_wr_bytes[1m]),\"id\",\"$1\",\"pool_id\",\"(.*)\") + on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~\"[[pool_name]]\"})", - "format": "time_series", - "intervalFactor": 
1, - "legendFormat": "writes", - "refId": "C" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Pool '$pool_name' Client Throughput", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "decbytes", - "label": "", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": "pool_name", - "title": "Pool '$pool_name' Performance Details", - "type": "row" - } - ], - "refresh": "15s", - "schemaVersion": 16, - "style": "dark", - "tags": [], - "templating": { - "list": [ - { - "allValue": null, - "current": {}, - "datasource": null, - "hide": 2, - "includeAll": true, - "label": null, - "multi": false, - "name": "pool_id", - "options": [], - "query": "label_values(ceph_pool_metadata,pool_id)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": {}, - "datasource": null, - "hide": 2, - "includeAll": true, - "label": "Pool Name", - "multi": false, - "name": "pool_name", - "options": [], - "query": "label_values(ceph_pool_metadata,name)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "15s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "browser", - "title": "Ceph Pools", - "version": 16 -} diff --git 
a/ansible/roles/ceph-grafana/files/dashboards/mgr-prometheus/ceph-rgw-workload.json b/ansible/roles/ceph-grafana/files/dashboards/mgr-prometheus/ceph-rgw-workload.json deleted file mode 100644 index 8585fc5..0000000 --- a/ansible/roles/ceph-grafana/files/dashboards/mgr-prometheus/ceph-rgw-workload.json +++ /dev/null @@ -1,1180 +0,0 @@ -{ - "__requires": [ - { - "type": "grafana", - "id": "grafana", - "name": "Grafana", - "version": "5.0.0" - }, - { - "type": "panel", - "id": "graph", - "name": "Graph", - "version": "5.0.0" - }, - { - "type": "datasource", - "id": "prometheus", - "name": "Local", - "version": "5.0.0" - }, - { - "type": "panel", - "id": "singlestat", - "name": "Singlestat", - "version": "5.0.0" - } - ], - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "editable": false, - "gnetId": null, - "graphTooltip": 0, - "id": null, - "iteration": 1530165442642, - "links": [], - "panels": [ - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 2, - "panels": [], - "title": "RGW Overview - All Gateways", - "type": "row" - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 1, - "gridPos": { - "h": 7, - "w": 8, - "x": 0, - "y": 1 - }, - "id": 29, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "avg(rate(ceph_rgw_get_initial_lat_sum[30s]) / rate(ceph_rgw_get_initial_lat_count[30s]))", - "format": "time_series", - 
"intervalFactor": 1, - "legendFormat": "GET AVG", - "refId": "A" - }, - { - "expr": "avg(rate(ceph_rgw_put_initial_lat_sum[30s]) / rate(ceph_rgw_put_initial_lat_count[30s]))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "PUT AVG", - "refId": "B" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Average GET/PUT Latencies", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 1, - "gridPos": { - "h": 7, - "w": 7, - "x": 8, - "y": 1 - }, - "id": 4, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum by(rgw_host) (label_replace(rate(ceph_rgw_req[30s]), \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{rgw_host}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Total Requests/sec by RGW Instance", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 0, - "format": 
"none", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "description": "Latencies are shown stacked, without a yaxis to provide a visual indication of GET latency imbalance across RGW hosts", - "fill": 1, - "gridPos": { - "h": 7, - "w": 6, - "x": 15, - "y": 1 - }, - "id": 31, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "label_replace(rate(ceph_rgw_get_initial_lat_sum[30s]),\"rgw_host\",\"$1\",\"ceph_daemon\",\"rgw.(.*)\") / \nlabel_replace(rate(ceph_rgw_get_initial_lat_count[30s]),\"rgw_host\",\"$1\",\"ceph_daemon\",\"rgw.(.*)\")", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{rgw_host}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "GET Latencies by RGW Instance", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": null, - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": false - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - 
"datasource": null, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 7, - "w": 3, - "x": 21, - "y": 1 - }, - "id": 8, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "sum(ceph_rgw_qlen)", - "format": "time_series", - "intervalFactor": 1, - "refId": "A" - } - ], - "thresholds": "", - "title": "Request Queue Length", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "avg" - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "description": "Total bytes transferred in/out of all radosgw instances within the cluster", - "fill": 1, - "gridPos": { - "h": 6, - "w": 8, - "x": 0, - "y": 8 - }, - "id": 6, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(ceph_rgw_get_b[30s]))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "GETs", - "refId": "A" - }, - { - "expr": 
"sum(rate(ceph_rgw_put_b[30s]))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "PUTs", - "refId": "B" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Bandwidth Consumed by Type", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "description": "Total bytes transferred in/out through get/put operations, by radosgw instance", - "fill": 1, - "gridPos": { - "h": 6, - "w": 7, - "x": 8, - "y": 8 - }, - "id": 9, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum by(rgw_host) (\n (label_replace(rate(ceph_rgw_get_b[30s]), \"rgw_host\",\"$1\",\"ceph_daemon\",\"rgw.(.*)\")) + \n (label_replace(rate(ceph_rgw_put_b[30s]), \"rgw_host\",\"$1\",\"ceph_daemon\",\"rgw.(.*)\"))\n)", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{rgw_host}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Bandwidth by RGW Instance", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - 
}, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "description": "Latencies are shown stacked, without a yaxis to provide a visual indication of PUT latency imbalance across RGW hosts", - "fill": 1, - "gridPos": { - "h": 6, - "w": 6, - "x": 15, - "y": 8 - }, - "id": 32, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "label_replace(rate(ceph_rgw_put_initial_lat_sum[30s]),\"rgw_host\",\"$1\",\"ceph_daemon\",\"rgw.(.*)\") / \nlabel_replace(rate(ceph_rgw_put_initial_lat_count[30s]),\"rgw_host\",\"$1\",\"ceph_daemon\",\"rgw.(.*)\")", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{rgw_host}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "PUT Latencies by RGW Instance", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": null, - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": false - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "description": "Failed HTTP 
Requests by RGW instance", - "fill": 1, - "gridPos": { - "h": 6, - "w": 3, - "x": 21, - "y": 8 - }, - "id": 41, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "label_replace(ceph_rgw_failed_req, \"rgw_host\",\"$1\",\"ceph_daemon\",\"rgw.(.*)\")", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{rgw_host}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Failed Requests", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": false, - "values": [] - }, - "yaxes": [ - { - "format": "none", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": false - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 14 - }, - "id": 12, - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 1, - "gridPos": { - "h": 8, - "w": 6, - "x": 0, - "y": 15 - }, - "id": 34, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "scopedVars": { - "rgw_servers": { - "selected": false, - "text": "rgw.rhs-srv-01", - "value": "rgw.rhs-srv-01" - } - }, - "seriesOverrides": 
[], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "rate(ceph_rgw_get_initial_lat_sum{ceph_daemon=~\"($rgw_servers)\"}[30s]) / rate(ceph_rgw_get_initial_lat_count{ceph_daemon=~\"($rgw_servers)\"}[30s])", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "GET", - "refId": "A" - }, - { - "expr": "rate(ceph_rgw_put_initial_lat_sum{ceph_daemon=~\"($rgw_servers)\"}[30s]) / rate(ceph_rgw_put_initial_lat_count{ceph_daemon=~\"($rgw_servers)\"}[30s])", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "PUT", - "refId": "B" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "$rgw_servers GET/PUT Latencies", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 1, - "gridPos": { - "h": 8, - "w": 7, - "x": 6, - "y": 15 - }, - "id": 18, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "scopedVars": { - "rgw_servers": { - "selected": false, - "text": "rgw.rhs-srv-01", - "value": "rgw.rhs-srv-01" - } - }, - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "rate(ceph_rgw_get_b{ceph_daemon=~\"[[rgw_servers]]\"}[30s])", - "format": "time_series", - "intervalFactor": 1, - 
"legendFormat": "GETs", - "refId": "B" - }, - { - "expr": "rate(ceph_rgw_put_b{ceph_daemon=~\"[[rgw_servers]]\"}[30s])", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "PUTs", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Bandwidth by HTTP Operation", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 0, - "format": "bytes", - "label": "", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { - "GETs": "#7eb26d", - "Other": "#447ebc", - "PUTs": "#eab839", - "Requests": "#3f2b5b", - "Requests Failed": "#bf1b00" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 1, - "gridPos": { - "h": 8, - "w": 7, - "x": 13, - "y": 15 - }, - "id": 14, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "scopedVars": { - "rgw_servers": { - "selected": false, - "text": "rgw.rhs-srv-01", - "value": "rgw.rhs-srv-01" - } - }, - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "rate(ceph_rgw_failed_req{ceph_daemon=~\"[[rgw_servers]]\"}[30s])", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Requests Failed", - "refId": "B" - }, - { - "expr": "rate(ceph_rgw_get{ceph_daemon=~\"[[rgw_servers]]\"}[30s])", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "GETs", - "refId": "C" - }, - { - "expr": 
"rate(ceph_rgw_put{ceph_daemon=~\"[[rgw_servers]]\"}[30s])", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "PUTs", - "refId": "D" - }, - { - "expr": "rate(ceph_rgw_req{ceph_daemon=~\"[[rgw_servers]]\"}[30s]) -\n (rate(ceph_rgw_get{ceph_daemon=~\"[[rgw_servers]]\"}[30s]) +\n rate(ceph_rgw_put{ceph_daemon=~\"[[rgw_servers]]\"}[30s]))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Other", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "HTTP Request Breakdown", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": { - "Failures": "#bf1b00", - "GETs": "#7eb26d", - "Other (HEAD,POST,DELETE)": "#447ebc", - "PUTs": "#eab839" - }, - "breakPoint": "50%", - "cacheTimeout": null, - "combine": { - "label": "Others", - "threshold": 0 - }, - "datasource": null, - "fontSize": "80%", - "format": "none", - "gridPos": { - "h": 8, - "w": 4, - "x": 20, - "y": 15 - }, - "id": 23, - "interval": null, - "legend": { - "show": true, - "values": true - }, - "legendType": "Under graph", - "links": [], - "maxDataPoints": 3, - "nullPointMode": "connected", - "pieType": "pie", - "scopedVars": { - "rgw_servers": { - "selected": false, - "text": "rgw.rhs-srv-01", - "value": "rgw.rhs-srv-01" - } - }, - "strokeWidth": 1, - "targets": [ - { - "expr": "rate(ceph_rgw_failed_req{ceph_daemon=~\"[[rgw_servers]]\"}[30s])", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Failures", - "refId": "A" - }, - { - "expr": "rate(ceph_rgw_get{ceph_daemon=~\"[[rgw_servers]]\"}[30s])", - "format": 
"time_series", - "intervalFactor": 1, - "legendFormat": "GETs", - "refId": "B" - }, - { - "expr": "rate(ceph_rgw_put{ceph_daemon=~\"[[rgw_servers]]\"}[30s])", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "PUTs", - "refId": "C" - }, - { - "expr": "rate(ceph_rgw_req{ceph_daemon=~\"[[rgw_servers]]\"}[30s]) -\n (rate(ceph_rgw_get{ceph_daemon=~\"[[rgw_servers]]\"}[30s]) +\n rate(ceph_rgw_put{ceph_daemon=~\"[[rgw_servers]]\"}[30s]))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Other (DELETE,LIST)", - "refId": "D" - } - ], - "title": "Workload Breakdown", - "type": "grafana-piechart-panel", - "valueName": "current" - } - ], - "repeat": "rgw_servers", - "title": "RGW Host Detail : $rgw_servers", - "type": "row" - } - ], - "refresh": "15s", - "schemaVersion": 16, - "style": "dark", - "tags": [ - "overview" - ], - "templating": { - "list": [ - { - "allValue": null, - "current": {}, - "datasource": null, - "hide": 2, - "includeAll": true, - "label": null, - "multi": false, - "name": "rgw_servers", - "options": [], - "query": "label_values(ceph_rgw_req, ceph_daemon)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "15s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "", - "title": "Ceph RGW Workload", - "version": 26 -} diff --git a/ansible/roles/ceph-grafana/files/dashboards/mgr-prometheus/cephmetrics-host.json b/ansible/roles/ceph-grafana/files/dashboards/mgr-prometheus/cephmetrics-host.json deleted file mode 100644 index c937e29..0000000 --- a/ansible/roles/ceph-grafana/files/dashboards/mgr-prometheus/cephmetrics-host.json +++ /dev/null @@ -1,1000 +0,0 @@ -{ - "__inputs": 
[ - { - "name": "DS_LOCAL_PROMETHEUS", - "label": "local_prometheus", - "description": "", - "type": "datasource", - "pluginId": "prometheus", - "pluginName": "Prometheus" - } - ], - "__requires": [ - { - "type": "grafana", - "id": "grafana", - "name": "Grafana", - "version": "5.0.4" - }, - { - "type": "panel", - "id": "graph", - "name": "Graph", - "version": "5.0.0" - }, - { - "type": "datasource", - "id": "prometheus", - "name": "Prometheus", - "version": "5.0.0" - } - ], - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "editable": false, - "gnetId": null, - "graphTooltip": 0, - "id": null, - "links": [], - "panels": [ - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 9, - "panels": [], - "repeat": null, - "title": "CPU & RAM", - "type": "row" - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 1, - "gridPos": { - "h": 7, - "w": 8, - "x": 0, - "y": 1 - }, - "id": 1, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "minSpan": 8, - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "100 - (avg by (instance) (irate(node_cpu{job=\"cephmetrics\",mode=\"idle\"}[5m])) * 100)", - "format": "time_series", - "instant": false, - "intervalFactor": 1, - "legendFormat": "{{Busy %}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Cephmetrics Host CPU Busy %", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" 
- }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": null, - "format": "short", - "label": null, - "logBase": 1, - "max": "100", - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 1, - "gridPos": { - "h": 7, - "w": 8, - "x": 8, - "y": 1 - }, - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "minSpan": 8, - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "(avg by(mode) (irate(node_cpu{job=\"cephmetrics\",mode!=\"idle\"}[30s]))*100)", - "format": "time_series", - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{mode}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Cephmetrics Host CPU Usage Breakdown", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": null, - "format": "short", - "label": null, - "logBase": 1, - "max": "100", - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { - "Installed": "#890f02" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 1, - "gridPos": { - "h": 7, - "w": 8, - "x": 16, - "y": 1 - }, 
- "id": 7, - "legend": { - "avg": false, - "current": true, - "max": false, - "min": false, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "minSpan": 8, - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "Installed", - "fill": 0, - "linewidth": 2 - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "node_memory_MemTotal{job=\"cephmetrics\"} - node_memory_MemAvailable{job=\"cephmetrics\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "Used", - "refId": "C" - }, - { - "expr": "node_memory_MemTotal{job=\"cephmetrics\"} ", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Installed", - "refId": "B" - } - ], - "thresholds": [], - "timeFrom": "7d", - "timeShift": null, - "title": "RAM Usage", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 8 - }, - "id": 10, - "panels": [], - "repeat": null, - "title": "Disk", - "type": "row" - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 1, - "gridPos": { - "h": 7, - "w": 8, - "x": 0, - "y": 9 - }, - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "minSpan": 8, - "nullPointMode": "null", - 
"percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "irate(node_disk_io_time_ms{job='cephmetrics',device=~\"[h,s,v]d[a-z]\"}[1m]) / 10", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{device}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Disk Utilization", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "percent", - "label": "%UTIL", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 1, - "gridPos": { - "h": 7, - "w": 8, - "x": 8, - "y": 9 - }, - "id": 4, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "minSpan": 8, - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "irate(node_disk_reads_completed{job=\"cephmetrics\",device=~\"[h,s,v]d[a-z]\"}[30s]) + irate(node_disk_writes_completed{job=~\"cephmetrics\",device=~\"[h,s,v]d[a-z]\"}[30s])", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{device}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Disk IOPS", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, 
- "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "none", - "label": "IOPS", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 1, - "gridPos": { - "h": 7, - "w": 8, - "x": 16, - "y": 9 - }, - "id": 5, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "minSpan": 8, - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "irate(node_disk_bytes_read{job='cephmetrics',device=~\"[h,s,v]d[a-z]\"}[1m]) + irate(node_disk_bytes_written{job='cephmetrics',device=~\"[h,s,v]d[a-z]\"}[1m]) ", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{device}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Disk Throughput", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": "Throughput", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 1, - "gridPos": { - "h": 7, - "w": 8, - "x": 0, - "y": 16 - }, - "id": 6, - "legend": { - 
"avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "minSpan": 8, - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "(irate(node_disk_write_time_ms{job='cephmetrics',device=~\"[h,s,v]d[a-z]\"}[1m]) + irate(node_disk_read_time_ms{job='cephmetrics',device=~\"[h,s,v]d[a-z]\"}[1m])) / \n(irate(node_disk_reads_completed{job='cephmetrics',device=~\"[h,s,v]d[a-z]\"}[1m]) + irate(node_disk_writes_completed{job='cephmetrics',device=~\"[h,s,v]d[a-z]\"}[1m]))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{device}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Disk Latency", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "ms", - "label": "", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": { - "Size": "#890f02" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 1, - "gridPos": { - "h": 7, - "w": 16, - "x": 8, - "y": 16 - }, - "id": 13, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "Size", - "fill": 0, - "linewidth": 2 - } - ], - 
"spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "node_filesystem_size{mountpoint='/var/lib/cephmetrics/data',fstype='xfs'} - node_filesystem_free{mountpoint='/var/lib/cephmetrics/data',fstype='xfs'}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Used", - "refId": "A" - }, - { - "expr": "node_filesystem_size{mountpoint='/var/lib/cephmetrics/data',fstype='xfs'}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Size", - "refId": "B" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Filesystem Usage - Prometheus Growth", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 23 - }, - "id": 11, - "panels": [], - "repeat": null, - "title": "Network", - "type": "row" - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 1, - "gridPos": { - "h": 7, - "w": 24, - "x": 0, - "y": 24 - }, - "id": 8, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum(\n irate(node_network_receive_bytes{job='cephmetrics'}[1m]) \n ) by(instance)", - "format": "time_series", - "intervalFactor": 1, - 
"legendFormat": "Network RX", - "refId": "A" - }, - { - "expr": "sum(\n irate(node_network_transmit_bytes{job='cephmetrics'}[1m]) \n ) by(instance)", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Network TX", - "refId": "B" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Network Load", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": { - "Size": "#890f02" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "description": "Shows data growth on '/' which includes the prometheus data. Prometheus installed 04/14, migrated to separate logical volume 04/22", - "fill": 1, - "gridPos": { - "h": 7, - "w": 16, - "x": 0, - "y": 31 - }, - "id": 14, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "Size", - "fill": 0, - "linewidth": 2 - } - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "node_filesystem_size{mountpoint='/',fstype='xfs'} - node_filesystem_free{mountpoint='/',fstype='xfs'}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Used", - "refId": "A" - }, - { - "expr": "node_filesystem_size{mountpoint='/',fstype='xfs'}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Size", - "refId": "B" - } - ], - 
"thresholds": [], - "timeFrom": "14d", - "timeShift": null, - "title": "Filesystem Usage - '/'", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - } - ], - "refresh": "15s", - "schemaVersion": 16, - "style": "dark", - "tags": [], - "templating": { - "list": [] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "15s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "", - "title": "Cephmetrics Host Metrics", - "version": 13 -} diff --git a/ansible/roles/ceph-grafana/files/dashboards/mgr-prometheus/disk-busy-by-server.json b/ansible/roles/ceph-grafana/files/dashboards/mgr-prometheus/disk-busy-by-server.json deleted file mode 100644 index bc79b8e..0000000 --- a/ansible/roles/ceph-grafana/files/dashboards/mgr-prometheus/disk-busy-by-server.json +++ /dev/null @@ -1,1520 +0,0 @@ -{ - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "editable": false, - "gnetId": null, - "graphTooltip": 0, - "hideControls": true, - "links": [], - "refresh": "15s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 1, - "height": "400", - "id": 1, - "legend": { - "avg": false, - "current": false, - "max": 
false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "minSpan": 12, - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [], - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "max(label_replace((irate(node_disk_io_time_ms{device=~\"[h,s,v]d[a-z]\"}[30s]) / 10),\"host\",\"$1\",\"instance\",\"([^.]*).*\")) by(host)", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "{{host}}", - "refId": "A", - "textEditor": true - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "OSD Hosts Disk Utilization Peak", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": "", - "logBase": 1, - "max": "100", - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6" - }, - { - "collapse": false, - "height": 250, - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 1, - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "minSpan": 2, - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": "osd_servers", - "seriesOverrides": [], - "spaceLength": 10, - "span": 2, - "stack": false, - 
"steppedLine": false, - "targets": [ - { - "expr": "quantile([[percentile]]/100,(irate(node_disk_io_time_ms{device=~\"[h,s,v]d[a-z]\",instance=~\"[[osd_servers]].*\"}[1m]) / 10))", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "all disks busy @ $percentile%ile", - "refId": "B" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "$osd_servers Disk Utilization @ $percentile%ile", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": "100", - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 1, - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "minSpan": 2, - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "repeatIteration": 1519090074308, - "repeatPanelId": 4, - "seriesOverrides": [], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "quantile([[percentile]]/100,(irate(node_disk_io_time_ms{device=~\"[h,s,v]d[a-z]\",instance=~\"[[osd_servers]].*\"}[1m]) / 10))", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "all disks busy @ $percentile%ile", - "refId": "B" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "$osd_servers Disk Utilization @ $percentile%ile", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": 
"individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": "100", - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 1, - "id": 4, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "minSpan": 2, - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "repeatIteration": 1519090074308, - "repeatPanelId": 4, - "seriesOverrides": [], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "quantile([[percentile]]/100,(irate(node_disk_io_time_ms{device=~\"[h,s,v]d[a-z]\",instance=~\"[[osd_servers]].*\"}[1m]) / 10))", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "all disks busy @ $percentile%ile", - "refId": "B" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "$osd_servers Disk Utilization @ $percentile%ile", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": "100", - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 1, - "id": 5, - 
"legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "minSpan": 2, - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "repeatIteration": 1519090074308, - "repeatPanelId": 4, - "seriesOverrides": [], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "quantile([[percentile]]/100,(irate(node_disk_io_time_ms{device=~\"[h,s,v]d[a-z]\",instance=~\"[[osd_servers]].*\"}[1m]) / 10))", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "all disks busy @ $percentile%ile", - "refId": "B" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "$osd_servers Disk Utilization @ $percentile%ile", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": "100", - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 1, - "id": 6, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "minSpan": 2, - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "repeatIteration": 1519090074308, - "repeatPanelId": 4, - "seriesOverrides": [], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - 
"expr": "quantile([[percentile]]/100,(irate(node_disk_io_time_ms{device=~\"[h,s,v]d[a-z]\",instance=~\"[[osd_servers]].*\"}[1m]) / 10))", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "all disks busy @ $percentile%ile", - "refId": "B" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "$osd_servers Disk Utilization @ $percentile%ile", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": "100", - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 1, - "id": 7, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "minSpan": 2, - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "repeatIteration": 1519090074308, - "repeatPanelId": 4, - "seriesOverrides": [], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "quantile([[percentile]]/100,(irate(node_disk_io_time_ms{device=~\"[h,s,v]d[a-z]\",instance=~\"[[osd_servers]].*\"}[1m]) / 10))", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "all disks busy @ $percentile%ile", - "refId": "B" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "$osd_servers Disk Utilization @ $percentile%ile", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - 
"xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": "100", - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 1, - "id": 8, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "minSpan": 2, - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "repeatIteration": 1519090074308, - "repeatPanelId": 4, - "seriesOverrides": [], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "quantile([[percentile]]/100,(irate(node_disk_io_time_ms{device=~\"[h,s,v]d[a-z]\",instance=~\"[[osd_servers]].*\"}[1m]) / 10))", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "all disks busy @ $percentile%ile", - "refId": "B" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "$osd_servers Disk Utilization @ $percentile%ile", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": "100", - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 1, - "id": 9, - "legend": { - "avg": false, - "current": false, 
- "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "minSpan": 2, - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "repeatIteration": 1519090074308, - "repeatPanelId": 4, - "seriesOverrides": [], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "quantile([[percentile]]/100,(irate(node_disk_io_time_ms{device=~\"[h,s,v]d[a-z]\",instance=~\"[[osd_servers]].*\"}[1m]) / 10))", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "all disks busy @ $percentile%ile", - "refId": "B" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "$osd_servers Disk Utilization @ $percentile%ile", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": "100", - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 1, - "id": 10, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "minSpan": 2, - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "repeatIteration": 1519090074308, - "repeatPanelId": 4, - "seriesOverrides": [], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": 
"quantile([[percentile]]/100,(irate(node_disk_io_time_ms{device=~\"[h,s,v]d[a-z]\",instance=~\"[[osd_servers]].*\"}[1m]) / 10))", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "all disks busy @ $percentile%ile", - "refId": "B" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "$osd_servers Disk Utilization @ $percentile%ile", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": "100", - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 1, - "id": 11, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "minSpan": 2, - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "repeatIteration": 1519090074308, - "repeatPanelId": 4, - "seriesOverrides": [], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "quantile([[percentile]]/100,(irate(node_disk_io_time_ms{device=~\"[h,s,v]d[a-z]\",instance=~\"[[osd_servers]].*\"}[1m]) / 10))", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "all disks busy @ $percentile%ile", - "refId": "B" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "$osd_servers Disk Utilization @ $percentile%ile", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - 
"buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": "100", - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 1, - "id": 12, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "minSpan": 2, - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "repeatIteration": 1519090074308, - "repeatPanelId": 4, - "seriesOverrides": [], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "quantile([[percentile]]/100,(irate(node_disk_io_time_ms{device=~\"[h,s,v]d[a-z]\",instance=~\"[[osd_servers]].*\"}[1m]) / 10))", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "all disks busy @ $percentile%ile", - "refId": "B" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "$osd_servers Disk Utilization @ $percentile%ile", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": "100", - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 1, - "id": 13, - "legend": { - "avg": false, - "current": false, - "max": 
false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "minSpan": 2, - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "repeatIteration": 1519090074308, - "repeatPanelId": 4, - "seriesOverrides": [], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "quantile([[percentile]]/100,(irate(node_disk_io_time_ms{device=~\"[h,s,v]d[a-z]\",instance=~\"[[osd_servers]].*\"}[1m]) / 10))", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "all disks busy @ $percentile%ile", - "refId": "B" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "$osd_servers Disk Utilization @ $percentile%ile", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": "100", - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 1, - "id": 14, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "minSpan": 2, - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "repeatIteration": 1519090074308, - "repeatPanelId": 4, - "seriesOverrides": [], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": 
"quantile([[percentile]]/100,(irate(node_disk_io_time_ms{device=~\"[h,s,v]d[a-z]\",instance=~\"[[osd_servers]].*\"}[1m]) / 10))", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "all disks busy @ $percentile%ile", - "refId": "B" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "$osd_servers Disk Utilization @ $percentile%ile", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": "100", - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 1, - "id": 15, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "minSpan": 2, - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "repeatIteration": 1519090074308, - "repeatPanelId": 4, - "seriesOverrides": [], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "quantile([[percentile]]/100,(irate(node_disk_io_time_ms{device=~\"[h,s,v]d[a-z]\",instance=~\"[[osd_servers]].*\"}[1m]) / 10))", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "all disks busy @ $percentile%ile", - "refId": "B" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "$osd_servers Disk Utilization @ $percentile%ile", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - 
"buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": "100", - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 1, - "id": 16, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "minSpan": 2, - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "repeatIteration": 1519090074308, - "repeatPanelId": 4, - "seriesOverrides": [], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "quantile([[percentile]]/100,(irate(node_disk_io_time_ms{device=~\"[h,s,v]d[a-z]\",instance=~\"[[osd_servers]].*\"}[1m]) / 10))", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "all disks busy @ $percentile%ile", - "refId": "B" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "$osd_servers Disk Utilization @ $percentile%ile", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": "100", - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 1, - "id": 17, - "legend": { - "avg": false, - "current": false, - "max": 
false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "minSpan": 2, - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "repeatIteration": 1519090074308, - "repeatPanelId": 4, - "seriesOverrides": [], - "spaceLength": 10, - "span": 2, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "quantile([[percentile]]/100,(irate(node_disk_io_time_ms{device=~\"[h,s,v]d[a-z]\",instance=~\"[[osd_servers]].*\"}[1m]) / 10))", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "all disks busy @ $percentile%ile", - "refId": "B" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "$osd_servers Disk Utilization @ $percentile%ile", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": "100", - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6" - }, - { - "collapse": false, - "height": 250, - "panels": [], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [], - "templating": { - "list": [ - { - "allValue": null, - "current": { - "text": "All", - "value": "$__all" - }, - "datasource": null, - "hide": 2, - "includeAll": true, - "label": null, - "multi": false, - "name": "osd_servers", - "options": [], - "query": "label_values(ceph_disk_occupation, 
instance)", - "refresh": 1, - "regex": "([^.]*).*", - "sort": 1, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "selected": true, - "text": "95", - "value": "95" - }, - "hide": 0, - "includeAll": false, - "label": "Percentile", - "multi": false, - "name": "percentile", - "options": [ - { - "selected": true, - "text": "95", - "value": "95" - }, - { - "selected": false, - "text": "96", - "value": "96" - }, - { - "selected": false, - "text": "97", - "value": "97" - }, - { - "selected": false, - "text": "98", - "value": "98" - }, - { - "selected": false, - "text": "99", - "value": "99" - } - ], - "query": "95,96,97,98,99", - "type": "custom" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "15s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "", - "title": "Disk Busy By Server" -} diff --git a/ansible/roles/ceph-grafana/files/dashboards/mgr-prometheus/iops-by-server.json b/ansible/roles/ceph-grafana/files/dashboards/mgr-prometheus/iops-by-server.json deleted file mode 100644 index b4da67b..0000000 --- a/ansible/roles/ceph-grafana/files/dashboards/mgr-prometheus/iops-by-server.json +++ /dev/null @@ -1,295 +0,0 @@ -{ - "__requires": [ - { - "type": "grafana", - "id": "grafana", - "name": "Grafana", - "version": "4.6.3" - }, - { - "type": "panel", - "id": "graph", - "name": "Graph", - "version": "" - }, - { - "type": "datasource", - "id": "prometheus", - "name": "Local", - "version": "1.0.0" - } - ], - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "editable": false, - "gnetId": null, - 
"graphTooltip": 0, - "hideControls": false, - "id": null, - "links": [], - "refresh": "15s", - "rows": [ - { - "collapse": false, - "height": "250px", - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 1, - "gridPos": { - "h": 10, - "w": 24, - "x": 0, - "y": 0 - }, - "height": "400", - "id": 1, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "minSpan": 12, - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [], - "spaceLength": 10, - "span": 12, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "label_replace(irate(node_disk_reads_completed[1m]) + irate(node_disk_writes_completed{instance=~\"[[osd_servers]].*\"}[1m]),\"host\",\"$1\",\"instance\",\"([^.]*).*\")", - "format": "time_series", - "hide": true, - "intervalFactor": 1, - "legendFormat": "{{host}}", - "refId": "B" - }, - { - "expr": "sum(\n sum(\n irate(node_disk_reads_completed{job=\"node\" }[1m]) + \n irate(node_disk_writes_completed{job=\"node\"}[1m]))\n by(instance, device) + ignoring(ceph_daemon,job) group_right(instance) ceph_disk_occupation)\n by(instance)", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{instance}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Disk IOPS Across All OSD Hosts", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "none", - "label": "", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, 
- "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "All Servers by IOPS", - "titleSize": "h6" - }, - { - "collapse": false, - "height": 250, - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 1, - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": "osd_servers", - "seriesOverrides": [], - "spaceLength": 10, - "span": 4, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum(\n sum(\n irate(node_disk_reads_completed{instance=~\"[[osd_servers]].*\"}[1m]) + irate(node_disk_writes_completed[1m]))\n by(instance,device) +\n ignoring(ceph_daemon,job) group_right(instance) ceph_disk_occupation)\n \n \n", - "format": "time_series", - "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Total IOPS for $osd_servers", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "none", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "IOPS Load by Server", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [], - "templating": { - "list": [ - { - "allValue": "", - "current": {}, - "datasource": null, - "hide": 2, - "includeAll": 
true, - "label": "OSD Host", - "multi": false, - "name": "osd_servers", - "options": [], - "query": "label_values(ceph_disk_occupation, instance)", - "refresh": 1, - "regex": "([^.]*).*", - "sort": 1, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "15s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "", - "title": "IOPS by Server", - "version": 6 -} diff --git a/ansible/roles/ceph-grafana/files/dashboards/mgr-prometheus/iscsi-client-details.json b/ansible/roles/ceph-grafana/files/dashboards/mgr-prometheus/iscsi-client-details.json deleted file mode 100644 index f2bd608..0000000 --- a/ansible/roles/ceph-grafana/files/dashboards/mgr-prometheus/iscsi-client-details.json +++ /dev/null @@ -1,447 +0,0 @@ -{ - "__requires": [ - { - "type": "grafana", - "id": "grafana", - "name": "Grafana", - "version": "5.0.4" - }, - { - "type": "panel", - "id": "graph", - "name": "Graph", - "version": "5.0.0" - }, - { - "type": "datasource", - "id": "prometheus", - "name": "Local", - "version": "5.0.0" - }, - { - "type": "panel", - "id": "table", - "name": "Table", - "version": "5.0.0" - } - ], - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "editable": false, - "gnetId": null, - "graphTooltip": 0, - "id": null, - "iteration": 1526265032109, - "links": [], - "panels": [ - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 28, - "panels": [], - "repeat": null, - "title": "Client Details for $client_iqn", - "type": "row" - }, - { - "aliasColors": {}, - "bars": 
false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 1, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 1 - }, - "id": 20, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "minSpan": 12, - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "round(sum by(lun_name) (irate(ceph_iscsi_lun_iops{client_iqn=~\"[[client_iqn]]\"}[30s])))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{lun_name}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "IOPS Detail for $client_iqn", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "none", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 1, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 1 - }, - "id": 21, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "minSpan": 12, - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "round(sum by(lun_name) 
(irate(ceph_iscsi_lun_read_bytes{client_iqn=~\"[[client_iqn]]\"}[30s])))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{lun_name}}(r)", - "refId": "A" - }, - { - "expr": "round(sum by(lun_name) (irate(ceph_iscsi_lun_write_bytes{client_iqn=~\"[[client_iqn]]\"}[30s])))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{lun_name}}(w)", - "refId": "B" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Throughput for $client_iqn", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "decbytes", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "columns": [], - "datasource": null, - "fontSize": "100%", - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 8 - }, - "id": 24, - "links": [], - "minSpan": 12, - "pageSize": null, - "scroll": true, - "showHeader": true, - "sort": { - "col": 5, - "desc": true - }, - "styles": [ - { - "alias": "", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "Time", - "thresholds": [], - "type": "hidden", - "unit": "short" - }, - { - "alias": "Gateway Owner", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "gw_owner", - "thresholds": [], - "type": "string", - "unit": "short" - }, - { - "alias": "LUN Name (pool.image)", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - 
"dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "a_lun_name", - "thresholds": [], - "type": "string", - "unit": "short" - }, - { - "alias": "Size", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "Value", - "thresholds": [], - "type": "number", - "unit": "bytes" - }, - { - "alias": "", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [], - "type": "hidden", - "unit": "short" - } - ], - "targets": [ - { - "expr": "max(label_replace(ceph_iscsi_client_lun{client_iqn=~\"[[client_iqn]]\"},\"a_lun_name\",\"$1\", \"lun_name\",\"(.*)\")) \n by(a_lun_name,client_iqn) +\non(a_lun_name) group_right(client_iqn) \n max(label_replace(ceph_iscsi_lun_mapped,\"a_lun_name\",\"$1\",\"lun_name\",\"(.*)\")) by(a_lun_name, gw_owner) +\non(a_lun_name) group_right(client_iqn, gw_owner) \n max(label_replace(ceph_iscsi_lun_size_bytes,\"a_lun_name\",\"$1\",\"lun_name\",\"(.*)\")) by(a_lun_name)\n\n", - "format": "table", - "hide": false, - "instant": true, - "intervalFactor": 1, - "refId": "C" - } - ], - "title": "LUN Details for $client_iqn", - "transform": "table", - "type": "table" - } - ], - "refresh": "15s", - "schemaVersion": 16, - "style": "dark", - "tags": [ - "iscsi" - ], - "templating": { - "list": [ - { - "allValue": null, - "current": {}, - "datasource": null, - "hide": 0, - "includeAll": false, - "label": "Client IQN", - "multi": false, - "name": "client_iqn", - "options": [], - "query": "label_values(ceph_iscsi_client_login,client_iqn)", - "refresh": 1, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": {}, - "datasource": null, - 
"hide": 2, - "includeAll": false, - "label": null, - "multi": false, - "name": "gateway_iqn", - "options": [], - "query": "label_values(ceph_iscsi_gateway_tpg_total, gw_iqn)", - "refresh": 1, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "(eth|bon|en|ib|mlx)", - "value": "(eth|bon|en|ib|mlx)" - }, - "hide": 2, - "includeAll": false, - "label": null, - "multi": false, - "name": "interfaces", - "options": [ - { - "selected": true, - "text": "(eth|bon|en|ib|mlx)", - "value": "(eth|bon|en|ib|mlx)" - } - ], - "query": "(eth|bon|en|ib|mlx)", - "type": "custom" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "15s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "", - "title": "iSCSI Client Details", - "version": 7 -} diff --git a/ansible/roles/ceph-grafana/files/dashboards/mgr-prometheus/iscsi-overview.json b/ansible/roles/ceph-grafana/files/dashboards/mgr-prometheus/iscsi-overview.json deleted file mode 100644 index d45c15e..0000000 --- a/ansible/roles/ceph-grafana/files/dashboards/mgr-prometheus/iscsi-overview.json +++ /dev/null @@ -1,1554 +0,0 @@ -{ - "__requires": [ - { - "type": "grafana", - "id": "grafana", - "name": "Grafana", - "version": "5.0.0" - }, - { - "type": "panel", - "id": "grafana-piechart-panel", - "name": "Pie Chart", - "version": "1.3.3" - }, - { - "type": "panel", - "id": "graph", - "name": "Graph", - "version": "5.0.0" - }, - { - "type": "datasource", - "id": "prometheus", - "name": "Local", - "version": "5.0.0" - }, - { - "type": "panel", - "id": "singlestat", - "name": "Singlestat", - "version": "5.0.0" - }, - { - "type": "panel", - "id": "text", - "name": "Text", - "version": "5.0.0" - } - ], - 
"annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "editable": false, - "gnetId": null, - "graphTooltip": 0, - "id": null, - "iteration": 1530144424365, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "tags": [ - "iscsi" - ], - "targetBlank": true, - "title": "Clients", - "type": "dashboards" - } - ], - "panels": [ - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 25, - "panels": [], - "repeat": null, - "title": "iSCSI Gateway Group : $gateway_iqn", - "type": "row" - }, - { - "content": "", - "gridPos": { - "h": 4, - "w": 4, - "x": 0, - "y": 1 - }, - "id": 8, - "links": [], - "minSpan": 4, - "mode": "markdown", - "title": "", - "transparent": true, - "type": "text" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": null, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 4, - "w": 2, - "x": 4, - "y": 1 - }, - "id": 1, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "minSpan": 2, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "max(ceph_iscsi_gateway_tpg_total)", - "format": "time_series", - 
"instant": true, - "intervalFactor": 1, - "refId": "A" - } - ], - "thresholds": "", - "title": "Gateways", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": null, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 4, - "w": 2, - "x": 6, - "y": 1 - }, - "id": 3, - "interval": null, - "links": [ - { - "dashUri": "db/iscsi-client-details", - "dashboard": "iSCSI Client Details", - "targetBlank": true, - "title": "iSCSI Client Details", - "type": "dashboard" - } - ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "minSpan": 2, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "max(count by(instance) (ceph_iscsi_client_login))", - "format": "time_series", - "instant": true, - "intervalFactor": 1, - "refId": "A" - } - ], - "thresholds": "", - "title": "Clients", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": null, - "format": "none", - "gauge": 
{ - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 4, - "w": 2, - "x": 8, - "y": 1 - }, - "id": 4, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "minSpan": 2, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "max(sum(ceph_iscsi_client_login) by(gw_name))", - "format": "time_series", - "instant": true, - "intervalFactor": 1, - "refId": "A" - } - ], - "thresholds": "", - "title": "Sessions", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": null, - "decimals": 0, - "format": "bytes", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 10, - "y": 1 - }, - "id": 5, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "minSpan": 4, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - 
"sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "max(sum(ceph_iscsi_lun_size_bytes) by(gw_name))", - "format": "time_series", - "instant": true, - "intervalFactor": 1, - "refId": "A" - } - ], - "thresholds": "", - "title": "Defined Capacity", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": null, - "decimals": 0, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 4, - "w": 2, - "x": 14, - "y": 1 - }, - "id": 6, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "minSpan": 2, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "max(count by(instance) (ceph_iscsi_lun_mapped))", - "format": "time_series", - "intervalFactor": 2, - "refId": "B" - } - ], - "thresholds": "", - "title": "LUNs", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - 
"rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": null, - "decimals": 0, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 4, - "w": 2, - "x": 16, - "y": 1 - }, - "id": 7, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "minSpan": 2, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "max(count by(instance) (ceph_iscsi_lun_mapped)) - max(count by(instance) (ceph_iscsi_lun_mapped == 1))", - "format": "time_series", - "hide": false, - "instant": true, - "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": "", - "title": "Unused LUNs", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "content": "", - "gridPos": { - "h": 4, - "w": 6, - "x": 18, - "y": 1 - }, - "id": 9, - "links": [], - "minSpan": 6, - "mode": "markdown", - "title": "", - "transparent": true, - "type": "text" - }, - { - "content": "", - "gridPos": { - "h": 4, - "w": 2, - "x": 0, - "y": 5 - }, - "id": 10, - "links": [], - "minSpan": 2, - "mode": "markdown", - "title": "", - "transparent": true, - "type": "text" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": null, - "decimals": 0, - "format": "none", - "gauge": { - "maxValue": 100, - 
"minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 2, - "y": 5 - }, - "height": "200", - "id": 11, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "minSpan": 4, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "sum(sum(rate(ceph_iscsi_lun_iops[30s])) by(gw_name))", - "format": "time_series", - "instant": true, - "intervalFactor": 2, - "refId": "A" - } - ], - "thresholds": "", - "title": "IOPS", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": null, - "decimals": 1, - "format": "decbytes", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 6, - "y": 5 - }, - "height": "200", - "id": 12, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "minSpan": 4, - "nullPointMode": "connected", - "nullText": null, - "postfix": "/s", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - 
"to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "sum((sum(rate(ceph_iscsi_lun_read_bytes[30s])) by(gw_name)) + (sum(rate(ceph_iscsi_lun_write_bytes[30s])) by(gw_name)))", - "format": "time_series", - "instant": false, - "intervalFactor": 1, - "refId": "A" - } - ], - "thresholds": "", - "title": "Throughput", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 1, - "gridPos": { - "h": 5, - "w": 6, - "x": 10, - "y": 5 - }, - "height": "200", - "id": 13, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "minSpan": 6, - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "label_replace(\n (sum(\n rate(\n node_network_transmit_bytes{job=\"node\", instance=~\"($iscsi_gws).*\"}[30s])\n ) by(instance)),\n \"gw_name\", \"$1\",\"instance\",\"([^.]*).*\")", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "{{gw_name}}", - "refId": "B" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Network Load by Gateway", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "decbytes", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { 
- "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": {}, - "breakPoint": "50%", - "cacheTimeout": null, - "combine": { - "label": "Others", - "threshold": 0 - }, - "datasource": null, - "description": "LUNs are configured with a primary path (active), and a number of secondary paths (passive). Under normal circumstances, only the active/primary path is used for I/O. This chart shows the distribution of the active paths across each of the gateways.", - "fontSize": "80%", - "format": "short", - "gridPos": { - "h": 5, - "w": 6, - "x": 16, - "y": 5 - }, - "id": 14, - "interval": null, - "legend": { - "show": true, - "values": true - }, - "legendType": "Under graph", - "links": [], - "maxDataPoints": 3, - "minSpan": 6, - "nullPointMode": "connected", - "pieType": "pie", - "strokeWidth": 1, - "targets": [ - { - "expr": "max(count(ceph_iscsi_lun_mapped) by(instance,gw_owner)) by(gw_owner)", - "format": "time_series", - "instant": true, - "intervalFactor": 1, - "legendFormat": "{{gw_owner}}", - "refId": "A" - } - ], - "title": "Active LUN Paths by Gateway", - "type": "grafana-piechart-panel", - "valueName": "current" - }, - { - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 10 - }, - "id": 26, - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "Local", - "fill": 1, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 11 - }, - "id": 15, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "minSpan": 12, - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "round(\n sum by(gw_name) \n 
(rate(ceph_iscsi_lun_iops[30s])\n )\n)", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{gw_name}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "IOPS by Gateway", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": null, - "format": "none", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 1, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 11 - }, - "id": 16, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "minSpan": 12, - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "round(\n sum by(gw_name) \n ((rate(ceph_iscsi_lun_read_bytes[30s])) + \n (rate(ceph_iscsi_lun_write_bytes[30s]))\n )\n )", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{gw_name}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Throughput by Gateway", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": null, - "format": "decbytes", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - 
"format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "Local", - "fill": 1, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 18 - }, - "id": 22, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "minSpan": 12, - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "100 - (avg by (gw) \n (label_replace(\n irate(node_cpu{job=\"node\",instance=~\"($iscsi_gws).*\",mode=\"idle\"}[30s]),\n \"gw\",\"$1\",\"instance\",\"([^.]*).*\")\n ) * 100)", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{gw}}", - "refId": "B" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "CPU Busy %", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": "100", - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "Local", - "fill": 1, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 18 - }, - "id": 23, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - 
"points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "label_replace(\n ((node_memory_MemTotal{job=\"node\", instance=~\"($iscsi_gws).*\"} - node_memory_MemFree) / node_memory_MemTotal) * 100, \n \"gw\", \"$1\", \"instance\",\"([^.]*).*\")", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{gw}}", - "refId": "D" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "RAM Util%", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": "100", - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "repeat": null, - "title": "Gateway Load", - "type": "row" - }, - { - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 11 - }, - "id": 27, - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "Local", - "fill": 1, - "gridPos": { - "h": 7, - "w": 24, - "x": 0, - "y": 12 - }, - "id": 17, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "minSpan": 24, - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "round(sum by(shortname) (label_replace(rate(ceph_iscsi_lun_iops[30s]),\"shortname\",\"$2\",\"client_iqn\",\"(.*):(.*)\")))", - "format": "time_series", - "instant": false, - "intervalFactor": 1, - "legendFormat": 
"{{shortname}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "IOPS by Client (R+W)", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "none", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "Local", - "fill": 1, - "gridPos": { - "h": 7, - "w": 24, - "x": 0, - "y": 19 - }, - "id": 19, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "minSpan": 24, - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "round(\n sum by(shortname) \n ((label_replace(rate(ceph_iscsi_lun_read_bytes[30s]),\"shortname\",\"$2\",\"client_iqn\",\"(.*):(.*)\")) +\n (label_replace(rate(ceph_iscsi_lun_write_bytes[30s]),\"shortname\",\"$2\",\"client_iqn\",\"(.*):(.*)\"))\n ))", - "format": "time_series", - "instant": false, - "intervalFactor": 1, - "legendFormat": "{{shortname}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Throughput by Client", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "decbytes", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, 
- { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - } - ], - "repeat": null, - "title": "Client Workloads", - "type": "row" - } - ], - "refresh": "15s", - "schemaVersion": 16, - "style": "dark", - "tags": [], - "templating": { - "list": [ - { - "allValue": null, - "current": {}, - "datasource": null, - "hide": 2, - "includeAll": false, - "label": null, - "multi": false, - "name": "gateway_iqn", - "options": [], - "query": "label_values(ceph_iscsi_gateway_tpg_total, gw_iqn)", - "refresh": 1, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "(eth|bon|en|ib|mlx)", - "value": "(eth|bon|en|ib|mlx)" - }, - "hide": 2, - "includeAll": false, - "label": null, - "multi": false, - "name": "interfaces", - "options": [ - { - "selected": true, - "text": "(eth|bon|en|ib|mlx)", - "value": "(eth|bon|en|ib|mlx)" - } - ], - "query": "(eth|bon|en|ib|mlx)", - "type": "custom" - }, - { - "allValue": null, - "current": {}, - "datasource": null, - "hide": 2, - "includeAll": true, - "label": null, - "multi": false, - "name": "iscsi_gws", - "options": [], - "query": "label_values(ceph_iscsi_scrape_duration_seconds, gw_name)", - "refresh": 1, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "15s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "", - "title": "iSCSI Overview", - "version": 38 -} diff --git a/ansible/roles/ceph-grafana/files/dashboards/mgr-prometheus/latency-by-server.json b/ansible/roles/ceph-grafana/files/dashboards/mgr-prometheus/latency-by-server.json 
deleted file mode 100644 index 7e9510f..0000000 --- a/ansible/roles/ceph-grafana/files/dashboards/mgr-prometheus/latency-by-server.json +++ /dev/null @@ -1,304 +0,0 @@ -{ - "__requires": [ - { - "type": "grafana", - "id": "grafana", - "name": "Grafana", - "version": "5.0.4" - }, - { - "type": "panel", - "id": "graph", - "name": "Graph", - "version": "5.0.0" - }, - { - "type": "datasource", - "id": "prometheus", - "name": "Local", - "version": "5.0.0" - } - ], - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "editable": false, - "gnetId": null, - "graphTooltip": 0, - "id": null, - "iteration": 1524194437238, - "links": [], - "panels": [ - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 18, - "panels": [], - "repeat": null, - "title": "All OSD Hosts", - "type": "row" - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 1, - "gridPos": { - "h": 7, - "w": 24, - "x": 0, - "y": 1 - }, - "id": 1, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "max(\n (irate(node_disk_read_time_ms[30s]) + irate(node_disk_write_time_ms[30s]) / \n (irate(node_disk_reads_completed[30s]) + irate(node_disk_writes_completed[30s])) +\n ignoring(ceph_daemon,job) ceph_disk_occupation))\n by(instance)", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{instance}}", - "refId": "B" - } - ], - "thresholds": [], - "timeFrom": null, - 
"timeShift": null, - "title": "All OSD Hosts - Highest Latency", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "none", - "label": "ms", - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 8 - }, - "id": 19, - "panels": [], - "repeat": null, - "title": "Each OSD Host's Max Disk Latency", - "type": "row" - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 1, - "gridPos": { - "h": 7, - "w": 4, - "x": 0, - "y": 9 - }, - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - { - "dashUri": "db/osd-node-detail", - "dashboard": "OSD Node Detail", - "includeVars": true, - "targetBlank": true, - "title": "OSD Node Detail", - "type": "dashboard" - } - ], - "minSpan": 4, - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": "osd_servers", - "repeatDirection": "h", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "max(label_replace((irate(node_disk_write_time_ms{instance=~\"[[osd_servers]].*\"}[30s]) + irate(node_disk_read_time_ms{instance=~\"[[osd_servers]].*\"}[30s])) / \n(irate(node_disk_writes_completed{instance=~\"[[osd_servers]].*\"}[30s]) + irate(node_disk_reads_completed{instance=~\"[[osd_servers]].*\"}[30s])),\"host\",\"$1\",\"instance\",\"([^.]*).*\")) by(host)", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "refId": 
"A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "$osd_servers Max Latency", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "none", - "label": "ms", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "refresh": "15s", - "schemaVersion": 16, - "style": "dark", - "tags": [], - "templating": { - "list": [ - { - "allValue": null, - "current": {}, - "datasource": null, - "hide": 2, - "includeAll": true, - "label": null, - "multi": false, - "name": "osd_servers", - "options": [], - "query": "label_values(ceph_disk_occupation, instance)", - "refresh": 1, - "regex": "([^.]*).*", - "sort": 1, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "15s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "", - "title": "Latency by Server", - "version": 12 -} diff --git a/ansible/roles/ceph-grafana/files/dashboards/mgr-prometheus/mds-performance.json b/ansible/roles/ceph-grafana/files/dashboards/mgr-prometheus/mds-performance.json deleted file mode 100644 index 6ef2390..0000000 --- a/ansible/roles/ceph-grafana/files/dashboards/mgr-prometheus/mds-performance.json +++ /dev/null @@ -1,546 +0,0 @@ -{ - "__requires": [ - { - "type": "grafana", - "id": "grafana", - "name": "Grafana", - "version": "5.0.4" - }, - { - "type": "panel", - "id": "graph", - "name": "Graph", - "version": "5.0.0" - }, - { - "type": "datasource", - "id": 
"prometheus", - "name": "Local", - "version": "5.0.0" - }, - { - "type": "panel", - "id": "singlestat", - "name": "Singlestat", - "version": "5.0.0" - } - ], - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "editable": false, - "gnetId": null, - "graphTooltip": 0, - "id": null, - "iteration": 1525407331553, - "links": [], - "panels": [ - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 8, - "panels": [], - "title": "MDS Overview", - "type": "row" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": null, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 5, - "w": 3, - "x": 0, - "y": 1 - }, - "id": 11, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": true - }, - "tableColumn": "", - "targets": [ - { - "expr": "count(ceph_mds_metadata)", - "format": "time_series", - "instant": true, - "intervalFactor": 1, - "refId": "A" - } - ], - "thresholds": "", - "title": "Active MDS Servers", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": 
"current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": null, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 5, - "w": 3, - "x": 3, - "y": 1 - }, - "id": 6, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": true - }, - "tableColumn": "", - "targets": [ - { - "expr": "count(ceph_fs_metadata)", - "format": "time_series", - "instant": true, - "intervalFactor": 1, - "refId": "A" - } - ], - "thresholds": "", - "title": "Filesystems", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": null, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 5, - "w": 3, - "x": 6, - "y": 1 - }, - "id": 12, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - 
"postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": true - }, - "tableColumn": "", - "targets": [ - { - "expr": "sum(ceph_mds_sessions_session_count)", - "format": "time_series", - "instant": true, - "intervalFactor": 1, - "refId": "A" - } - ], - "thresholds": "", - "title": "Clients", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 6 - }, - "id": 10, - "panels": [], - "title": "MDS Performance", - "type": "row" - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 1, - "gridPos": { - "h": 9, - "w": 12, - "x": 0, - "y": 7 - }, - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum(ceph_objecter_op_r{ceph_daemon=~\"($mds_servers).*\"})", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "MDS Reads", - "refId": "A" - }, - { - "expr": "sum(ceph_objecter_op_w{ceph_daemon=~\"($mds_servers).*\"})", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "MDS Writes", - "refId": "B" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "MDS Workload - $mds_servers", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - 
"buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "none", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 1, - "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 7 - }, - "id": 4, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "ceph_mds_server_handle_client_request{ceph_daemon=~\"($mds_servers).*\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{ceph_daemon}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Client Request Load - $mds_servers", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "none", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "refresh": "15s", - "schemaVersion": 16, - "style": "dark", - "tags": [], - "templating": { - "list": [ - { - "allValue": null, - "current": {}, - "datasource": null, - "hide": 0, - "includeAll": true, - "label": "MDS Server", - "multi": false, - "name": "mds_servers", - "options": [], - "query": "label_values(ceph_mds_inodes, 
ceph_daemon)", - "refresh": 1, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "15s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "", - "title": "MDS Performance", - "version": 3 -} diff --git a/ansible/roles/ceph-grafana/files/dashboards/mgr-prometheus/network-usage-by-node.json b/ansible/roles/ceph-grafana/files/dashboards/mgr-prometheus/network-usage-by-node.json deleted file mode 100644 index 7349f9f..0000000 --- a/ansible/roles/ceph-grafana/files/dashboards/mgr-prometheus/network-usage-by-node.json +++ /dev/null @@ -1,456 +0,0 @@ -{ - "__requires": [ - { - "type": "grafana", - "id": "grafana", - "name": "Grafana", - "version": "5.0.4" - }, - { - "type": "panel", - "id": "graph", - "name": "Graph", - "version": "5.0.0" - }, - { - "type": "datasource", - "id": "prometheus", - "name": "Local", - "version": "5.0.0" - } - ], - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "editable": false, - "gnetId": null, - "graphTooltip": 0, - "id": null, - "iteration": 1525134169600, - "links": [], - "panels": [ - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 4, - "panels": [], - "repeat": null, - "title": "Aggregated Network Load", - "type": "row" - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 1, - "gridPos": { - "h": 8, - "w": 24, - "x": 0, - "y": 1 - }, - "id": 1, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": 
false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum (irate(node_network_receive_bytes{instance=~\"([[mon_servers]]).*\",device=~\"[[interfaces]].*\"}[30s])) + \nsum (irate(node_network_transmit_bytes{instance=~\"([[mon_servers]]).*\",device=~\"[[interfaces]].*\"}[30s]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "MONs", - "refId": "A" - }, - { - "expr": "sum (irate(node_network_receive_bytes{instance=~\"[[osd_servers]].*\",device=~\"[[interfaces]].*\"}[30s])) + \nsum (irate(node_network_transmit_bytes{instance=~\"[[osd_servers]].*\",device=~\"[[interfaces]].*\"}[30s]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "OSDs", - "refId": "B" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Cluster Network Load", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 9 - }, - "id": 5, - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "Local", - "fill": 1, - "gridPos": { - "h": 7, - "w": 24, - "x": 0, - "y": 10 - }, - "id": 2, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - 
"linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum (irate(node_network_receive_bytes{instance=~\"([[mon_servers]]).*\", device=~\"[[interfaces]].*\"}[30s])) by (instance) + \nsum (irate(node_network_transmit_bytes{instance=~\"([[mon_servers]]).*\", device=~\"[[interfaces]].*\"}[30s])) by (instance)", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{instance}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "MON Network Load", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - } - ], - "repeat": null, - "title": "MON Hosts", - "type": "row" - }, - { - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 10 - }, - "id": 6, - "panels": [ - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "Local", - "fill": 1, - "gridPos": { - "h": 7, - "w": 24, - "x": 0, - "y": 11 - }, - "id": 3, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum \n 
(irate(node_network_receive_bytes{instance=~\"[[osd_servers]]\", device=~\"[[interfaces]].*\"}[30s]) + \n irate(node_network_transmit_bytes{instance=~\"[[osd_servers]]\", device=~\"[[interfaces]].*\"}[30s])) by(instance)\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "refId": "B" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "OSD Hosts Network Load", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - } - ], - "repeat": null, - "title": "OSD Hosts", - "type": "row" - } - ], - "refresh": "15s", - "schemaVersion": 16, - "style": "dark", - "tags": [], - "templating": { - "list": [ - { - "allValue": null, - "current": {}, - "datasource": null, - "hide": 2, - "includeAll": true, - "label": null, - "multi": false, - "name": "osd_servers", - "options": [], - "query": "label_values(ceph_disk_occupation, instance)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": {}, - "datasource": null, - "hide": 2, - "includeAll": true, - "label": null, - "multi": false, - "name": "only_osds", - "options": [], - "query": "label_values(ceph_server_metadata{services=\"osd\"}, hostname)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": {}, - "datasource": null, - "hide": 2, - "includeAll": true, - "label": null, - "multi": false, - "name": "mon_servers", - "options": [], - 
"query": "label_values(ceph_mon_quorum_status, ceph_daemon)", - "refresh": 1, - "regex": "/mon.(.*)/", - "sort": 1, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "(eth|en|bond|mlx|ib)", - "value": "(eth|en|bond|mlx|ib)" - }, - "hide": 2, - "includeAll": false, - "label": null, - "multi": false, - "name": "interfaces", - "options": [ - { - "selected": true, - "text": "(eth|en|bond|mlx|ib)", - "value": "(eth|en|bond|mlx|ib)" - } - ], - "query": "(eth|en|bond|mlx|ib)", - "type": "custom" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "15s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "", - "title": "Network Usage by Server", - "version": 19 -} diff --git a/ansible/roles/ceph-grafana/files/dashboards/mgr-prometheus/osd-node-detail.json b/ansible/roles/ceph-grafana/files/dashboards/mgr-prometheus/osd-node-detail.json deleted file mode 100644 index 2dc231b..0000000 --- a/ansible/roles/ceph-grafana/files/dashboards/mgr-prometheus/osd-node-detail.json +++ /dev/null @@ -1,1150 +0,0 @@ -{ - "__requires": [ - { - "type": "grafana", - "id": "grafana", - "name": "Grafana", - "version": "5.0.4" - }, - { - "type": "panel", - "id": "graph", - "name": "Graph", - "version": "5.0.0" - }, - { - "type": "datasource", - "id": "prometheus", - "name": "Local", - "version": "5.0.0" - }, - { - "type": "panel", - "id": "singlestat", - "name": "Singlestat", - "version": "5.0.0" - }, - { - "type": "panel", - "id": "table", - "name": "Table", - "version": "5.0.0" - } - ], - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": 
"dashboard" - } - ] - }, - "editable": false, - "gnetId": null, - "graphTooltip": 0, - "id": null, - "iteration": 1526509711107, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "tags": [ - "overview" - ], - "title": "Shortcuts", - "type": "dashboards" - } - ], - "panels": [ - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 11, - "panels": [], - "repeat": null, - "title": "'$osd_servers' OSD Overview", - "type": "row" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": null, - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 0, - "y": 1 - }, - "height": "160", - "id": 1, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": "", - "minSpan": 4, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "count(ceph_disk_occupation{device=~\"($device_id)\", instance=~\"($osd_servers).*\"})", - "format": "time_series", - "intervalFactor": 2, - "refId": "A", - "step": 40, - "textEditor": true - } - ], - "thresholds": "", - "title": "OSDs", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "columns": [], - "datasource": null, - 
"description": "", - "fontSize": "100%", - "gridPos": { - "h": 8, - "w": 13, - "x": 4, - "y": 1 - }, - "height": "160", - "hideTimeOverride": false, - "id": 3, - "links": [], - "minSpan": 6, - "pageSize": 20, - "scroll": true, - "showHeader": true, - "sort": { - "col": 1, - "desc": false - }, - "styles": [ - { - "alias": "Hostname", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "aa_hostname", - "thresholds": [], - "type": "string", - "unit": "short" - }, - { - "alias": "OSD", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "ceph_daemon", - "thresholds": [], - "type": "string", - "unit": "short" - }, - { - "alias": "Device", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "device", - "thresholds": [], - "type": "string", - "unit": "short" - }, - { - "alias": "Device Type", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "device_class", - "thresholds": [], - "type": "number", - "unit": "short" - }, - { - "alias": "Size", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "Value", - "thresholds": [], - "type": "number", - "unit": "bytes" - }, - { - "alias": "Ceph Version", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 
2, - "pattern": "ceph_ver", - "thresholds": [], - "type": "string", - "unit": "short" - }, - { - "alias": "", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "pattern": "/.*/", - "thresholds": [], - "type": "hidden", - "unit": "short" - } - ], - "targets": [ - { - "expr": "(label_replace(ceph_disk_occupation{instance=~\"($osd_servers).*\",device=~\"($device_id)\"},\"aa_hostname\",\"$1\",\"instance\",\"(.*)\") * \n on(ceph_daemon) group_left(aa_instance) ceph_osd_stat_bytes) *\n on(ceph_daemon) group_left(device_class,ceph_ver) label_replace(label_replace(ceph_osd_metadata,\"ceph_daemon\",\"osd.$1\",\"id\",\"(.*)\"),\"ceph_ver\",\"$1\",\"ceph_version\",\"ceph version (.*) (.*) (.*) (.*)\")", - "format": "table", - "hide": false, - "instant": true, - "intervalFactor": 1, - "refId": "C" - } - ], - "timeFrom": null, - "title": "Host OSD Breakdown", - "transform": "table", - "type": "table" - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "datasource": null, - "decimals": 0, - "description": "Each OSD consists of a Journal/WAL partition and a data partition. 
The RAW Capacity shown is the sum of the data partitions across all OSDs on the selected OSD hosts.", - "format": "bytes", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 0, - "y": 5 - }, - "height": "160", - "id": 2, - "interval": null, - "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": "", - "minSpan": 4, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "sum(ceph_osd_stat_bytes and on (ceph_daemon) ceph_disk_occupation{device=~\"($device_id)\", instance=~\"($osd_servers).*\"})", - "format": "time_series", - "intervalFactor": 2, - "refId": "A", - "step": 40, - "textEditor": true - } - ], - "thresholds": "", - "title": "Raw Capacity", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 9 - }, - "id": 12, - "panels": [], - "repeat": null, - "title": "'$osd_servers' Performance Statistics", - "type": "row" - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 1, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 10 - }, - "id": 5, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "minSpan": 12, 
- "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "max by (device) (\n irate(node_disk_io_time_ms[5m]) and on (instance, device) ceph_disk_occupation{instance=~\"($osd_servers).*\", device=~\"($device_id)\"}\n) / 10", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "{{device}}", - "refId": "A", - "step": 10, - "textEditor": true - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "$osd_servers Disk utilisation", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": "%Util", - "logBase": 1, - "max": "100", - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 1, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 10 - }, - "id": 6, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "minSpan": 12, - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "max by (device) (\n irate(node_disk_reads_completed{device=~\"($device_id)\", instance=~\"($osd_servers).*\"}[5m]) +\n irate(node_disk_writes_completed{device=~\"($device_id)\", instance=~\"($osd_servers).*\"}[5m])\n)", - "format": "time_series", - 
"intervalFactor": 1, - "legendFormat": "{{device}}", - "refId": "A", - "step": 10, - "textEditor": true - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "$osd_servers Disk IOPS", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": "IOPS", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 1, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 17 - }, - "id": 7, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "minSpan": 12, - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "max by (device) (\n irate(node_disk_write_time_ms{device=~ \"($device_id)\", instance=~\"($osd_servers).*\"}[5m])\n /\n clamp_min(irate(node_disk_writes_completed{device=~ \"($device_id)\", instance=~\"($osd_servers).*\"}[5m]), 0.001)\n+\n irate(node_disk_read_time_ms{device=~ \"($device_id)\", instance=~\"($osd_servers).*\"}[5m])\n /\n clamp_min(irate(node_disk_reads_completed{device=~ \"($device_id)\", instance=~\"($osd_servers).*\"}[5m]), 0.001)\n)", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "{{device}}", - "refId": "A", - "step": 10, - "textEditor": true - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "$osd_servers Disk Latency", 
- "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "ms", - "label": "", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 1, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 17 - }, - "id": 8, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "minSpan": 12, - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "max by (device) (\n irate(node_disk_bytes_read{device=~\"($device_id)\", instance=~\"($osd_servers).*\"}[5m]) + \n irate(node_disk_bytes_written{device=~\"($device_id)\", instance=~\"($osd_servers).*\"}[5m])\n)", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{device}}", - "refId": "A", - "step": 10, - "textEditor": true - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "$osd_servers Throughput by Disk", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "decbytes", - "label": "", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - 
"aliasColors": { - "interrupt": "#447EBC", - "steal": "#6D1F62", - "system": "#890F02", - "user": "#3F6833", - "wait": "#C15C17" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "description": "Shows the CPU breakdown. When multiple servers are selected, only the first host's cpu data is shown", - "fill": 3, - "gridPos": { - "h": 7, - "w": 8, - "x": 0, - "y": 24 - }, - "id": 9, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "minSpan": 12, - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum by (mode) (\n irate(node_cpu{instance=~\"($osd_servers).*\", mode=~\"(irq|nice|softirq|steal|system|user|iowait)\"}[5m])\n) / scalar(\n sum(irate(node_cpu{instance=~\"($osd_servers).*\"}[5m]))\n) * 100", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{mode}}", - "refId": "A", - "step": 10, - "textEditor": true - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "$osd_servers CPU Utilisation", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": "", - "logBase": 1, - "max": "100", - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { - "Available": "#508642", - "Free": "#508642", - "Total": "#bf1b00", - "Used": "#bf1b00", - "total": "#bf1b00", - "used": "#0a50a1" - }, - "bars": false, - "dashLength": 10, - "dashes": 
false, - "datasource": null, - "fill": 1, - "gridPos": { - "h": 7, - "w": 8, - "x": 8, - "y": 24 - }, - "id": 14, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - { - "alias": "total", - "color": "#bf1b00", - "fill": 0, - "linewidth": 2, - "stack": false - } - ], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum(node_memory_MemTotal{instance=~\"[[osd_servers]].*\"}) - (\n sum(node_memory_MemFree{instance=~\"[[osd_servers]].*\"}) + \n sum(node_memory_Cached{instance=~\"[[osd_servers]].*\"}) + \n sum(node_memory_Buffers{instance=~\"[[osd_servers]].*\"}) +\n sum(node_memory_Slab{instance=~\"[[osd_servers]].*\"})\n )\n \n", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "used", - "refId": "D" - }, - { - "expr": "sum(node_memory_MemFree{instance=~\"[[osd_servers]].*\"}) ", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Free", - "refId": "A" - }, - { - "expr": "sum(node_memory_Cached{instance=~\"[[osd_servers]].*\"}) + \nsum(node_memory_Buffers{instance=~\"[[osd_servers]].*\"}) +\nsum(node_memory_Slab{instance=~\"[[osd_servers]].*\"}) \n", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "buffers/cache", - "refId": "C" - }, - { - "expr": "sum(node_memory_MemTotal{instance=~\"[[osd_servers]].*\"})", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "total", - "refId": "B" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "RAM Usage", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, 
- "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "fill": 1, - "gridPos": { - "h": 7, - "w": 8, - "x": 16, - "y": 24 - }, - "id": 10, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "hideZero": true, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "minSpan": 12, - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum by (device) (irate(node_network_receive_bytes{instance=~\"($osd_servers).*\", device=~\"(eth|en|bond|ib|mlx).*\"}[5m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{device}}.rx", - "refId": "A", - "step": 10, - "textEditor": true - }, - { - "expr": "sum by (device) (irate(node_network_transmit_bytes{instance=~\"($osd_servers).*\", device=~\"(eth|en|bond|ib|mlx).*\"}[5m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{device}}.tx", - "refId": "B", - "step": 10 - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "$osd_servers Network Load", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "decbytes", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - 
] - } - ], - "refresh": "10s", - "schemaVersion": 16, - "style": "dark", - "tags": [ - "overview" - ], - "templating": { - "list": [ - { - "allValue": null, - "current": {}, - "datasource": null, - "hide": 0, - "includeAll": true, - "label": "OSD Host Name", - "multi": false, - "name": "osd_servers", - "options": [], - "query": "label_values(ceph_disk_occupation, instance)", - "refresh": 1, - "regex": "([^.]*).*", - "sort": 3, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": {}, - "datasource": null, - "hide": 0, - "includeAll": true, - "label": "Disk Name", - "multi": true, - "name": "device_id", - "options": [], - "query": "ceph_disk_occupation", - "refresh": 1, - "regex": "/device=\"([^\"]*)\"/", - "sort": 0, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "selected": true, - "text": "60", - "value": "60" - }, - "hide": 2, - "includeAll": false, - "label": null, - "multi": false, - "name": "max_devices", - "options": [ - { - "selected": true, - "text": "60", - "value": "60" - } - ], - "query": "60", - "type": "custom" - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "browser", - "title": "OSD Node Detail", - "version": 15 -} diff --git a/ansible/roles/ceph-grafana/files/dashboards/mgr-prometheus/tests/__init__.py b/ansible/roles/ceph-grafana/files/dashboards/mgr-prometheus/tests/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/ansible/roles/ceph-grafana/files/dashboards/mgr-prometheus/tests/test_mgr_dashboards.py b/ansible/roles/ceph-grafana/files/dashboards/mgr-prometheus/tests/test_mgr_dashboards.py 
deleted file mode 100644 index 4d417f4..0000000 --- a/ansible/roles/ceph-grafana/files/dashboards/mgr-prometheus/tests/test_mgr_dashboards.py +++ /dev/null @@ -1,66 +0,0 @@ -import pytest - -from .util import TestDashboards, get_dashboards - - -def walk(obj, callback, parent_key=None, path=None): - if path is None: - path = '.' - if isinstance(obj, dict): - for key, value in obj.items(): - walk( - value, - callback, - parent_key=key, - path='{}["{}"]'.format(path, key), - ) - elif isinstance(obj, list): - for i in range(len(obj)): - walk( - obj[i], - callback, - parent_key=parent_key, - path='{}[{}]'.format(path, i), - ) - else: - callback(obj, parent_key, path) - - -class TestMgrDashboards(TestDashboards): - dashboards = get_dashboards() - - @pytest.mark.parametrize("name", dashboards.keys()) - def test_type(self, name): - assert name - obj = self.dashboards[name] - assert type(obj) is dict - - @pytest.mark.parametrize("name", dashboards.keys()) - def test_no_collectd(self, name): - def test(item, pkey, path): - if type(item) in (basestring, unicode): - assert 'collectd' not in item - walk(self.dashboards[name], test) - - @pytest.mark.parametrize("name", dashboards.keys()) - def test_no_ds_local(self, name): - def test(item, pkey, path): - if type(item) in (basestring, unicode): - assert '${DS_LOCAL}' not in item - walk(self.dashboards[name], test) - - @pytest.mark.parametrize("name", dashboards.keys()) - def test_no_influxdb_dstype(self, name): - def test(item, pkey, path): - if pkey == 'dsType' and type(item) in (basestring, unicode): - assert 'influxdb' not in item - walk(self.dashboards[name], test) - - @pytest.mark.parametrize("name", dashboards.keys()) - def test_no_influxdb_query(self, name): - def test(item, pkey, path): - if pkey == 'query': - assert 'SELECT' not in item - assert 'FROM' not in item - assert 'WHERE' not in item - walk(self.dashboards[name], test) diff --git a/ansible/roles/ceph-grafana/files/dashboards/mgr-prometheus/tests/util.py 
b/ansible/roles/ceph-grafana/files/dashboards/mgr-prometheus/tests/util.py deleted file mode 120000 index 47f38a1..0000000 --- a/ansible/roles/ceph-grafana/files/dashboards/mgr-prometheus/tests/util.py +++ /dev/null @@ -1 +0,0 @@ -../../tests/util.py \ No newline at end of file diff --git a/ansible/roles/ceph-grafana/tasks/push_dashboards.yml b/ansible/roles/ceph-grafana/tasks/push_dashboards.yml index a2b5f22..48e5e64 100644 --- a/ansible/roles/ceph-grafana/tasks/push_dashboards.yml +++ b/ansible/roles/ceph-grafana/tasks/push_dashboards.yml @@ -11,7 +11,7 @@ get_mime: false connection: local become: false - with_fileglob: "dashboards/{{ backend.metrics }}-{{ backend.storage }}/*.json" + with_fileglob: "dashboards/cephmetrics/*.json" register: dashboard_files no_log: true