From: Jan Fajerski Date: Wed, 6 Mar 2019 08:25:24 +0000 (+0100) Subject: monitoring: update Grafana dashboards X-Git-Tag: v15.1.0~2671^2 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=e7a4437fdcfb5ee2156262f7692ebda5f2dab931;p=ceph-ci.git monitoring: update Grafana dashboards Fix various panels that used outdated metric names, clunky or unnecessary label_replace calls. Also unify the style of many panels. Fixes: http://tracker.ceph.com/issues/39652 Signed-off-by: Jan Fajerski --- diff --git a/monitoring/grafana/dashboards/ceph-cluster.json b/monitoring/grafana/dashboards/ceph-cluster.json index 386f71688d4..eb691a1be30 100644 --- a/monitoring/grafana/dashboards/ceph-cluster.json +++ b/monitoring/grafana/dashboards/ceph-cluster.json @@ -35,7 +35,7 @@ "list": [] }, "description": "Ceph cluster overview", - "editable": true, + "editable": false, "gnetId": null, "graphTooltip": 0, "id": null, @@ -52,7 +52,7 @@ "rgb(255, 0, 0)" ], "datasource": "$datasource", - "editable": true, + "editable": false, "error": false, "format": "none", "gauge": { @@ -850,7 +850,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(deriv(ceph_pool_bytes_used{instance=~\"$instance\"}[1m]))", + "expr": "sum(deriv(ceph_pool_stored{instance=~\"$instance\"}[1m]))", "format": "time_series", "intervalFactor": 1, "refId": "A" diff --git a/monitoring/grafana/dashboards/cephfs-overview.json b/monitoring/grafana/dashboards/cephfs-overview.json index 70613924279..b131cc2079f 100644 --- a/monitoring/grafana/dashboards/cephfs-overview.json +++ b/monitoring/grafana/dashboards/cephfs-overview.json @@ -1,10 +1,11 @@ { + "__inputs": [], "__requires": [ { "type": "grafana", "id": "grafana", "name": "Grafana", - "version": "5.0.0" + "version": "5.3.2" }, { "type": "panel", @@ -30,7 +31,7 @@ "gnetId": null, "graphTooltip": 0, "id": null, - "iteration": 1534386614546, + "iteration": 1557392920097, "links": [], "panels": [ { @@ -52,7 +53,7 @@ "dashLength": 10, "dashes": false, 
"datasource": "$datasource", - "fill": 1, + "fill": 0, "gridPos": { "h": 9, "w": 12, @@ -77,7 +78,12 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "/.*Reads/", + "transform": "negative-Y" + } + ], "spaceLength": 10, "stack": true, "steppedLine": false, @@ -86,14 +92,14 @@ "expr": "sum(ceph_objecter_op_r{ceph_daemon=~\"($mds_servers).*\"})", "format": "time_series", "intervalFactor": 1, - "legendFormat": "MDS Reads", + "legendFormat": "Read Ops", "refId": "A" }, { "expr": "sum(ceph_objecter_op_w{ceph_daemon=~\"($mds_servers).*\"})", "format": "time_series", "intervalFactor": 1, - "legendFormat": "MDS Writes", + "legendFormat": "Write Ops", "refId": "B" } ], @@ -117,7 +123,7 @@ "yaxes": [ { "format": "none", - "label": null, + "label": "Reads(-) / Writes (+)", "logBase": 1, "max": null, "min": "0", @@ -131,7 +137,11 @@ "min": null, "show": true } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "aliasColors": {}, @@ -139,7 +149,7 @@ "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, + "fill": 0, "gridPos": { "h": 9, "w": 12, @@ -197,7 +207,7 @@ "yaxes": [ { "format": "none", - "label": null, + "label": "Client Requests", "logBase": 1, "max": null, "min": "0", @@ -211,7 +221,11 @@ "min": null, "show": false } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } } ], "refresh": "15s", @@ -222,7 +236,6 @@ "list": [ { "current": { - "tags": [], "text": "default", "value": "default" }, @@ -233,6 +246,7 @@ "query": "prometheus", "refresh": 1, "regex": "", + "skipUrlSync": false, "type": "datasource" }, { @@ -248,6 +262,7 @@ "query": "label_values(ceph_mds_inodes, ceph_daemon)", "refresh": 1, "regex": "", + "skipUrlSync": false, "sort": 0, "tagValuesQuery": "", "tags": [], @@ -289,6 +304,6 @@ }, "timezone": "", "title": "MDS Performance", - "uid": "rRfFzWtik", + "uid": "tbO9LAiZz", "version": 2 } diff --git 
a/monitoring/grafana/dashboards/host-details.json b/monitoring/grafana/dashboards/host-details.json index 47df2dd4e06..df68b65e34b 100644 --- a/monitoring/grafana/dashboards/host-details.json +++ b/monitoring/grafana/dashboards/host-details.json @@ -1,10 +1,11 @@ { + "__inputs": [], "__requires": [ { "type": "grafana", "id": "grafana", "name": "Grafana", - "version": "5.0.0" + "version": "5.3.2" }, { "type": "panel", @@ -36,7 +37,7 @@ "gnetId": null, "graphTooltip": 0, "id": null, - "iteration": 1538021829737, + "iteration": 1557386759572, "links": [ { "asDropdown": true, @@ -157,7 +158,7 @@ "dashes": false, "datasource": "$datasource", "description": "Shows the CPU breakdown. When multiple servers are selected, only the first host's cpu data is shown", - "fill": 3, + "fill": 1, "gridPos": { "h": 10, "w": 6, @@ -219,11 +220,11 @@ }, "yaxes": [ { - "format": "short", - "label": "", + "format": "percent", + "label": "% Utilization", "logBase": 1, "max": "100", - "min": "0", + "min": null, "show": true }, { @@ -234,7 +235,11 @@ "min": null, "show": false } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "aliasColors": { @@ -339,7 +344,7 @@ "yaxes": [ { "format": "bytes", - "label": null, + "label": "RAM used", "logBase": 1, "max": null, "min": "0", @@ -353,7 +358,11 @@ "min": null, "show": false } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "aliasColors": {}, @@ -362,7 +371,7 @@ "dashes": false, "datasource": "$datasource", "description": "Show the network load (rx,tx) across all interfaces (excluding loopback 'lo')", - "fill": 1, + "fill": 0, "gridPos": { "h": 10, "w": 6, @@ -391,9 +400,14 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "/.*tx/", + "transform": "negative-Y" + } + ], "spaceLength": 10, - "stack": true, + "stack": false, "steppedLine": false, "targets": [ { @@ -434,10 +448,10 @@ "yaxes": [ { "format": "decbytes", - "label": null, + 
"label": "Send (-) / Receive (+)", "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { @@ -448,7 +462,11 @@ "min": null, "show": false } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "aliasColors": {}, @@ -456,7 +474,7 @@ "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, + "fill": 0, "gridPos": { "h": 5, "w": 3, @@ -482,24 +500,36 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "/.*tx/", + "transform": "negative-Y" + } + ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { - "expr": "irate(node_network_transmit_drop{instance=~\"[[ceph_hosts]].*\"}[1m]) or irate(node_network_transmit_drop_total{instance=~\"[[ceph_hosts]].*\"}[1m]) + \nirate(node_network_receive_drop{instance=~\"[[ceph_hosts]].*\"}[1m]) or irate(node_network_receive_drop_total{instance=~\"[[ceph_hosts]].*\"}[1m])", + "expr": "irate(node_network_receive_drop{instance=~\"[[ceph_hosts]].*\"}[1m]) or irate(node_network_receive_drop_total{instance=~\"[[ceph_hosts]].*\"}[1m])", "format": "time_series", "instant": false, "intervalFactor": 1, - "legendFormat": "{{device}}", + "legendFormat": "{{device}}.rx", "refId": "A" + }, + { + "expr": "irate(node_network_transmit_drop{instance=~\"[[ceph_hosts]].*\"}[1m]) or irate(node_network_transmit_drop_total{instance=~\"[[ceph_hosts]].*\"}[1m])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}}.tx", + "refId": "B" } ], "thresholds": [], "timeFrom": "15m", "timeShift": null, - "title": "Network Drops (last 15mins)", + "title": "Network drop rate", "tooltip": { "shared": true, "sort": 0, @@ -515,8 +545,8 @@ }, "yaxes": [ { - "format": "short", - "label": null, + "format": "pps", + "label": "Send (-) / Receive (+)", "logBase": 1, "max": null, "min": "0", @@ -530,7 +560,11 @@ "min": null, "show": false } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { 
"cacheTimeout": null, @@ -597,7 +631,7 @@ "tableColumn": "", "targets": [ { - "expr": "sum(ceph_osd_stat_bytes and on (ceph_daemon) ceph_disk_occupation{instance=~\"($ceph_hosts).*\"})", + "expr": "sum(ceph_osd_stat_bytes and on (ceph_daemon) ceph_disk_occupation{exported_instance=~\"($ceph_hosts).*\"})", "format": "time_series", "intervalFactor": 2, "refId": "A", @@ -624,7 +658,7 @@ "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, + "fill": 0, "gridPos": { "h": 5, "w": 3, @@ -650,24 +684,36 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "/.*tx/", + "transform": "negative-Y" + } + ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { - "expr": "irate(node_network_transmit_errs{instance=~\"[[ceph_hosts]].*\"}[1m]) or irate(node_network_transmit_errs_total{instance=~\"[[ceph_hosts]].*\"}[1m])+ \nirate(node_network_receive_errs{instance=~\"[[ceph_hosts]].*\"}[1m]) or irate(node_network_receive_errs_total{instance=~\"[[ceph_hosts]].*\"}[1m])", + "expr": "irate(node_network_receive_errs{instance=~\"[[ceph_hosts]].*\"}[1m]) or irate(node_network_receive_errs_total{instance=~\"[[ceph_hosts]].*\"}[1m])", "format": "time_series", "instant": false, "intervalFactor": 1, - "legendFormat": "{{device}}", + "legendFormat": "{{device}}.rx", "refId": "A" + }, + { + "expr": "irate(node_network_transmit_errs{instance=~\"[[ceph_hosts]].*\"}[1m]) or irate(node_network_transmit_errs_total{instance=~\"[[ceph_hosts]].*\"}[1m])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}}.tx", + "refId": "B" } ], "thresholds": [], "timeFrom": "15m", "timeShift": null, - "title": "Network Errors(last 15mins)", + "title": "Network error rate", "tooltip": { "shared": true, "sort": 0, @@ -683,8 +729,8 @@ }, "yaxes": [ { - "format": "short", - "label": null, + "format": "pps", + "label": "Send (-) / Receive (+)", "logBase": 1, "max": null, "min": "0", @@ 
-698,10 +744,14 @@ "min": null, "show": false } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { - "collapsed": true, + "collapsed": false, "gridPos": { "h": 1, "w": 24, @@ -709,348 +759,391 @@ "y": 11 }, "id": 12, - "panels": [ + "panels": [], + "repeat": null, + "title": "OSD Disk Performance Statistics", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "For any OSD devices on the host, this chart shows the iops per physical device. Each device is shown by it's name and corresponding OSD id value", + "fill": 1, + "gridPos": { + "h": 9, + "w": 11, + "x": 0, + "y": 12 + }, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 12, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "For any OSD devices on the host, this chart shows the iops per physical device. 
Each device is shown by it's name and corresponding OSD id value", - "fill": 1, - "gridPos": { - "h": 9, - "w": 11, - "x": 0, - "y": 12 - }, - "id": 6, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "minSpan": 12, - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "((irate(node_disk_reads_completed{instance=~\"($ceph_hosts).*\"}[5m]) or irate(node_disk_reads_completed_total{instance=~\"($ceph_hosts).*\"}[5m]) ) + \n(irate(node_disk_writes_completed{instance=~\"($ceph_hosts).*\"}[5m]) or irate(node_disk_writes_completed_total{instance=~\"($ceph_hosts).*\"}[5m]))) *\non(instance, device) group_left(osd_id) label_replace(ceph_disk_occupation, \"osd_id\",\"$1\",\"ceph_daemon\",\"osd.(.*)\") ", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{device}}({{osd_id}})", - "refId": "A", - "step": 10, - "textEditor": true - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "$ceph_hosts Disk IOPS", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": "IOPS", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] + "alias": "/.*reads/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "(irate(node_disk_writes_completed{instance=~\"($ceph_hosts).*\"}[5m]) or 
irate(node_disk_writes_completed_total{instance=~\"($ceph_hosts).*\"}[5m])) * on(instance, device) group_left(ceph_daemon) label_replace(label_replace(ceph_disk_occupation, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1:9100\", \"exported_instance\", \"(.*)\")", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}}({{ceph_daemon}}) writes", + "refId": "A", + "step": 10, + "textEditor": true }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "Show disk utilization % (util) of any OSD devices on the host by the physical device name and associated OSD id.", - "fill": 1, - "gridPos": { - "h": 9, - "w": 11, - "x": 12, - "y": 12 - }, - "id": 5, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "minSpan": 12, - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "(\n (irate(node_disk_io_time_ms[5m]) / 10 ) or\n irate(node_disk_io_time_seconds_total[5m]) * 100)\n* on(instance, device) group_left(osd_id) label_replace(ceph_disk_occupation{instance=~\"($ceph_hosts).*\"}, \"osd_id\",\"$1\",\"ceph_daemon\",\"osd.(.*)\")\n", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "{{device}}({{osd_id}})", - "refId": "C", - "step": 10, - "textEditor": true - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "$ceph_hosts Disk utilisation", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": 
"%Util", - "logBase": 1, - "max": "100", - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] + "expr": "(irate(node_disk_reads_completed{instance=~\"($ceph_hosts).*\"}[5m]) or irate(node_disk_reads_completed_total{instance=~\"($ceph_hosts).*\"}[5m])) * on(instance, device) group_left(ceph_daemon) label_replace(label_replace(ceph_disk_occupation, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1:9100\", \"exported_instance\", \"(.*)\")", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "{{device}}({{ceph_daemon}}) reads", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$ceph_hosts Disk IOPS", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ops", + "label": "Read (-) / Write (+)", + "logBase": 1, + "max": null, + "min": "0", + "show": true }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "For OSD hosts, this chart shows the latency at the physical drive. 
Each drive is shown by device name, with it's corresponding OSD id", - "fill": 1, - "gridPos": { - "h": 9, - "w": 11, - "x": 0, - "y": 21 - }, - "id": 7, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "minSpan": 12, - "nullPointMode": "null as zero", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "max by(instance,device) (((\n (irate(node_disk_write_time_ms{ instance=~\"($ceph_hosts).*\"}[5m]) )\n / clamp_min(irate(node_disk_writes_completed{ instance=~\"($ceph_hosts).*\"}[5m]), 0.001) or \n (irate(node_disk_read_time_ms{ instance=~\"($ceph_hosts).*\"}[5m]) )\n / clamp_min(irate(node_disk_reads_completed{ instance=~\"($ceph_hosts).*\"}[5m]), 0.001)\n ) / 1000) or\n (irate(node_disk_write_time_seconds_total{ instance=~\"($ceph_hosts).*\"}[5m]) )\n / clamp_min(irate(node_disk_writes_completed_total{ instance=~\"($ceph_hosts).*\"}[5m]), 0.001) or \n (irate(node_disk_read_time_seconds_total{ instance=~\"($ceph_hosts).*\"}[5m]) )\n / clamp_min(irate(node_disk_reads_completed_total{ instance=~\"($ceph_hosts).*\"}[5m]), 0.001)\n ) *\n on(instance,device) group_left(osd_id) label_replace(ceph_disk_occupation,\"osd_id\",\"$1\",\"ceph_daemon\",\"osd.(.*)\")", - "format": "time_series", - "hide": false, - "intervalFactor": 1, - "legendFormat": "{{device}}({{osd_id}})", - "refId": "D" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "$ceph_hosts Disk Latency", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "s", - "label": "", - "logBase": 1, - "max": null, - "min": "0", - 
"show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "For OSD hosts, this chart shows the disk bandwidth (read bytes/sec + write bytes/sec) of the physical OSD device. Each device is shown by device name, and corresponding OSD id", + "fill": 1, + "gridPos": { + "h": 9, + "w": 11, + "x": 12, + "y": 12 + }, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 12, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/.*read/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "(irate(node_disk_bytes_written{instance=~\"($ceph_hosts).*\"}[5m]) or irate(node_disk_written_bytes_total{instance=~\"($ceph_hosts).*\"}[5m])) * on(instance, device) group_left(ceph_daemon) label_replace(label_replace(ceph_disk_occupation, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1:9100\", \"exported_instance\", \"(.*)\")", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}}({{ceph_daemon}}) write", + "refId": "B" }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "For OSD hosts, this chart shows the disk bandwidth (read bytes/sec + write bytes/sec) of the physical OSD device. 
Each device is shown by device name, and corresponding OSD id", - "fill": 1, - "gridPos": { - "h": 9, - "w": 11, - "x": 12, - "y": 21 - }, - "id": 8, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "minSpan": 12, - "nullPointMode": "connected", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": " ((irate(node_disk_bytes_read{instance=~\"($ceph_hosts).*\"}[5m]) or irate(node_disk_read_bytes_total{instance=~\"($ceph_hosts).*\"}[5m])) + \n (irate(node_disk_bytes_written{instance=~\"($ceph_hosts).*\"}[5m]) or irate(node_disk_written_bytes_total{instance=~\"($ceph_hosts).*\"}[5m]))\n ) * \n on(instance,device) group_left(osd_id) label_replace(ceph_disk_occupation,\"osd_id\",\"$1\",\"ceph_daemon\",\"osd.(.*)\")", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{device}}({{osd_id}})", - "refId": "A", - "step": 10, - "textEditor": true - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "$ceph_hosts Throughput by Disk", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "decbytes", - "label": "", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] + "expr": "(irate(node_disk_bytes_read{instance=~\"($ceph_hosts).*\"}[5m]) or irate(node_disk_read_bytes_total{instance=~\"($ceph_hosts).*\"}[5m])) * on(instance, device) group_left(ceph_daemon) label_replace(label_replace(ceph_disk_occupation, \"device\", \"$1\", \"device\", 
\"/dev/(.*)\"), \"instance\", \"$1:9100\", \"exported_instance\", \"(.*)\")", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}}({{ceph_daemon}}) read", + "refId": "C" } ], - "repeat": null, - "title": "OSD Disk Performance Statistics", - "type": "row" + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$ceph_hosts Throughput by Disk", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": "Read (-) / Write (+)", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "For OSD hosts, this chart shows the latency at the physical drive. 
Each drive is shown by device name, with it's corresponding OSD id", + "fill": 1, + "gridPos": { + "h": 9, + "w": 11, + "x": 0, + "y": 21 + }, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 12, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max by(instance,device) (((\n (irate(node_disk_write_time_ms{ instance=~\"($ceph_hosts).*\"}[5m]) )\n / clamp_min(irate(node_disk_writes_completed{ instance=~\"($ceph_hosts).*\"}[5m]), 0.001) or \n (irate(node_disk_read_time_ms{ instance=~\"($ceph_hosts).*\"}[5m]) )\n / clamp_min(irate(node_disk_reads_completed{ instance=~\"($ceph_hosts).*\"}[5m]), 0.001)\n ) / 1000) or\n (irate(node_disk_write_time_seconds_total{ instance=~\"($ceph_hosts).*\"}[5m]) )\n / clamp_min(irate(node_disk_writes_completed_total{ instance=~\"($ceph_hosts).*\"}[5m]), 0.001) or \n (irate(node_disk_read_time_seconds_total{ instance=~\"($ceph_hosts).*\"}[5m]) )\n / clamp_min(irate(node_disk_reads_completed_total{ instance=~\"($ceph_hosts).*\"}[5m]), 0.001)\n ) *\n on(instance,device) group_left(osd_id) label_replace(ceph_disk_occupation,\"osd_id\",\"$1\",\"ceph_daemon\",\"osd.(.*)\")", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "{{device}}({{osd_id}})", + "refId": "D" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$ceph_hosts Disk Latency", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + 
"show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Show disk utilization % (util) of any OSD devices on the host by the physical device name and associated OSD id.", + "fill": 1, + "gridPos": { + "h": 9, + "w": 11, + "x": 12, + "y": 21 + }, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 12, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "(\n (irate(node_disk_io_time_ms[5m]) / 10 ) or\n irate(node_disk_io_time_seconds_total[5m]) * 100)\n* on(instance, device) group_left(osd_id) label_replace(ceph_disk_occupation{instance=~\"($ceph_hosts).*\"}, \"osd_id\",\"$1\",\"ceph_daemon\",\"osd.(.*)\")\n", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "{{device}}({{osd_id}})", + "refId": "C", + "step": 10, + "textEditor": true + }, + { + "expr": "(irate(node_disk_io_time_ms{instance=~\"($ceph_hosts).*\"}[5m] * 1000) or irate(node_disk_writes_completed_total{instance=~\"($ceph_hosts).*\"}[5m])) * on(instance, device) group_left(ceph_daemon) label_replace(label_replace(ceph_disk_occupation, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1:9100\", \"exported_instance\", \"(.*)\")", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$ceph_hosts Disk utilisation", + "tooltip": { + "shared": true, + "sort": 2, + 
"value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "%Util", + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } } ], "refresh": "10s", @@ -1063,7 +1156,6 @@ "list": [ { "current": { - "tags": [], "text": "default", "value": "default" }, @@ -1074,6 +1166,7 @@ "query": "prometheus", "refresh": 1, "regex": "", + "skipUrlSync": false, "type": "datasource" }, { @@ -1088,7 +1181,8 @@ "options": [], "query": "label_values(node_scrape_collector_success, instance) ", "refresh": 1, - "regex": "([^.]*).*", + "regex": "([^:]*).*", + "skipUrlSync": false, "sort": 3, "tagValuesQuery": "", "tags": [], @@ -1129,6 +1223,6 @@ }, "timezone": "browser", "title": "Host Details", - "uid": "7IGu2Ttmz", - "version": 11 + "uid": "rtOg0AiWz", + "version": 3 } diff --git a/monitoring/grafana/dashboards/hosts-overview.json b/monitoring/grafana/dashboards/hosts-overview.json index 5ccf55a7800..dbb7bdda74a 100644 --- a/monitoring/grafana/dashboards/hosts-overview.json +++ b/monitoring/grafana/dashboards/hosts-overview.json @@ -1,10 +1,11 @@ { + "__inputs": [], "__requires": [ { "type": "grafana", "id": "grafana", "name": "Grafana", - "version": "5.0.0" + "version": "5.3.2" }, { "type": "panel", @@ -36,7 +37,7 @@ "gnetId": null, "graphTooltip": 0, "id": null, - "iteration": 1538079414024, + "iteration": 1557393917915, "links": [], "panels": [ { @@ -430,7 +431,7 @@ "tableColumn": "", "targets": [ { - "expr": "avg (\n ((irate(node_disk_io_time_ms[5m]) / 10 ) or\n (irate(node_disk_io_time_seconds_total[5m]) * 100)\n ) *\n on(instance, device) ceph_disk_occupation{instance=~\"($osd_hosts).*\"}\n)", + "expr": "avg (\n ((irate(node_disk_io_time_ms[5m]) / 10 ) or\n 
(irate(node_disk_io_time_seconds_total[5m]) * 100)\n ) *\n on(instance, device) label_replace(label_replace(ceph_disk_occupation{instance=~\"($osd_hosts).*\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1:9100\", \"exported_instance\", \"(.*)\")\n)", "format": "time_series", "instant": true, "intervalFactor": 1, @@ -540,7 +541,7 @@ "dashes": false, "datasource": "$datasource", "description": "Show the top 10 busiest hosts by cpu", - "fill": 1, + "fill": 0, "gridPos": { "h": 9, "w": 12, @@ -567,7 +568,7 @@ "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, - "stack": true, + "stack": false, "steppedLine": false, "targets": [ { @@ -598,12 +599,12 @@ "yaxes": [ { "decimals": 1, - "format": "percentunit", + "format": "percent", "label": null, "logBase": 1, - "max": null, + "max": "100", "min": "0", - "show": false + "show": true }, { "format": "short", @@ -613,7 +614,11 @@ "min": null, "show": false } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "aliasColors": {}, @@ -622,7 +627,7 @@ "dashes": false, "datasource": "$datasource", "description": "Top 10 hosts by network load", - "fill": 1, + "fill": 0, "gridPos": { "h": 9, "w": 12, @@ -649,11 +654,11 @@ "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, - "stack": true, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "topk(10, (sum by(instance) (\n (\n irate(node_network_receive_bytes{instance=~\"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*\",device!=\"lo\"}[1m]) or\n irate(node_network_receive_bytes_total{instance=~\"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*\",device!=\"lo\"}[1m])\n ) +\n (\n irate(node_network_transmit_bytes{instance=~\"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*\",device!=\"lo\"}[1m]) or\n irate(node_network_transmit_bytes_total{instance=~\"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*\",device!=\"lo\"}[1m])\n ))\n )\n)", + "expr": "topk(10, (sum by(instance) (\n (\n 
irate(node_network_receive_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[1m]) or\n irate(node_network_receive_bytes_total{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[1m])\n ) +\n (\n irate(node_network_transmit_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[1m]) or\n irate(node_network_transmit_bytes_total{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[1m])\n ))\n )\n)", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{instance}}", @@ -680,7 +685,7 @@ "yaxes": [ { "decimals": 1, - "format": "bytes", + "format": "Bps", "label": null, "logBase": 1, "max": null, @@ -695,7 +700,11 @@ "min": null, "show": false } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } } ], "refresh": "10s", @@ -706,7 +715,6 @@ "list": [ { "current": { - "tags": [], "text": "default", "value": "default" }, @@ -717,6 +725,7 @@ "query": "prometheus", "refresh": 1, "regex": "", + "skipUrlSync": false, "type": "datasource" }, { @@ -732,6 +741,7 @@ "query": "label_values(ceph_disk_occupation, instance)", "refresh": 1, "regex": "([^.]*).*", + "skipUrlSync": false, "sort": 1, "tagValuesQuery": "", "tags": [], @@ -752,6 +762,7 @@ "query": "label_values(ceph_mon_metadata, ceph_daemon)", "refresh": 1, "regex": "mon.(.*)", + "skipUrlSync": false, "sort": 0, "tagValuesQuery": "", "tags": [], @@ -772,6 +783,7 @@ "query": "label_values(ceph_mds_inodes, ceph_daemon)", "refresh": 1, "regex": "mds.(.*)", + "skipUrlSync": false, "sort": 0, "tagValuesQuery": "", "tags": [], @@ -792,6 +804,7 @@ "query": "label_values(ceph_rgw_qlen, ceph_daemon)", "refresh": 1, "regex": "rgw.(.*)", + "skipUrlSync": false, "sort": 0, "tagValuesQuery": "", "tags": [], @@ -832,6 +845,6 @@ }, "timezone": "", "title": "Host Overview", - "uid": "lxnjcTAmk", - "version": 10 + "uid": "y0KGL0iZz", + "version": 3 } diff --git 
a/monitoring/grafana/dashboards/osd-device-details.json b/monitoring/grafana/dashboards/osd-device-details.json index 88209257df7..e78e6a77d6e 100644 --- a/monitoring/grafana/dashboards/osd-device-details.json +++ b/monitoring/grafana/dashboards/osd-device-details.json @@ -1,10 +1,11 @@ { + "__inputs": [], "__requires": [ { "type": "grafana", "id": "grafana", "name": "Grafana", - "version": "5.0.0" + "version": "5.3.2" }, { "type": "panel", @@ -30,7 +31,7 @@ "gnetId": null, "graphTooltip": 0, "id": null, - "iteration": 1534385833420, + "iteration": 1557395861896, "links": [], "panels": [ { @@ -52,7 +53,7 @@ "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, + "fill": 0, "gridPos": { "h": 9, "w": 6, @@ -77,30 +78,35 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "read", + "transform": "negative-Y" + } + ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { - "expr": "irate(ceph_osd_op_r_latency_sum{ceph_daemon=~\"osd.[[osd_id]]\"}[1m]) / on (ceph_daemon) irate(ceph_osd_op_r_latency_count[1m]) * 1000", + "expr": "irate(ceph_osd_op_r_latency_sum{ceph_daemon=~\"$osd\"}[1m]) / on (ceph_daemon) irate(ceph_osd_op_r_latency_count[1m])", "format": "time_series", "intervalFactor": 1, - "legendFormat": "READs", + "legendFormat": "read", "refId": "A" }, { - "expr": "irate(ceph_osd_op_w_latency_sum{ceph_daemon=~\"osd.[[osd_id]]\"}[1m]) / on (ceph_daemon) irate(ceph_osd_op_w_latency_count[1m]) * 1000", + "expr": "irate(ceph_osd_op_w_latency_sum{ceph_daemon=~\"$osd\"}[1m]) / on (ceph_daemon) irate(ceph_osd_op_w_latency_count[1m])", "format": "time_series", "intervalFactor": 1, - "legendFormat": "WRITEs", + "legendFormat": "write", "refId": "B" } ], "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "OSD $osd_id Latency", + "title": "$osd Latency", "tooltip": { "shared": true, "sort": 0, @@ -116,8 +122,8 @@ }, "yaxes": [ { - "format": "ms", - 
"label": null, + "format": "s", + "label": "Read (-) / Write (+)", "logBase": 1, "max": null, "min": "0", @@ -131,7 +137,11 @@ "min": null, "show": false } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "aliasColors": {}, @@ -139,7 +149,7 @@ "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, + "fill": 0, "gridPos": { "h": 9, "w": 6, @@ -164,20 +174,25 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "Reads", + "transform": "negative-Y" + } + ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { - "expr": "irate(ceph_osd_op_r{ceph_daemon=~\"osd.[[osd_id]]\"}[1m])", + "expr": "irate(ceph_osd_op_r{ceph_daemon=~\"$osd\"}[1m])", "format": "time_series", "intervalFactor": 1, "legendFormat": "Reads", "refId": "A" }, { - "expr": "irate(ceph_osd_op_w{ceph_daemon=~\"osd.[[osd_id]]\"}[1m])", + "expr": "irate(ceph_osd_op_w{ceph_daemon=~\"$osd\"}[1m])", "format": "time_series", "intervalFactor": 1, "legendFormat": "Writes", @@ -187,7 +202,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "OSD $osd_id R/W IOPS", + "title": "$osd R/W IOPS", "tooltip": { "shared": true, "sort": 0, @@ -204,10 +219,10 @@ "yaxes": [ { "format": "short", - "label": null, + "label": "Read (-) / Write (+)", "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { @@ -218,7 +233,11 @@ "min": null, "show": false } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "aliasColors": {}, @@ -226,7 +245,7 @@ "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, + "fill": 0, "gridPos": { "h": 9, "w": 6, @@ -251,20 +270,25 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "Read Bytes", + "transform": "negative-Y" + } + ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { - "expr": 
"irate(ceph_osd_op_r_out_bytes{ceph_daemon=~\"osd.[[osd_id]]\"}[1m])", + "expr": "irate(ceph_osd_op_r_out_bytes{ceph_daemon=~\"$osd\"}[1m])", "format": "time_series", "intervalFactor": 1, "legendFormat": "Read Bytes", "refId": "A" }, { - "expr": "irate(ceph_osd_op_w_in_bytes{ceph_daemon=~\"osd.[[osd_id]]\"}[1m])", + "expr": "irate(ceph_osd_op_w_in_bytes{ceph_daemon=~\"$osd\"}[1m])", "format": "time_series", "intervalFactor": 1, "legendFormat": "Write Bytes", @@ -274,7 +298,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "OSD $osd_id R/W Bytes", + "title": "$osd R/W Bytes", "tooltip": { "shared": true, "sort": 0, @@ -291,10 +315,10 @@ "yaxes": [ { "format": "bytes", - "label": null, + "label": "Read (-) / Write (+)", "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { @@ -305,7 +329,11 @@ "min": null, "show": false } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "collapsed": false, @@ -326,7 +354,7 @@ "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, + "fill": 0, "gridPos": { "h": 9, "w": 6, @@ -351,20 +379,25 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "/.*Reads/", + "transform": "negative-Y" + } + ], "spaceLength": 10, - "stack": true, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "(irate(node_disk_read_time_ms[1m]) / irate(node_disk_reads_completed[1m]) and on (instance, device) ceph_disk_occupation{ceph_daemon=~\"osd.[[osd_id]]\"}) ", + "expr": "(irate(node_disk_read_time_seconds_total[1m]) / irate(node_disk_reads_completed_total[1m]) and on (instance, device) label_replace(label_replace(ceph_disk_occupation{ceph_daemon=~\"$osd\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1:9100\", \"exported_instance\", \"(.*)\"))", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{instance}}/{{device}} Reads", "refId": "A" }, { - "expr": 
"(irate(node_disk_write_time_ms[1m]) / irate(node_disk_writes_completed[1m]) and on (instance, device) ceph_disk_occupation{ceph_daemon=~\"osd.[[osd_id]]\"}) ", + "expr": "(irate(node_disk_write_time_seconds_total[1m]) / irate(node_disk_writes_completed_total[1m]) and on (instance, device) label_replace(label_replace(ceph_disk_occupation{ceph_daemon=~\"$osd\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1:9100\", \"exported_instance\", \"(.*)\"))", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{instance}}/{{device}} Writes", @@ -374,7 +407,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Physical Device Latency for OSD $osd_id", + "title": "Physical Device Latency for $osd", "tooltip": { "shared": true, "sort": 0, @@ -391,10 +424,10 @@ "yaxes": [ { "format": "ms", - "label": null, + "label": "Read (-) / Write (+)", "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { @@ -405,7 +438,11 @@ "min": null, "show": false } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "aliasColors": {}, @@ -413,7 +450,7 @@ "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 1, + "fill": 0, "gridPos": { "h": 9, "w": 6, @@ -438,30 +475,35 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "/.*Reads/", + "transform": "negative-Y" + } + ], "spaceLength": 10, - "stack": true, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "(irate(node_disk_reads_completed[1m]) and on (instance, device) ceph_disk_occupation{ceph_daemon=~\"osd.[[osd_id]]\"})", + "expr": "irate(node_disk_writes_completed_total[1m]) and on (instance, device) label_replace(label_replace(ceph_disk_occupation{ceph_daemon=~\"$osd\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1:9100\", \"exported_instance\", \"(.*)\")", "format": "time_series", "intervalFactor": 1, - "legendFormat": "{{instance}} 
{{device}} READS", + "legendFormat": "{{device}} on {{instance}} Writes", "refId": "A" }, { - "expr": "(irate(node_disk_writes_completed[1m]) and on (instance, device) ceph_disk_occupation{ceph_daemon=~\"osd.[[osd_id]]\"}) ", + "expr": "irate(node_disk_reads_completed_total[1m]) and on (instance, device) label_replace(label_replace(ceph_disk_occupation{ceph_daemon=~\"$osd\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1:9100\", \"exported_instance\", \"(.*)\")", "format": "time_series", "intervalFactor": 1, - "legendFormat": "{{instance}} {{device}} WRITES", + "legendFormat": "{{device}} on {{instance}} Reads", "refId": "B" } ], "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Physical Device R/W IOPS for OSD $osd_id", + "title": "Physical Device R/W IOPS for $osd", "tooltip": { "shared": true, "sort": 0, @@ -478,7 +520,7 @@ "yaxes": [ { "format": "short", - "label": null, + "label": "Read (-) / Write (+)", "logBase": 1, "max": null, "min": null, @@ -492,7 +534,11 @@ "min": null, "show": true } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "aliasColors": {}, @@ -525,30 +571,35 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "/.*Reads/", + "transform": "negative-Y" + } + ], "spaceLength": 10, "stack": true, "steppedLine": false, "targets": [ { - "expr": "(irate(node_disk_bytes_read[1m]) and on (instance, device) ceph_disk_occupation{ceph_daemon=~\"osd.[[osd_id]]\"})", + "expr": "irate(node_disk_read_bytes_total[1m]) and on (instance, device) label_replace(label_replace(ceph_disk_occupation{ceph_daemon=~\"$osd\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1:9100\", \"exported_instance\", \"(.*)\")", "format": "time_series", "intervalFactor": 1, - "legendFormat": "{{instance}} {{device}} READS", + "legendFormat": "{{instance}} {{device}} Reads", "refId": "A" }, { - "expr": "(irate(node_disk_bytes_written[1m]) 
and on (instance, device) ceph_disk_occupation{ceph_daemon=~\"osd.[[osd_id]]\"}) ", + "expr": "irate(node_disk_written_bytes_total[1m]) and on (instance, device) label_replace(label_replace(ceph_disk_occupation{ceph_daemon=~\"$osd\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1:9100\", \"exported_instance\", \"(.*)\")", "format": "time_series", "intervalFactor": 1, - "legendFormat": "{{instance}} {{device}} WRITES", + "legendFormat": "{{instance}} {{device}} Writes", "refId": "B" } ], "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Physical Device R/W Bytes for OSD $osd_id", + "title": "Physical Device R/W Bytes for $osd", "tooltip": { "shared": true, "sort": 0, @@ -564,11 +615,11 @@ }, "yaxes": [ { - "format": "bytes", - "label": null, + "format": "Bps", + "label": "Read (-) / Write (+)", "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { @@ -579,7 +630,11 @@ "min": null, "show": false } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "aliasColors": {}, @@ -618,17 +673,17 @@ "steppedLine": false, "targets": [ { - "expr": "(irate(node_disk_io_time_ms[1m]) and on (instance, device) ceph_disk_occupation{ceph_daemon=~\"osd.[[osd_id]]\"}) / 10", + "expr": "irate(node_disk_io_time_seconds_total[1m]) and on (instance, device) label_replace(label_replace(ceph_disk_occupation{ceph_daemon=~\"$osd\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1:9100\", \"exported_instance\", \"(.*)\")", "format": "time_series", "intervalFactor": 1, - "legendFormat": "{{instance}} {{device}}", + "legendFormat": "{{device}} on {{instance}}", "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Physical Device Util% for OSD $osd_id", + "title": "Physical Device Util% for $osd", "tooltip": { "shared": true, "sort": 0, @@ -644,7 +699,7 @@ }, "yaxes": [ { - "format": "short", + "format": "percentunit", "label": null, "logBase": 1, "max": null, @@ -659,7 
+714,11 @@ "min": null, "show": true } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } } ], "schemaVersion": 16, @@ -669,7 +728,6 @@ "list": [ { "current": { - "tags": [], "text": "default", "value": "default" }, @@ -680,6 +738,7 @@ "query": "prometheus", "refresh": 1, "regex": "", + "skipUrlSync": false, "type": "datasource" }, { @@ -688,13 +747,14 @@ "datasource": "$datasource", "hide": 0, "includeAll": false, - "label": "OSD Id", + "label": "OSD", "multi": false, - "name": "osd_id", + "name": "osd", "options": [], "query": "label_values(ceph_osd_metadata,ceph_daemon)", "refresh": 1, - "regex": "osd.(.*)", + "regex": "(.*)", + "skipUrlSync": false, "sort": 0, "tagValuesQuery": "", "tags": [], @@ -705,7 +765,7 @@ ] }, "time": { - "from": "now-1h", + "from": "now-3h", "to": "now" }, "timepicker": { @@ -735,6 +795,6 @@ }, "timezone": "", "title": "OSD device details", - "uid": "MKj_9ipiz", + "uid": "CrAHE0iZz", "version": 3 } diff --git a/monitoring/grafana/dashboards/pool-detail.json b/monitoring/grafana/dashboards/pool-detail.json index 64f34ac9b90..2144c4a9dd5 100644 --- a/monitoring/grafana/dashboards/pool-detail.json +++ b/monitoring/grafana/dashboards/pool-detail.json @@ -1,10 +1,11 @@ { + "__inputs": [], "__requires": [ { "type": "grafana", "id": "grafana", "name": "Grafana", - "version": "5.0.0" + "version": "5.3.2" }, { "type": "panel", @@ -14,8 +15,8 @@ }, { "type": "panel", - "id": "text", - "name": "Text", + "id": "singlestat", + "name": "Singlestat", "version": "5.0.0" } ], @@ -36,22 +37,262 @@ "gnetId": null, "graphTooltip": 0, "id": null, - "iteration": 1534394258671, + "iteration": 1551858875941, "links": [], "panels": [ { - "collapsed": false, + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "format": "percentunit", + "gauge": { + "maxValue": 1, + "minValue": 0, + "show": true, + "thresholdLabels": 
false, + "thresholdMarkers": true + }, "gridPos": { - "h": 1, - "w": 24, + "h": 7, + "w": 7, "x": 0, "y": 0 }, - "id": 16, - "panels": [], - "repeat": null, - "title": "Pool '$pool_name' Performance Details", - "type": "row" + "id": 12, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "(ceph_pool_stored / ceph_pool_max_avail) * on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~\"$pool_name\"}", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": ".7,.8", + "title": "Capacity used", + "type": "singlestat", + "valueFontSize": "50%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "description": "Time till pool is full assuming the average fill rate of the last 6 hours", + "format": "s", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 7, + "w": 5, + "x": 7, + "y": 0 + }, + "id": 14, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + 
"postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "((ceph_pool_max_avail - ceph_pool_stored) / deriv(ceph_pool_stored[6h])) * on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~\"$pool_name\"} > 0", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "", + "title": "Time till full", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "inf", + "value": "null" + }, + { + "op": "=", + "text": "inf", + "value": "N/A" + } + ], + "valueName": "current" + }, + { + "aliasColors": { + "read_op_per_sec": "#3F6833", + "write_op_per_sec": "#E5AC0E" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 12, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "deriv(ceph_pool_objects[1m]) * on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~\"$pool_name\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Objects per second", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$pool_name Object In-/Egress", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + 
"buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ops", + "label": "Objects out(-) / in(+) ", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "aliasColors": { @@ -67,7 +308,7 @@ "h": 7, "w": 12, "x": 0, - "y": 1 + "y": 7 }, "id": 6, "legend": { @@ -88,20 +329,25 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "reads", + "transform": "negative-Y" + } + ], "spaceLength": 10, - "stack": true, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "(label_replace(irate(ceph_pool_rd[1m]),\"id\",\"$1\",\"pool_id\",\"(.*)\") * on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~\"[[pool_name]]\"})", + "expr": "irate(ceph_pool_rd[1m]) * on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~\"$pool_name\"}", "format": "time_series", "intervalFactor": 1, "legendFormat": "reads", "refId": "B" }, { - "expr": "(label_replace(irate(ceph_pool_wr[1m]),\"id\",\"$1\",\"pool_id\",\"(.*)\") * on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~\"[[pool_name]]\"})", + "expr": "irate(ceph_pool_wr[1m]) * on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~\"$pool_name\"}", "format": "time_series", "intervalFactor": 1, "legendFormat": "writes", @@ -111,7 +357,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Pool '$pool_name' Client IOPS", + "title": "$pool_name Client IOPS", "tooltip": { "shared": true, "sort": 0, @@ -127,11 +373,11 @@ }, "yaxes": [ { - "format": "none", - "label": "", + "format": "iops", + "label": "Read (-) / Write (+)", "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { @@ -142,7 +388,11 @@ "min": null, "show": true } - ] + ], + "yaxis": { + 
"align": false, + "alignLevel": null + } }, { "aliasColors": { @@ -158,7 +408,7 @@ "h": 7, "w": 12, "x": 12, - "y": 1 + "y": 7 }, "id": 7, "legend": { @@ -179,20 +429,25 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "reads", + "transform": "negative-Y" + } + ], "spaceLength": 10, - "stack": true, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "(label_replace(irate(ceph_pool_rd_bytes[1m]),\"id\",\"$1\",\"pool_id\",\"(.*)\") + on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~\"[[pool_name]]\"})", + "expr": "irate(ceph_pool_rd_bytes[1m]) + on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~\"$pool_name\"}", "format": "time_series", "intervalFactor": 1, "legendFormat": "reads", "refId": "A" }, { - "expr": "(label_replace(irate(ceph_pool_wr_bytes[1m]),\"id\",\"$1\",\"pool_id\",\"(.*)\") + on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~\"[[pool_name]]\"})", + "expr": "irate(ceph_pool_wr_bytes[1m]) + on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~\"$pool_name\"}", "format": "time_series", "intervalFactor": 1, "legendFormat": "writes", @@ -202,7 +457,7 @@ "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Pool '$pool_name' Client Throughput", + "title": "$pool_name Client Throughput", "tooltip": { "shared": true, "sort": 0, @@ -218,11 +473,11 @@ }, "yaxes": [ { - "format": "decbytes", - "label": "", + "format": "Bps", + "label": "Read (-) / Write (+)", "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { @@ -233,7 +488,99 @@ "min": null, "show": false } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "read_op_per_sec": "#3F6833", + "write_op_per_sec": "#E5AC0E" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 14 + }, + "id": 8, + 
"legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 12, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "ceph_pool_objects * on(pool_id) group_left(instance,name) ceph_pool_metadata{name=~\"$pool_name\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Number of Objects", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "$pool_name Objects", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "Objects", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } } ], "refresh": "15s", @@ -242,31 +589,10 @@ "tags": [], "templating": { "list": [ - { - "allValue": null, - "current": {}, - "datasource": "$datasource", - "hide": 2, - "includeAll": true, - "label": null, - "multi": false, - "name": "pool_id", - "options": [], - "query": "label_values(ceph_pool_metadata,pool_id)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - }, { "current": { - "tags": [], - "text": "default", - "value": "default" + "text": "Prometheus admin.virt1.home.fajerski.name:9090", + "value": "Prometheus admin.virt1.home.fajerski.name:9090" }, "hide": 0, "label": "Data Source", @@ -275,6 +601,7 @@ "query": "prometheus", "refresh": 1, 
"regex": "", + "skipUrlSync": false, "type": "datasource" }, { @@ -290,6 +617,7 @@ "query": "label_values(ceph_pool_metadata,name)", "refresh": 1, "regex": "", + "skipUrlSync": false, "sort": 1, "tagValuesQuery": "", "tags": [], @@ -330,7 +658,7 @@ ] }, "timezone": "browser", - "title": "Ceph Pool Detail", - "uid": "8ypfkWpik", - "version": 11 + "title": "Ceph Pool Details", + "uid": "-xyV8KCiz", + "version": 1 } diff --git a/monitoring/grafana/dashboards/pool-overview.json b/monitoring/grafana/dashboards/pool-overview.json index 505108d26ba..404d719ead0 100644 --- a/monitoring/grafana/dashboards/pool-overview.json +++ b/monitoring/grafana/dashboards/pool-overview.json @@ -1,24 +1,4 @@ { - "__requires": [ - { - "type": "grafana", - "id": "grafana", - "name": "Grafana", - "version": "5.0.0" - }, - { - "type": "panel", - "id": "graph", - "name": "Graph", - "version": "5.0.0" - }, - { - "type": "panel", - "id": "table", - "name": "Table", - "version": "5.0.0" - } - ], "annotations": { "list": [ { @@ -35,31 +15,16 @@ "editable": false, "gnetId": null, "graphTooltip": 0, - "id": null, - "iteration": 1534386772937, + "iteration": 1551789900270, "links": [], "panels": [ - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 14, - "panels": [], - "repeat": null, - "title": "Pool Overview", - "type": "row" - }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 5, + "fill": 1, "gridPos": { "h": 7, "w": 12, @@ -85,24 +50,36 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "/.* read/", + "transform": "negative-Y" + } + ], "spaceLength": 10, - "stack": true, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "(label_replace((rate(ceph_pool_rd{pool_id=~\"[[pool_id]]\"}[1m]) + rate(ceph_pool_wr{pool_id=~\"[[pool_id]]\"}[1m])),\"id\", \"$1\", \"pool_id\", \"(.*)\") + on(pool_id) 
group_left(instance,name) ceph_pool_metadata{pool_id=~\"[[pool_id]]\"}) ", + "expr": "topk($topk,rate(ceph_pool_rd[1m]) + on(pool_id) group_left(instance,name) ceph_pool_metadata) ", "format": "time_series", "hide": false, "intervalFactor": 1, - "legendFormat": "{{name}}", + "legendFormat": "{{name}} - read", "refId": "F" + }, + { + "expr": "topk($topk,rate(ceph_pool_wr[1m]) + on(pool_id) group_left(instance,name) ceph_pool_metadata) ", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{name}} - write", + "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Client IOPS by Pool", + "title": "Top $topk Client IOPS by Pool", "tooltip": { "shared": true, "sort": 0, @@ -119,10 +96,10 @@ "yaxes": [ { "format": "none", - "label": null, + "label": "Read (-) / Write (+)", "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { @@ -133,7 +110,11 @@ "min": null, "show": false } - ] + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "aliasColors": {}, @@ -141,7 +122,7 @@ "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fill": 5, + "fill": 1, "gridPos": { "h": 7, "w": 12, @@ -167,24 +148,36 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "/.* read/", + "transform": "negative-Y" + } + ], "spaceLength": 10, - "stack": true, + "stack": false, "steppedLine": false, "targets": [ { - "expr": "(label_replace((rate(ceph_pool_rd_bytes{pool_id=~\"[[pool_id]]\"}[1m]) + rate(ceph_pool_wr_bytes{pool_id=~\"[[pool_id]]\"}[1m])),\"id\", \"$1\", \"pool_id\", \"(.*)\") + on(pool_id) group_left(instance,name) ceph_pool_metadata{pool_id=~\"[[pool_id]]\"}) ", + "expr": "topk($topk,rate(ceph_pool_rd_bytes[1m]) + on(pool_id) group_left(instance,name) ceph_pool_metadata)", "format": "time_series", "intervalFactor": 1, - "legendFormat": "{{name}}", + "legendFormat": "{{name}} - read", "refId": "A", "textEditor": true + }, + { + 
"expr": "topk($topk,rate(ceph_pool_wr_bytes[1m]) + on(pool_id) group_left(instance,name) ceph_pool_metadata)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{name}} - write", + "refId": "B" } ], "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Client Throughput by Pool", + "title": "Top $topk Client Throughput by Pool", "tooltip": { "shared": true, "sort": 0, @@ -200,11 +193,11 @@ }, "yaxes": [ { - "format": "decbytes", - "label": null, + "format": "Bps", + "label": "Read (-) / Writes (+)", "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { @@ -215,21 +208,11 @@ "min": null, "show": false } - ] - }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 8 - }, - "id": 15, - "panels": [], - "repeat": null, - "title": "Top 5's", - "type": "row" + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { "columns": [ @@ -365,7 +348,7 @@ ], "targets": [ { - "expr": "topk(5,(label_replace((irate(ceph_pool_rd{pool_id=~\"[[pool_id]]\"}[1m]) + irate(ceph_pool_wr{pool_id=~\"[[pool_id]]\"}[1m])),\"id\", \"$1\", \"pool_id\", \"(.*)\") + on(pool_id) group_left(instance,name) ceph_pool_metadata{pool_id=~\"[[pool_id]]\"}) )", + "expr": "topk($topk,((irate(ceph_pool_rd[1m]) + irate(ceph_pool_wr[1m])) + on(pool_id) group_left(instance,name) ceph_pool_metadata))", "format": "table", "instant": true, "intervalFactor": 2, @@ -373,7 +356,7 @@ "textEditor": true } ], - "title": "Top 5 Pools by Client IOPS", + "title": "Top $topk Pools by Client IOPS", "transform": "table", "type": "table" }, @@ -502,7 +485,7 @@ ], "targets": [ { - "expr": "(label_replace((irate(ceph_pool_rd_bytes{pool_id=~\"[[pool_id]]\"}[1m]) + irate(ceph_pool_wr_bytes{pool_id=~\"[[pool_id]]\"}[1m])),\"id\", \"$1\", \"pool_id\", \"(.*)\") + on(pool_id) group_left(instance,name) ceph_pool_metadata{pool_id=~\"[[pool_id]]\"}) ", + "expr": "topk($topk,(irate(ceph_pool_rd_bytes[1m]) + irate(ceph_pool_wr_bytes[1m])) + on(pool_id) 
group_left(instance,name) ceph_pool_metadata) ", "format": "table", "instant": true, "intervalFactor": 2, @@ -510,7 +493,7 @@ "textEditor": true } ], - "title": "Top 5 Pools by Throughput", + "title": "Top $topk Pools by Throughput", "transform": "table", "type": "table" }, @@ -631,7 +614,7 @@ ], "targets": [ { - "expr": "topk(5,((ceph_pool_bytes_used / (ceph_pool_bytes_used + ceph_pool_max_avail)) * on(pool_id) group_left(name) ceph_pool_metadata))", + "expr": "topk($topk,((ceph_pool_stored / (ceph_pool_stored + ceph_pool_max_avail)) * on(pool_id) group_left(name) ceph_pool_metadata))", "format": "table", "hide": false, "instant": true, @@ -640,7 +623,7 @@ "refId": "D" } ], - "title": "Top 5 Pools By Capacity Used", + "title": "Top $topk Pools By Capacity Used", "transform": "table", "type": "table" } @@ -651,51 +634,10 @@ "tags": [], "templating": { "list": [ - { - "allValue": null, - "current": {}, - "datasource": "$datasource", - "hide": 2, - "includeAll": true, - "label": null, - "multi": false, - "name": "pool_id", - "options": [], - "query": "label_values(ceph_pool_metadata,pool_id)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": {}, - "datasource": "$datasource", - "hide": 2, - "includeAll": true, - "label": "Pool Name", - "multi": false, - "name": "pool_name", - "options": [], - "query": "label_values(ceph_pool_metadata,name)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false - }, { "current": { - "tags": [], - "text": "default", - "value": "default" + "text": "Prometheus admin.virt1.home.fajerski.name:9090", + "value": "Prometheus admin.virt1.home.fajerski.name:9090" }, "hide": 0, "label": "Data Source", @@ -704,7 +646,26 @@ "query": "prometheus", "refresh": 1, "regex": "", + "skipUrlSync": false, "type": "datasource" + }, + { + 
"current": { + "text": "3", + "value": "3" + }, + "hide": 0, + "label": "Top K", + "name": "topk", + "options": [ + { + "text": "3", + "value": "3" + } + ], + "query": "3", + "skipUrlSync": false, + "type": "textbox" } ] }, @@ -741,5 +702,5 @@ "timezone": "browser", "title": "Ceph Pools Overview", "uid": "z99hzWtmk", - "version": 3 + "version": 1 } diff --git a/monitoring/grafana/screenshots/host-details.png b/monitoring/grafana/screenshots/host-details.png index 4cb632adc02..bf963a9c75a 100644 Binary files a/monitoring/grafana/screenshots/host-details.png and b/monitoring/grafana/screenshots/host-details.png differ diff --git a/monitoring/grafana/screenshots/osd-performance.png b/monitoring/grafana/screenshots/osd-performance.png index 1f2c0397149..9bb7af44ed9 100644 Binary files a/monitoring/grafana/screenshots/osd-performance.png and b/monitoring/grafana/screenshots/osd-performance.png differ diff --git a/monitoring/grafana/screenshots/pool-details.png b/monitoring/grafana/screenshots/pool-details.png index 493d0d32506..1f300cdab69 100644 Binary files a/monitoring/grafana/screenshots/pool-details.png and b/monitoring/grafana/screenshots/pool-details.png differ diff --git a/monitoring/grafana/screenshots/pool-overview.png b/monitoring/grafana/screenshots/pool-overview.png index 2c6191216b3..ffdd28ea6c8 100644 Binary files a/monitoring/grafana/screenshots/pool-overview.png and b/monitoring/grafana/screenshots/pool-overview.png differ