From: Radu Toader Date: Wed, 30 Oct 2019 08:42:41 +0000 (+0200) Subject: mgr/dashboard: fix grafana dashboards X-Git-Tag: v14.2.5~11^2 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=c3d7cc9e5a408634d4e09eb0f85fa9dc22e5d088;p=ceph.git mgr/dashboard: fix grafana dashboards Fixes: https://tracker.ceph.com/issues/42542 Sort order was wrong for some dashboards. Fixed empty / buggy Top 3 clients IOPS by pool / Throughput - in Pools Overall performance. Fixed Avg utilization Multiple series found - in Host Overall performance. Fixed invalid dimensions for plot - in OSD Overall performance. Signed-off-by: Radu Toader (cherry picked from commit 3beaf63761590fd35a72777fc56c0d5f4a6cad3b) --- diff --git a/monitoring/grafana/dashboards/cephfs-overview.json b/monitoring/grafana/dashboards/cephfs-overview.json index b131cc2079f1d..57922f55155a9 100644 --- a/monitoring/grafana/dashboards/cephfs-overview.json +++ b/monitoring/grafana/dashboards/cephfs-overview.json @@ -109,7 +109,7 @@ "title": "MDS Workload - $mds_servers", "tooltip": { "shared": true, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -193,7 +193,7 @@ "title": "Client Request Load - $mds_servers", "tooltip": { "shared": true, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", diff --git a/monitoring/grafana/dashboards/host-details.json b/monitoring/grafana/dashboards/host-details.json index df68b65e34b58..c4d139a68c007 100644 --- a/monitoring/grafana/dashboards/host-details.json +++ b/monitoring/grafana/dashboards/host-details.json @@ -207,7 +207,7 @@ "title": "CPU Utilisation", "tooltip": { "shared": true, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -330,7 +330,7 @@ "title": "RAM Usage", "tooltip": { "shared": true, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -434,7 +434,7 @@ "title": "Network Load", "tooltip": { "shared": true, - "sort": 0, + "sort": 2, "value_type": "individual" },
"type": "graph", @@ -631,7 +631,7 @@ "tableColumn": "", "targets": [ { - "expr": "sum(ceph_osd_stat_bytes and on (ceph_daemon) ceph_disk_occupation{exported_instance=~\"($ceph_hosts).*\"})", + "expr": "sum(ceph_osd_stat_bytes and on (ceph_daemon) ceph_disk_occupation{instance=~\"($ceph_hosts).*\"})", "format": "time_series", "intervalFactor": 2, "refId": "A", @@ -808,7 +808,7 @@ "steppedLine": false, "targets": [ { - "expr": "(irate(node_disk_writes_completed{instance=~\"($ceph_hosts).*\"}[5m]) or irate(node_disk_writes_completed_total{instance=~\"($ceph_hosts).*\"}[5m])) * on(instance, device) group_left(ceph_daemon) label_replace(label_replace(ceph_disk_occupation, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1:9100\", \"exported_instance\", \"(.*)\")", + "expr": "(irate(node_disk_writes_completed{instance=~\"($ceph_hosts).*\"}[5m]) or irate(node_disk_writes_completed_total{instance=~\"($ceph_hosts).*\"}[5m])) * on(instance, device) group_left(ceph_daemon) label_replace(label_replace(ceph_disk_occupation, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"(.*)\")", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{device}}({{ceph_daemon}}) writes", @@ -817,7 +817,7 @@ "textEditor": true }, { - "expr": "(irate(node_disk_reads_completed{instance=~\"($ceph_hosts).*\"}[5m]) or irate(node_disk_reads_completed_total{instance=~\"($ceph_hosts).*\"}[5m])) * on(instance, device) group_left(ceph_daemon) label_replace(label_replace(ceph_disk_occupation, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1:9100\", \"exported_instance\", \"(.*)\")", + "expr": "(irate(node_disk_reads_completed{instance=~\"($ceph_hosts).*\"}[5m]) or irate(node_disk_reads_completed_total{instance=~\"($ceph_hosts).*\"}[5m])) * on(instance, device) group_left(ceph_daemon) label_replace(label_replace(ceph_disk_occupation, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"(.*)\")", 
"format": "time_series", "hide": false, "intervalFactor": 1, @@ -848,7 +848,7 @@ "label": "Read (-) / Write (+)", "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { @@ -909,14 +909,14 @@ "steppedLine": false, "targets": [ { - "expr": "(irate(node_disk_bytes_written{instance=~\"($ceph_hosts).*\"}[5m]) or irate(node_disk_written_bytes_total{instance=~\"($ceph_hosts).*\"}[5m])) * on(instance, device) group_left(ceph_daemon) label_replace(label_replace(ceph_disk_occupation, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1:9100\", \"exported_instance\", \"(.*)\")", + "expr": "(irate(node_disk_bytes_written{instance=~\"($ceph_hosts).*\"}[5m]) or irate(node_disk_written_bytes_total{instance=~\"($ceph_hosts).*\"}[5m])) * on(instance, device) group_left(ceph_daemon) label_replace(label_replace(ceph_disk_occupation, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"(.*)\")", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{device}}({{ceph_daemon}}) write", "refId": "B" }, { - "expr": "(irate(node_disk_bytes_read{instance=~\"($ceph_hosts).*\"}[5m]) or irate(node_disk_read_bytes_total{instance=~\"($ceph_hosts).*\"}[5m])) * on(instance, device) group_left(ceph_daemon) label_replace(label_replace(ceph_disk_occupation, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1:9100\", \"exported_instance\", \"(.*)\")", + "expr": "(irate(node_disk_bytes_read{instance=~\"($ceph_hosts).*\"}[5m]) or irate(node_disk_read_bytes_total{instance=~\"($ceph_hosts).*\"}[5m])) * on(instance, device) group_left(ceph_daemon) label_replace(label_replace(ceph_disk_occupation, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"(.*)\")", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{device}}({{ceph_daemon}}) read", @@ -946,7 +946,7 @@ "label": "Read (-) / Write (+)", "logBase": 1, "max": null, - "min": "0", + "min": null, "show": true }, { @@ 
-1002,11 +1002,11 @@ "steppedLine": false, "targets": [ { - "expr": "max by(instance,device) (((\n (irate(node_disk_write_time_ms{ instance=~\"($ceph_hosts).*\"}[5m]) )\n / clamp_min(irate(node_disk_writes_completed{ instance=~\"($ceph_hosts).*\"}[5m]), 0.001) or \n (irate(node_disk_read_time_ms{ instance=~\"($ceph_hosts).*\"}[5m]) )\n / clamp_min(irate(node_disk_reads_completed{ instance=~\"($ceph_hosts).*\"}[5m]), 0.001)\n ) / 1000) or\n (irate(node_disk_write_time_seconds_total{ instance=~\"($ceph_hosts).*\"}[5m]) )\n / clamp_min(irate(node_disk_writes_completed_total{ instance=~\"($ceph_hosts).*\"}[5m]), 0.001) or \n (irate(node_disk_read_time_seconds_total{ instance=~\"($ceph_hosts).*\"}[5m]) )\n / clamp_min(irate(node_disk_reads_completed_total{ instance=~\"($ceph_hosts).*\"}[5m]), 0.001)\n ) *\n on(instance,device) group_left(osd_id) label_replace(ceph_disk_occupation,\"osd_id\",\"$1\",\"ceph_daemon\",\"osd.(.*)\")", + "expr": "max by(instance,device) ((irate(node_disk_write_time_seconds_total{ instance=~\"($ceph_hosts).*\"}[5m]) ) / clamp_min(irate(node_disk_writes_completed_total{ instance=~\"($ceph_hosts).*\"}[5m]), 0.001) or (irate(node_disk_read_time_seconds_total{ instance=~\"($ceph_hosts).*\"}[5m]) ) / clamp_min(irate(node_disk_reads_completed_total{ instance=~\"($ceph_hosts).*\"}[5m]), 0.001) ) * on(instance,device) group_left(ceph_daemon) label_replace(label_replace(ceph_disk_occupation{instance=~\"($ceph_hosts).*\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"(.*)\")", "format": "time_series", "hide": false, "intervalFactor": 1, - "legendFormat": "{{device}}({{osd_id}})", + "legendFormat": "{{device}}({{ceph_daemon}})", "refId": "D" } ], @@ -1058,6 +1058,7 @@ "datasource": "$datasource", "description": "Show disk utilization % (util) of any OSD devices on the host by the physical device name and associated OSD id.", "fill": 1, + "fillGradient": 0, "gridPos": { "h": 9, "w": 11, @@ -1077,8 +1078,11 @@ 
"lines": true, "linewidth": 1, "links": [], - "minSpan": 12, + "maxPerRow": 2, "nullPointMode": "connected", + "options": { + "dataLinks": [] + }, "percentage": false, "pointradius": 5, "points": false, @@ -1089,24 +1093,17 @@ "steppedLine": false, "targets": [ { - "expr": "(\n (irate(node_disk_io_time_ms[5m]) / 10 ) or\n irate(node_disk_io_time_seconds_total[5m]) * 100)\n* on(instance, device) group_left(osd_id) label_replace(ceph_disk_occupation{instance=~\"($ceph_hosts).*\"}, \"osd_id\",\"$1\",\"ceph_daemon\",\"osd.(.*)\")\n", + "expr": "((irate(node_disk_io_time_ms{instance=~\"($ceph_hosts).*\"}[5m]) / 10 ) or irate(node_disk_io_time_seconds_total{instance=~\"($ceph_hosts).*\"}[5m]) * 100)* on(instance, device) group_left(ceph_daemon) label_replace(label_replace(ceph_disk_occupation{instance=~\"($ceph_hosts).*\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"(.*)\")", "format": "time_series", "hide": false, "intervalFactor": 1, - "legendFormat": "{{device}}({{osd_id}})", - "refId": "C", - "step": 10, - "textEditor": true - }, - { - "expr": "(irate(node_disk_io_time_ms{instance=~\"($ceph_hosts).*\"}[5m] * 1000) or irate(node_disk_writes_completed_total{instance=~\"($ceph_hosts).*\"}[5m])) * on(instance, device) group_left(ceph_daemon) label_replace(label_replace(ceph_disk_occupation, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1:9100\", \"exported_instance\", \"(.*)\")", - "format": "time_series", - "intervalFactor": 1, + "legendFormat": "{{device}}({{ceph_daemon}})", "refId": "A" } ], "thresholds": [], "timeFrom": null, + "timeRegions": [], "timeShift": null, "title": "$ceph_hosts Disk utilisation", "tooltip": { @@ -1124,7 +1121,7 @@ }, "yaxes": [ { - "format": "short", + "format": "percent", "label": "%Util", "logBase": 1, "max": "100", diff --git a/monitoring/grafana/dashboards/hosts-overview.json b/monitoring/grafana/dashboards/hosts-overview.json index dbb7bdda74a79..96e739d11fd14 100644 --- 
a/monitoring/grafana/dashboards/hosts-overview.json +++ b/monitoring/grafana/dashboards/hosts-overview.json @@ -431,7 +431,7 @@ "tableColumn": "", "targets": [ { - "expr": "avg (\n ((irate(node_disk_io_time_ms[5m]) / 10 ) or\n (irate(node_disk_io_time_seconds_total[5m]) * 100)\n ) *\n on(instance, device) label_replace(label_replace(ceph_disk_occupation{instance=~\"($osd_hosts).*\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1:9100\", \"exported_instance\", \"(.*)\")\n)", + "expr" : "avg (\n ((irate(node_disk_io_time_ms[5m]) / 10 ) or\n (irate(node_disk_io_time_seconds_total[5m]) * 100)\n ) *\n on(instance, device) label_replace(label_replace(ceph_disk_occupation{instance=~\"($osd_hosts).*\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"(.*)\")\n)", "format": "time_series", "instant": true, "intervalFactor": 1, @@ -581,11 +581,12 @@ ], "thresholds": [], "timeFrom": null, + "timeRegions": [], "timeShift": null, "title": "CPU Busy - Top 10 Hosts", "tooltip": { "shared": true, - "sort": 1, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -667,11 +668,12 @@ ], "thresholds": [], "timeFrom": null, + "timeRegions": [], "timeShift": null, "title": "Network Load - Top 10", "tooltip": { "shared": true, - "sort": 1, + "sort": 2, "value_type": "individual" }, "type": "graph", diff --git a/monitoring/grafana/dashboards/osd-device-details.json b/monitoring/grafana/dashboards/osd-device-details.json index e78e6a77d6e45..6b70778df4231 100644 --- a/monitoring/grafana/dashboards/osd-device-details.json +++ b/monitoring/grafana/dashboards/osd-device-details.json @@ -109,7 +109,7 @@ "title": "$osd Latency", "tooltip": { "shared": true, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -205,7 +205,7 @@ "title": "$osd R/W IOPS", "tooltip": { "shared": true, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -301,7 +301,7 @@ "title": "$osd R/W Bytes", 
"tooltip": { "shared": true, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -390,14 +390,14 @@ "steppedLine": false, "targets": [ { - "expr": "(irate(node_disk_read_time_seconds_total[1m]) / irate(node_disk_reads_completed_total[1m]) and on (instance, device) label_replace(label_replace(ceph_disk_occupation{ceph_daemon=~\"$osd\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1:9100\", \"exported_instance\", \"(.*)\"))", + "expr": "(irate(node_disk_read_time_seconds_total[1m]) / irate(node_disk_reads_completed_total[1m]) and on (instance, device) label_replace(label_replace(ceph_disk_occupation{ceph_daemon=~\"$osd\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"(.*)\"))", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{instance}}/{{device}} Reads", "refId": "A" }, { - "expr": "(irate(node_disk_write_time_seconds_total[1m]) / irate(node_disk_writes_completed_total[1m]) and on (instance, device) label_replace(label_replace(ceph_disk_occupation{ceph_daemon=~\"$osd\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1:9100\", \"exported_instance\", \"(.*)\"))", + "expr": "(irate(node_disk_write_time_seconds_total[1m]) / irate(node_disk_writes_completed_total[1m]) and on (instance, device) label_replace(label_replace(ceph_disk_occupation{ceph_daemon=~\"$osd\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"(.*)\"))", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{instance}}/{{device}} Writes", @@ -410,7 +410,7 @@ "title": "Physical Device Latency for $osd", "tooltip": { "shared": true, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -486,14 +486,14 @@ "steppedLine": false, "targets": [ { - "expr": "irate(node_disk_writes_completed_total[1m]) and on (instance, device) label_replace(label_replace(ceph_disk_occupation{ceph_daemon=~\"$osd\"}, \"device\", \"$1\", 
\"device\", \"/dev/(.*)\"), \"instance\", \"$1:9100\", \"exported_instance\", \"(.*)\")", + "expr": "irate(node_disk_writes_completed_total[1m]) and on (instance, device) label_replace(label_replace(ceph_disk_occupation{ceph_daemon=~\"$osd\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"(.*)\")", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{device}} on {{instance}} Writes", "refId": "A" }, { - "expr": "irate(node_disk_reads_completed_total[1m]) and on (instance, device) label_replace(label_replace(ceph_disk_occupation{ceph_daemon=~\"$osd\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1:9100\", \"exported_instance\", \"(.*)\")", + "expr": "irate(node_disk_reads_completed_total[1m]) and on (instance, device) label_replace(label_replace(ceph_disk_occupation{ceph_daemon=~\"$osd\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"(.*)\")", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{device}} on {{instance}} Reads", @@ -506,7 +506,7 @@ "title": "Physical Device R/W IOPS for $osd", "tooltip": { "shared": true, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -582,14 +582,14 @@ "steppedLine": false, "targets": [ { - "expr": "irate(node_disk_read_bytes_total[1m]) and on (instance, device) label_replace(label_replace(ceph_disk_occupation{ceph_daemon=~\"$osd\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1:9100\", \"exported_instance\", \"(.*)\")", + "expr": "irate(node_disk_read_bytes_total[1m]) and on (instance, device) label_replace(label_replace(ceph_disk_occupation{ceph_daemon=~\"$osd\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"(.*)\")", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{instance}} {{device}} Reads", "refId": "A" }, { - "expr": "irate(node_disk_written_bytes_total[1m]) and on (instance, device) 
label_replace(label_replace(ceph_disk_occupation{ceph_daemon=~\"$osd\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1:9100\", \"exported_instance\", \"(.*)\")", + "expr": "irate(node_disk_written_bytes_total[1m]) and on (instance, device) label_replace(label_replace(ceph_disk_occupation{ceph_daemon=~\"$osd\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"(.*)\")", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{instance}} {{device}} Writes", @@ -602,7 +602,7 @@ "title": "Physical Device R/W Bytes for $osd", "tooltip": { "shared": true, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -673,7 +673,7 @@ "steppedLine": false, "targets": [ { - "expr": "irate(node_disk_io_time_seconds_total[1m]) and on (instance, device) label_replace(label_replace(ceph_disk_occupation{ceph_daemon=~\"$osd\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1:9100\", \"exported_instance\", \"(.*)\")", + "expr": "irate(node_disk_io_time_seconds_total[1m]) and on (instance, device) label_replace(label_replace(ceph_disk_occupation{ceph_daemon=~\"$osd\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"(.*)\")", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{device}} on {{instance}}", @@ -686,7 +686,7 @@ "title": "Physical Device Util% for $osd", "tooltip": { "shared": true, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", diff --git a/monitoring/grafana/dashboards/osds-overview.json b/monitoring/grafana/dashboards/osds-overview.json index 3fc6d3152c25d..d81c6e3646e1a 100644 --- a/monitoring/grafana/dashboards/osds-overview.json +++ b/monitoring/grafana/dashboards/osds-overview.json @@ -112,7 +112,7 @@ "title": "OSD Read Latencies", "tooltip": { "shared": true, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -289,7 +289,7 @@ "title": "OSD Write Latencies", 
"tooltip": { "shared": true, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -786,7 +786,7 @@ "title": "Read/Write Profile", "tooltip": { "shared": true, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", diff --git a/monitoring/grafana/dashboards/pool-overview.json b/monitoring/grafana/dashboards/pool-overview.json index 404d719ead0af..d8654599a9fd5 100644 --- a/monitoring/grafana/dashboards/pool-overview.json +++ b/monitoring/grafana/dashboards/pool-overview.json @@ -82,7 +82,7 @@ "title": "Top $topk Client IOPS by Pool", "tooltip": { "shared": true, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -180,7 +180,7 @@ "title": "Top $topk Client Throughput by Pool", "tooltip": { "shared": true, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -236,7 +236,7 @@ "scroll": true, "showHeader": true, "sort": { - "col": 6, + "col": 5, "desc": true }, "styles": [ @@ -382,7 +382,7 @@ "scroll": true, "showHeader": true, "sort": { - "col": 6, + "col": 5, "desc": true }, "styles": [ @@ -651,19 +651,19 @@ }, { "current": { - "text": "3", - "value": "3" + "text": "15", + "value": "15" }, "hide": 0, "label": "Top K", "name": "topk", "options": [ { - "text": "3", - "value": "3" + "text": "15", + "value": "15" } ], - "query": "3", + "query": "15", "skipUrlSync": false, "type": "textbox" } diff --git a/monitoring/grafana/dashboards/radosgw-detail.json b/monitoring/grafana/dashboards/radosgw-detail.json index 8aa02414a8fab..648abab89ce96 100644 --- a/monitoring/grafana/dashboards/radosgw-detail.json +++ b/monitoring/grafana/dashboards/radosgw-detail.json @@ -90,22 +90,23 @@ "steppedLine": false, "targets": [ { - "expr": "rate(ceph_rgw_get_initial_lat_sum{ceph_daemon=~\"($rgw_servers)\"}[30s]) / rate(ceph_rgw_get_initial_lat_count{ceph_daemon=~\"($rgw_servers)\"}[30s])", + "expr": "sum by (ceph_daemon) (rate(ceph_rgw_get_initial_lat_sum{ceph_daemon=~\"($rgw_servers)\"}[30s]) / 
rate(ceph_rgw_get_initial_lat_count{ceph_daemon=~\"($rgw_servers)\"}[30s]))", "format": "time_series", "intervalFactor": 1, - "legendFormat": "GET", + "legendFormat": "GET {{ceph_daemon}}", "refId": "A" }, { - "expr": "rate(ceph_rgw_put_initial_lat_sum{ceph_daemon=~\"($rgw_servers)\"}[30s]) / rate(ceph_rgw_put_initial_lat_count{ceph_daemon=~\"($rgw_servers)\"}[30s])", + "expr": "sum by (ceph_daemon)(rate(ceph_rgw_put_initial_lat_sum{ceph_daemon=~\"($rgw_servers)\"}[30s]) / rate(ceph_rgw_put_initial_lat_count{ceph_daemon=~\"($rgw_servers)\"}[30s]))", "format": "time_series", "intervalFactor": 1, - "legendFormat": "PUT", + "legendFormat": "PUT {{ceph_daemon}}", "refId": "B" } ], "thresholds": [], "timeFrom": null, + "timeRegions": [], "timeShift": null, "title": "$rgw_servers GET/PUT Latencies", "tooltip": { @@ -147,6 +148,7 @@ "dashes": false, "datasource": "$datasource", "fill": 1, + "fillGradient": 0, "gridPos": { "h": 8, "w": 7, @@ -180,19 +182,20 @@ "expr": "rate(ceph_rgw_get_b{ceph_daemon=~\"[[rgw_servers]]\"}[30s])", "format": "time_series", "intervalFactor": 1, - "legendFormat": "GETs", + "legendFormat": "GETs {{ceph_daemon}}", "refId": "B" }, { "expr": "rate(ceph_rgw_put_b{ceph_daemon=~\"[[rgw_servers]]\"}[30s])", "format": "time_series", "intervalFactor": 1, - "legendFormat": "PUTs", + "legendFormat": "PUTs {{ceph_daemon}}", "refId": "A" } ], "thresholds": [], "timeFrom": null, + "timeRegions": [], "timeShift": null, "title": "Bandwidth by HTTP Operation", "tooltip": { @@ -241,6 +244,7 @@ "dashes": false, "datasource": "$datasource", "fill": 1, + "fillGradient": 0, "gridPos": { "h": 8, "w": 7, @@ -274,33 +278,34 @@ "expr": "rate(ceph_rgw_failed_req{ceph_daemon=~\"[[rgw_servers]]\"}[30s])", "format": "time_series", "intervalFactor": 1, - "legendFormat": "Requests Failed", + "legendFormat": "Requests Failed {{ceph_daemon}}", "refId": "B" }, { "expr": "rate(ceph_rgw_get{ceph_daemon=~\"[[rgw_servers]]\"}[30s])", "format": "time_series", "intervalFactor": 1, - 
"legendFormat": "GETs", + "legendFormat": "GETs {{ceph_daemon}}", "refId": "C" }, { "expr": "rate(ceph_rgw_put{ceph_daemon=~\"[[rgw_servers]]\"}[30s])", "format": "time_series", "intervalFactor": 1, - "legendFormat": "PUTs", + "legendFormat": "PUTs {{ceph_daemon}}", "refId": "D" }, { "expr": "rate(ceph_rgw_req{ceph_daemon=~\"[[rgw_servers]]\"}[30s]) -\n (rate(ceph_rgw_get{ceph_daemon=~\"[[rgw_servers]]\"}[30s]) +\n rate(ceph_rgw_put{ceph_daemon=~\"[[rgw_servers]]\"}[30s]))", "format": "time_series", "intervalFactor": 1, - "legendFormat": "Other", + "legendFormat": "Other {{ceph_daemon}}", "refId": "A" } ], "thresholds": [], "timeFrom": null, + "timeRegions": [], "timeShift": null, "title": "HTTP Request Breakdown", "tooltip": { @@ -374,28 +379,28 @@ "expr": "rate(ceph_rgw_failed_req{ceph_daemon=~\"[[rgw_servers]]\"}[30s])", "format": "time_series", "intervalFactor": 1, - "legendFormat": "Failures", + "legendFormat": "Failures {{ceph_daemon}}", "refId": "A" }, { "expr": "rate(ceph_rgw_get{ceph_daemon=~\"[[rgw_servers]]\"}[30s])", "format": "time_series", "intervalFactor": 1, - "legendFormat": "GETs", + "legendFormat": "GETs {{ceph_daemon}}", "refId": "B" }, { "expr": "rate(ceph_rgw_put{ceph_daemon=~\"[[rgw_servers]]\"}[30s])", "format": "time_series", "intervalFactor": 1, - "legendFormat": "PUTs", + "legendFormat": "PUTs {{ceph_daemon}}", "refId": "C" }, { "expr": "rate(ceph_rgw_req{ceph_daemon=~\"[[rgw_servers]]\"}[30s]) -\n (rate(ceph_rgw_get{ceph_daemon=~\"[[rgw_servers]]\"}[30s]) +\n rate(ceph_rgw_put{ceph_daemon=~\"[[rgw_servers]]\"}[30s]))", "format": "time_series", "intervalFactor": 1, - "legendFormat": "Other (DELETE,LIST)", + "legendFormat": "Other (DELETE,LIST) {{ceph_daemon}}", "refId": "D" } ], diff --git a/monitoring/grafana/dashboards/rbd-overview.json b/monitoring/grafana/dashboards/rbd-overview.json index 37eb650a6489f..f3df003ec49ec 100644 --- a/monitoring/grafana/dashboards/rbd-overview.json +++ b/monitoring/grafana/dashboards/rbd-overview.json @@ 
-105,7 +105,7 @@ "title": "IOPS", "tooltip": { "shared": true, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -200,7 +200,7 @@ "title": "Throughput", "tooltip": { "shared": true, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph", @@ -293,7 +293,7 @@ "title": "Average Latency", "tooltip": { "shared": true, - "sort": 0, + "sort": 2, "value_type": "individual" }, "type": "graph",