From 2d2bbdf490befb6f08db9691b0a3e2a915df8943 Mon Sep 17 00:00:00 2001 From: Aashish Sharma Date: Fri, 8 Oct 2021 15:00:09 +0530 Subject: [PATCH] mgr/dashboard: monitoring: grafonnet refactoring for radosgw dashboards This PR intends to refactor radosgw dashboards using grafonnet Fixes:https://tracker.ceph.com/issues/52777 Signed-off-by: Aashish Sharma (cherry picked from commit eb01954cd999430417555628e0099f645d371746) --- .../jsonnet/grafana_dashboards.jsonnet | 97 +- .../grafana/dashboards/radosgw-detail.json | 964 ++++++------- .../grafana/dashboards/radosgw-overview.json | 1265 +++++++++-------- .../dashboards/radosgw-sync-overview.json | 9 +- 4 files changed, 1236 insertions(+), 1099 deletions(-) diff --git a/monitoring/grafana/dashboards/jsonnet/grafana_dashboards.jsonnet b/monitoring/grafana/dashboards/jsonnet/grafana_dashboards.jsonnet index 11c89b212552f..925a712fbe4bb 100644 --- a/monitoring/grafana/dashboards/jsonnet/grafana_dashboards.jsonnet +++ b/monitoring/grafana/dashboards/jsonnet/grafana_dashboards.jsonnet @@ -18,12 +18,12 @@ local addAnnotationSchema(builtIn, datasource, enable, hide, iconColor, name, ty { "radosgw-sync-overview.json": local RgwSyncOverviewPanel(title, formatY1, labelY1, rgwMetric, x, y, w, h) = - graphPanelSchema(title, 'null as zero', true, formatY1, 'short', labelY1, null, 0, 1, '$datasource') + graphPanelSchema({}, title, '', 'null as zero', true, formatY1, 'short', labelY1, null, 0, 1, '$datasource') .addTargets( [addTargetSchema('sum by (source_zone) (rate(%s[30s]))' % rgwMetric, 1, 'time_series', '{{source_zone}}')]) + {gridPos: {x: x, y: y, w: w, h: h}}; dashboardSchema( - 'RGW Sync Overview', 'rgw-sync-overview', 'now-1h', '15s', 16, ["overview"], '', {refresh_intervals:['5s','10s','15s','30s','1m','5m','15m','30m','1h','2h','1d'],time_options:['5m','15m','1h','6h','12h','24h','2d','7d','30d']} + 'RGW Sync Overview', '', 'rgw-sync-overview', 'now-1h', '15s', 16, ["overview"], '', {refresh_intervals:['5s','10s','15s','30s','1m','5m','15m','30m','1h','2h','1d'],time_options:['5m','15m','1h','6h','12h','24h','2d','7d','30d']} ) .addAnnotation( addAnnotationSchema( @@ -36,7 +36,7 @@ local addAnnotationSchema(builtIn, datasource, enable, hide, iconColor, name, ty type='panel', id='graph', name='Graph', version='5.0.0' ) .addTemplate( - addTemplateSchema('rgw_servers', '$datasource', 'prometehus', 1, 2, true, 1) + addTemplateSchema('rgw_servers', '$datasource', 'prometehus', 1, true, 1, '', '') ) .addTemplate( g.template.datasource('datasource', 'prometheus', 'default', label='Data Source') @@ -52,3 +52,94 @@ local addAnnotationSchema(builtIn, datasource, enable, hide, iconColor, name, ty 'Unsuccessful Object Replications from Source Zone', 'short', 'Count/s', 'ceph_data_sync_from_zone_fetch_errors', 0, 7, 8, 7) ]) } +{ + "radosgw-overview.json": + local RgwOverviewPanel(title, description, formatY1, formatY2, expr1, legendFormat1, x, y, w, h) = + graphPanelSchema({}, title, description, 'null', false, formatY1, formatY2, null, null, 0, 1, '$datasource') + .addTargets( + [addTargetSchema(expr1, 1, 'time_series', legendFormat1)]) + {gridPos: {x: x, y: y, w: w, h: h}}; + + dashboardSchema( + 'RGW Overview', '', 'WAkugZpiz', 'now-1h', '15s', 16, ['overview'], '', {refresh_intervals:['5s','10s','15s','30s','1m','5m','15m','30m','1h','2h','1d'],time_options:['5m','15m','1h','6h','12h','24h','2d','7d','30d']} + ) + .addAnnotation( + addAnnotationSchema( + 1, '-- Grafana --', true, true, 'rgba(0, 211, 255, 1)', 'Annotations & Alerts', 'dashboard') + ) + .addRequired( + type='grafana', id='grafana', name='Grafana', version='5.0.0' + ) + .addRequired( + type='panel', id='graph', name='Graph', version='5.0.0' + ) + .addTemplate( + addTemplateSchema('rgw_servers', '$datasource', 'label_values(ceph_rgw_req, ceph_daemon)', 1, true, 1, '', '') + ) + .addTemplate( + g.template.datasource('datasource', 'prometheus', 'default', label='Data Source') + ) + .addPanels([ + addRowSchema(false, true, 'RGW Overview - All Gateways') + {gridPos: {x: 0, y: 0, w: 24, h: 1}}, + RgwOverviewPanel( + 'Average GET/PUT Latencies', '', 's', 'short', 'rate(ceph_rgw_get_initial_lat_sum[30s]) / rate(ceph_rgw_get_initial_lat_count[30s])', 'GET AVG', 0, 1, 8, 7).addTargets( + [addTargetSchema('rate(ceph_rgw_put_initial_lat_sum[30s]) / rate(ceph_rgw_put_initial_lat_count[30s])', 1, 'time_series', 'PUT AVG')]), + RgwOverviewPanel( + 'Total Requests/sec by RGW Instance', '', 'none', 'short', 'sum by(rgw_host) (label_replace(rate(ceph_rgw_req[30s]), \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"))', '{{rgw_host}}', 8, 1, 7, 7), + RgwOverviewPanel( + 'GET Latencies by RGW Instance', 'Latencies are shown stacked, without a yaxis to provide a visual indication of GET latency imbalance across RGW hosts', 's', 'short', 'label_replace(rate(ceph_rgw_get_initial_lat_sum[30s]),\"rgw_host\",\"$1\",\"ceph_daemon\",\"rgw.(.*)\") / \nlabel_replace(rate(ceph_rgw_get_initial_lat_count[30s]),\"rgw_host\",\"$1\",\"ceph_daemon\",\"rgw.(.*)\")', '{{rgw_host}}', 15, 1, 6, 7), + RgwOverviewPanel( + 'Bandwidth Consumed by Type', 'Total bytes transferred in/out of all radosgw instances within the cluster', 'bytes', 'short', 'sum(rate(ceph_rgw_get_b[30s]))', 'GETs', 0, 8, 8, 6).addTargets( + [addTargetSchema('sum(rate(ceph_rgw_put_b[30s]))', 1, 'time_series', 'PUTs')]), + RgwOverviewPanel( + 'Bandwidth by RGW Instance', 'Total bytes transferred in/out through get/put operations, by radosgw instance', 'bytes', 'short', 'sum by(rgw_host) (\n (label_replace(rate(ceph_rgw_get_b[30s]), \"rgw_host\",\"$1\",\"ceph_daemon\",\"rgw.(.*)\")) + \n (label_replace(rate(ceph_rgw_put_b[30s]), \"rgw_host\",\"$1\",\"ceph_daemon\",\"rgw.(.*)\"))\n)', '{{rgw_host}}', 8, 8, 7, 6), + RgwOverviewPanel( + 'PUT Latencies by RGW Instance', 'Latencies are shown stacked, without a yaxis to provide a visual indication of PUT latency imbalance across RGW hosts', 's', 'short', 'label_replace(rate(ceph_rgw_put_initial_lat_sum[30s]),\"rgw_host\",\"$1\",\"ceph_daemon\",\"rgw.(.*)\") / \nlabel_replace(rate(ceph_rgw_put_initial_lat_count[30s]),\"rgw_host\",\"$1\",\"ceph_daemon\",\"rgw.(.*)\")', '{{rgw_host}}', 15, 8, 6, 6) + ]) +} +{ + "radosgw-detail.json": + local RgwDetailsPanel(aliasColors, title, description, formatY1, formatY2, expr1, expr2, legendFormat1, legendFormat2, x, y, w, h) = + graphPanelSchema(aliasColors, title, description, 'null', false, formatY1, formatY2, null, null, 0, 1, '$datasource') + .addTargets( + [addTargetSchema(expr1, 1, 'time_series', legendFormat1),addTargetSchema(expr2, 1, 'time_series', legendFormat2)]) + {gridPos: {x: x, y: y, w: w, h: h}}; + + dashboardSchema( + 'RGW Instance Detail', '', 'x5ARzZtmk', 'now-1h', '15s', 16, ['overview'], '', {refresh_intervals:['5s','10s','15s','30s','1m','5m','15m','30m','1h','2h','1d'],time_options:['5m','15m','1h','6h','12h','24h','2d','7d','30d']} + ) + .addAnnotation( + addAnnotationSchema( + 1, '-- Grafana --', true, true, 'rgba(0, 211, 255, 1)', 'Annotations & Alerts', 'dashboard') + ) + .addRequired( + type='grafana', id='grafana', name='Grafana', version='5.0.0' + ) + .addRequired( + type='panel', id='grafana-piechart-panel', name='Pie Chart', version='1.3.3' + ) + .addRequired( + type='panel', id='graph', name='Graph', version='5.0.0' + ) + .addTemplate( + g.template.datasource('datasource', 'prometheus', 'default', label='Data Source') + ) + .addTemplate( + addTemplateSchema('rgw_servers', '$datasource', 'label_values(ceph_rgw_req, ceph_daemon)', 1, true, 1, '', '') + ) + .addPanels([ + addRowSchema(false, true, 'RGW Host Detail : $rgw_servers') + {gridPos: {x: 0, y: 0, w: 24, h: 1}}, + RgwDetailsPanel( + {}, '$rgw_servers GET/PUT Latencies', '', 's', 'short', 'sum by (ceph_daemon) (rate(ceph_rgw_get_initial_lat_sum{ceph_daemon=~\"($rgw_servers)\"}[30s]) / rate(ceph_rgw_get_initial_lat_count{ceph_daemon=~\"($rgw_servers)\"}[30s]))', 'sum by (ceph_daemon)(rate(ceph_rgw_put_initial_lat_sum{ceph_daemon=~\"($rgw_servers)\"}[30s]) / rate(ceph_rgw_put_initial_lat_count{ceph_daemon=~\"($rgw_servers)\"}[30s]))', 'GET {{ceph_daemon}}', 'PUT {{ceph_daemon}}', 0, 1, 6, 8), + RgwDetailsPanel( + {}, 'Bandwidth by HTTP Operation', '', 'bytes', 'short', 'rate(ceph_rgw_get_b{ceph_daemon=~\"$rgw_servers\"}[30s])', 'rate(ceph_rgw_put_b{ceph_daemon=~\"$rgw_servers\"}[30s])', 'GETs {{ceph_daemon}}', 'PUTs {{ceph_daemon}}', 6, 1, 7, 8), + RgwDetailsPanel( + {"GETs": "#7eb26d","Other": "#447ebc","PUTs": "#eab839","Requests": "#3f2b5b","Requests Failed": "#bf1b00"},'HTTP Request Breakdown', '', 'short', 'short', 'rate(ceph_rgw_failed_req{ceph_daemon=~\"$rgw_servers\"}[30s])', 'rate(ceph_rgw_get{ceph_daemon=~\"$rgw_servers\"}[30s])', 'Requests Failed {{ceph_daemon}}', 'GETs {{ceph_daemon}}', 13, 1, 7, 8) + .addTargets( + [addTargetSchema('rate(ceph_rgw_put{ceph_daemon=~\"$rgw_servers\"}[30s])', 1, 'time_series', 'PUTs {{ceph_daemon}}'),addTargetSchema('rate(ceph_rgw_req{ceph_daemon=~\"$rgw_servers\"}[30s]) -\n (rate(ceph_rgw_get{ceph_daemon=~\"$rgw_servers\"}[30s]) +\n rate(ceph_rgw_put{ceph_daemon=~\"$rgw_servers\"}[30s]))', 1, 'time_series', 'Other {{ceph_daemon}}')]), + addPieChartSchema( + {"GETs": "#7eb26d","Other (HEAD,POST,DELETE)": "#447ebc","PUTs": "#eab839","Requests": "#3f2b5b","Failures": "#bf1b00"},'$datasource', '', 'Under graph', 'pie', 'Workload Breakdown', 'current') + .addTarget(addTargetSchema('rate(ceph_rgw_failed_req{ceph_daemon=~\"$rgw_servers\"}[30s])', 1, 'time_series', 'Failures {{ceph_daemon}}')) + .addTarget(addTargetSchema('rate(ceph_rgw_get{ceph_daemon=~\"$rgw_servers\"}[30s])', 1, 'time_series', 'GETs {{ceph_daemon}}')) + .addTarget(addTargetSchema('rate(ceph_rgw_put{ceph_daemon=~\"$rgw_servers\"}[30s])', 1, 'time_series', 'PUTs {{ceph_daemon}}')) + .addTarget(addTargetSchema('rate(ceph_rgw_req{ceph_daemon=~\"$rgw_servers\"}[30s]) -\n (rate(ceph_rgw_get{ceph_daemon=~\"$rgw_servers\"}[30s]) +\n rate(ceph_rgw_put{ceph_daemon=~\"$rgw_servers\"}[30s]))', 1, 'time_series', 'Other (DELETE,LIST) {{ceph_daemon}}')) + {gridPos: {x: 20, y: 1, w: 4, h: 8}} + ]) +} diff --git a/monitoring/grafana/dashboards/radosgw-detail.json b/monitoring/grafana/dashboards/radosgw-detail.json index bf5b16b8a2c64..432eecc837ca4 100644 --- a/monitoring/grafana/dashboards/radosgw-detail.json +++ b/monitoring/grafana/dashboards/radosgw-detail.json @@ -1,491 +1,499 @@ { - "__requires": [ - { - "type": "grafana", - "id": "grafana", - "name": "Grafana", - "version": "5.0.0" - }, - { - "type": "panel", - "id": "grafana-piechart-panel", - "name": "Pie Chart", - "version": "1.3.3" - }, - { - "type": "panel", - "id": "graph", - "name": "Graph", - "version": "5.0.0" - } - ], - "annotations": { - "list": [ + "__inputs": [ ], + "__requires": [ { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "editable": false, - "gnetId": null, - "graphTooltip": 0, - "id": null, - "iteration": 1534386250869, - "links": [], - "panels": [ - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 12, - "panels": [], - "repeat": null, - "title": "RGW Host Detail : $rgw_servers", - "type": "row" - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { - "h": 8, - "w": 6, - "x": 0, - "y": 1 - }, - "id": 34, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum by (ceph_daemon) (rate(ceph_rgw_get_initial_lat_sum{ceph_daemon=~\"($rgw_servers)\"}[30s]) / rate(ceph_rgw_get_initial_lat_count{ceph_daemon=~\"($rgw_servers)\"}[30s]))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "GET {{ceph_daemon}}", - "refId": "A" - }, - { - "expr": "sum by (ceph_daemon)(rate(ceph_rgw_put_initial_lat_sum{ceph_daemon=~\"($rgw_servers)\"}[30s]) / rate(ceph_rgw_put_initial_lat_count{ceph_daemon=~\"($rgw_servers)\"}[30s]))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "PUT {{ceph_daemon}}", - "refId": "B" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "$rgw_servers GET/PUT Latencies", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 7, - "x": 6, - "y": 1 - }, - "id": 18, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "rate(ceph_rgw_get_b{ceph_daemon=~\"$rgw_servers\"}[30s])", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "GETs {{ceph_daemon}}", - "refId": "B" - }, - { - "expr": "rate(ceph_rgw_put_b{ceph_daemon=~\"$rgw_servers\"}[30s])", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "PUTs {{ceph_daemon}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Bandwidth by HTTP Operation", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 0, - "format": "bytes", - "label": "", - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": { - "GETs": "#7eb26d", - "Other": "#447ebc", - "PUTs": "#eab839", - "Requests": "#3f2b5b", - "Requests Failed": "#bf1b00" - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 7, - "x": 13, - "y": 1 + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "5.0.0" }, - "id": 14, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "rate(ceph_rgw_failed_req{ceph_daemon=~\"$rgw_servers\"}[30s])", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Requests Failed {{ceph_daemon}}", - "refId": "B" - }, - { - "expr": "rate(ceph_rgw_get{ceph_daemon=~\"$rgw_servers\"}[30s])", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "GETs {{ceph_daemon}}", - "refId": "C" - }, - { - "expr": "rate(ceph_rgw_put{ceph_daemon=~\"$rgw_servers\"}[30s])", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "PUTs {{ceph_daemon}}", - "refId": "D" - }, - { - "expr": "rate(ceph_rgw_req{ceph_daemon=~\"$rgw_servers\"}[30s]) -\n (rate(ceph_rgw_get{ceph_daemon=~\"$rgw_servers\"}[30s]) +\n rate(ceph_rgw_put{ceph_daemon=~\"$rgw_servers\"}[30s]))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Other {{ceph_daemon}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "HTTP Request Breakdown", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] + { + "id": "grafana-piechart-panel", + "name": "Pie Chart", + "type": "panel", + "version": "1.3.3" }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } + { + "id": "graph", + "name": "Graph", + "type": "panel", + "version": "5.0.0" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "showIn": 0, + "tags": [ ], + "type": "dashboard" + } ] - }, - { - "aliasColors": { - "Failures": "#bf1b00", - "GETs": "#7eb26d", - "Other (HEAD,POST,DELETE)": "#447ebc", - "PUTs": "#eab839" - }, - "breakPoint": "50%", - "cacheTimeout": null, - "combine": { - "label": "Others", - "threshold": 0 + }, + "description": "", + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ ], + "panels": [ + { + "collapse": false, + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 2, + "panels": [ ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "RGW Host Detail : $rgw_servers", + "titleSize": "h6", + "type": "row" }, - "datasource": "$datasource", - "fontSize": "80%", - "format": "none", - "gridPos": { - "h": 8, - "w": 4, - "x": 20, - "y": 1 + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "", + "fill": 1, + "gridPos": { + "h": 8, + "w": 6, + "x": 0, + "y": 1 + }, + "id": 3, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (ceph_daemon) (rate(ceph_rgw_get_initial_lat_sum{ceph_daemon=~\"($rgw_servers)\"}[30s]) / rate(ceph_rgw_get_initial_lat_count{ceph_daemon=~\"($rgw_servers)\"}[30s]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "GET {{ceph_daemon}}", + "refId": "A" + }, + { + "expr": "sum by (ceph_daemon)(rate(ceph_rgw_put_initial_lat_sum{ceph_daemon=~\"($rgw_servers)\"}[30s]) / rate(ceph_rgw_put_initial_lat_count{ceph_daemon=~\"($rgw_servers)\"}[30s]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "PUT {{ceph_daemon}}", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "$rgw_servers GET/PUT Latencies", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] }, - "id": 23, - "interval": null, - "legend": { - "show": true, - "values": true + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "", + "fill": 1, + "gridPos": { + "h": 8, + "w": 7, + "x": 6, + "y": 1 + }, + "id": 4, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(ceph_rgw_get_b{ceph_daemon=~\"$rgw_servers\"}[30s])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "GETs {{ceph_daemon}}", + "refId": "A" + }, + { + "expr": "rate(ceph_rgw_put_b{ceph_daemon=~\"$rgw_servers\"}[30s])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "PUTs {{ceph_daemon}}", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Bandwidth by HTTP Operation", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] }, - "legendType": "Under graph", - "links": [], - "maxDataPoints": 3, - "nullPointMode": "connected", - "pieType": "pie", - "strokeWidth": 1, - "targets": [ - { - "expr": "rate(ceph_rgw_failed_req{ceph_daemon=~\"$rgw_servers\"}[30s])", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Failures {{ceph_daemon}}", - "refId": "A" - }, - { - "expr": "rate(ceph_rgw_get{ceph_daemon=~\"$rgw_servers\"}[30s])", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "GETs {{ceph_daemon}}", - "refId": "B" - }, - { - "expr": "rate(ceph_rgw_put{ceph_daemon=~\"$rgw_servers\"}[30s])", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "PUTs {{ceph_daemon}}", - "refId": "C" - }, - { - "expr": "rate(ceph_rgw_req{ceph_daemon=~\"$rgw_servers\"}[30s]) -\n (rate(ceph_rgw_get{ceph_daemon=~\"$rgw_servers\"}[30s]) +\n rate(ceph_rgw_put{ceph_daemon=~\"$rgw_servers\"}[30s]))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "Other (DELETE,LIST) {{ceph_daemon}}", - "refId": "D" - } - ], - "title": "Workload Breakdown", - "type": "grafana-piechart-panel", - "valueName": "current" - } - ], - "refresh": "15s", - "schemaVersion": 16, - "style": "dark", - "tags": [ - "overview" - ], - "templating": { - "list": [ { - "current": { - "tags": [], - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data Source", - "name": "datasource", - "options": [], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" + "aliasColors": { + "GETs": "#7eb26d", + "Other": "#447ebc", + "PUTs": "#eab839", + "Requests": "#3f2b5b", + "Requests Failed": "#bf1b00" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "", + "fill": 1, + "gridPos": { + "h": 8, + "w": 7, + "x": 13, + "y": 1 + }, + "id": 5, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(ceph_rgw_failed_req{ceph_daemon=~\"$rgw_servers\"}[30s])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Requests Failed {{ceph_daemon}}", + "refId": "A" + }, + { + "expr": "rate(ceph_rgw_get{ceph_daemon=~\"$rgw_servers\"}[30s])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "GETs {{ceph_daemon}}", + "refId": "B" + }, + { + "expr": "rate(ceph_rgw_put{ceph_daemon=~\"$rgw_servers\"}[30s])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "PUTs {{ceph_daemon}}", + "refId": "C" + }, + { + "expr": "rate(ceph_rgw_req{ceph_daemon=~\"$rgw_servers\"}[30s]) -\n (rate(ceph_rgw_get{ceph_daemon=~\"$rgw_servers\"}[30s]) +\n rate(ceph_rgw_put{ceph_daemon=~\"$rgw_servers\"}[30s]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Other {{ceph_daemon}}", + "refId": "D" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "HTTP Request Breakdown", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] }, { - "allValue": null, - "current": {}, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": null, - "multi": false, - "name": "rgw_servers", - "options": [], - "query": "label_values(ceph_rgw_req, ceph_daemon)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false + "aliasColors": { + "Failures": "#bf1b00", + "GETs": "#7eb26d", + "Other (HEAD,POST,DELETE)": "#447ebc", + "PUTs": "#eab839", + "Requests": "#3f2b5b" + }, + "datasource": "$datasource", + "description": "", + "gridPos": { + "h": 8, + "w": 4, + "x": 20, + "y": 1 + }, + "id": 6, + "legend": { + "percentage": true, + "show": true, + "values": true + }, + "legendType": "Under graph", + "pieType": "pie", + "targets": [ + { + "expr": "rate(ceph_rgw_failed_req{ceph_daemon=~\"$rgw_servers\"}[30s])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Failures {{ceph_daemon}}", + "refId": "A" + }, + { + "expr": "rate(ceph_rgw_get{ceph_daemon=~\"$rgw_servers\"}[30s])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "GETs {{ceph_daemon}}", + "refId": "B" + }, + { + "expr": "rate(ceph_rgw_put{ceph_daemon=~\"$rgw_servers\"}[30s])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "PUTs {{ceph_daemon}}", + "refId": "C" + }, + { + "expr": "rate(ceph_rgw_req{ceph_daemon=~\"$rgw_servers\"}[30s]) -\n (rate(ceph_rgw_get{ceph_daemon=~\"$rgw_servers\"}[30s]) +\n rate(ceph_rgw_put{ceph_daemon=~\"$rgw_servers\"}[30s]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Other (DELETE,LIST) {{ceph_daemon}}", + "refId": "D" + } + ], + "title": "Workload Breakdown", + "type": "grafana-piechart-panel", + "valueName": "current" } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "15s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "", - "title": "RGW Instance Detail", - "uid": "x5ARzZtmk", - "version": 2 + ], + "refresh": "15s", + "rows": [ ], + "schemaVersion": 16, + "style": "dark", + "tags": [ + "overview" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "", + "multi": false, + "name": "rgw_servers", + "options": [ ], + "query": "label_values(ceph_rgw_req, ceph_daemon)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "15s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "RGW Instance Detail", + "uid": "x5ARzZtmk", + "version": 0 } diff --git a/monitoring/grafana/dashboards/radosgw-overview.json b/monitoring/grafana/dashboards/radosgw-overview.json index 487d736b3d4e1..f996fed95e6e6 100644 --- a/monitoring/grafana/dashboards/radosgw-overview.json +++ b/monitoring/grafana/dashboards/radosgw-overview.json @@ -1,630 +1,663 @@ { - "__requires": [ - { - "type": "grafana", - "id": "grafana", - "name": "Grafana", - "version": "5.0.0" - }, - { - "type": "panel", - "id": "graph", - "name": "Graph", - "version": "5.0.0" - } - ], - "annotations": { - "list": [ + "__inputs": [ ], + "__requires": [ { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "editable": false, - "gnetId": null, - "graphTooltip": 0, - "id": null, - "iteration": 1534386107523, - "links": [], - "panels": [ - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 2, - "panels": [], - "title": "RGW Overview - All Gateways", - "type": "row" - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { - "h": 7, - "w": 8, - "x": 0, - "y": 1 - }, - "id": 29, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "rate(ceph_rgw_get_initial_lat_sum[30s]) / rate(ceph_rgw_get_initial_lat_count[30s])", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "GET AVG", - "refId": "A" - }, - { - "expr": "rate(ceph_rgw_put_initial_lat_sum[30s]) / rate(ceph_rgw_put_initial_lat_count[30s])", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "PUT AVG", - "refId": "B" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Average GET/PUT Latencies", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { - "h": 7, - "w": 7, - "x": 8, - "y": 1 - }, - "id": 4, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum by(rgw_host) (label_replace(rate(ceph_rgw_req[30s]), \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{rgw_host}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Total Requests/sec by RGW Instance", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 0, - "format": "none", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "Latencies are shown stacked, without a yaxis to provide a visual indication of GET latency imbalance across RGW hosts", - "fill": 1, - "gridPos": { - "h": 7, - "w": 6, - "x": 15, - "y": 1 - }, - "id": 31, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "label_replace(rate(ceph_rgw_get_initial_lat_sum[30s]),\"rgw_host\",\"$1\",\"ceph_daemon\",\"rgw.(.*)\") / \nlabel_replace(rate(ceph_rgw_get_initial_lat_count[30s]),\"rgw_host\",\"$1\",\"ceph_daemon\",\"rgw.(.*)\")", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{rgw_host}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "GET Latencies by RGW Instance", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": null, - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": false - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "Total bytes transferred in/out of all radosgw instances within the cluster", - "fill": 1, - "gridPos": { - "h": 6, - "w": 8, - "x": 0, - "y": 8 - }, - "id": 6, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "5.0.0" }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum(rate(ceph_rgw_get_b[30s]))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "GETs", - "refId": "A" - }, - { - "expr": "sum(rate(ceph_rgw_put_b[30s]))", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "PUTs", - "refId": "B" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Bandwidth Consumed by Type", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } + { + "id": "graph", + "name": "Graph", + "type": "panel", + "version": "5.0.0" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "showIn": 0, + "tags": [ ], + "type": "dashboard" + } ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "Total bytes transferred in/out through get/put operations, by radosgw instance", - "fill": 1, - "gridPos": { - "h": 6, - "w": 7, - "x": 8, - "y": 8 - }, - "id": 9, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "sum by(rgw_host) (\n (label_replace(rate(ceph_rgw_get_b[30s]), \"rgw_host\",\"$1\",\"ceph_daemon\",\"rgw.(.*)\")) + \n (label_replace(rate(ceph_rgw_put_b[30s]), \"rgw_host\",\"$1\",\"ceph_daemon\",\"rgw.(.*)\"))\n)", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{rgw_host}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "Bandwidth by RGW Instance", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] + }, + "description": "", + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ ], + "panels": [ + { + "collapse": false, + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 2, + "panels": [ ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "RGW Overview - All Gateways", + "titleSize": "h6", + "type": "row" }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "description": "Latencies are shown stacked, without a yaxis to provide a visual indication of PUT latency imbalance across RGW hosts", - "fill": 1, - "gridPos": { - "h": 6, - "w": 6, - "x": 15, - "y": 8 + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "", + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 1 + }, + "id": 3, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(ceph_rgw_get_initial_lat_sum[30s]) / rate(ceph_rgw_get_initial_lat_count[30s])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "GET AVG", + "refId": "A" + }, + { + "expr": "rate(ceph_rgw_put_initial_lat_sum[30s]) / rate(ceph_rgw_put_initial_lat_count[30s])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "PUT AVG", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Average GET/PUT Latencies", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] }, - "id": 32, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "", + "fill": 1, + "gridPos": { + "h": 7, + "w": 7, + "x": 8, + "y": 1 + }, + "id": 4, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(rgw_host) (label_replace(rate(ceph_rgw_req[30s]), \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{rgw_host}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Total Requests/sec by RGW Instance", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": true, - "steppedLine": false, - "targets": [ - { - "expr": "label_replace(rate(ceph_rgw_put_initial_lat_sum[30s]),\"rgw_host\",\"$1\",\"ceph_daemon\",\"rgw.(.*)\") / \nlabel_replace(rate(ceph_rgw_put_initial_lat_count[30s]),\"rgw_host\",\"$1\",\"ceph_daemon\",\"rgw.(.*)\")", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "{{rgw_host}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeShift": null, - "title": "PUT Latencies by RGW Instance", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Latencies are shown stacked, without a yaxis to provide a visual indication of GET latency imbalance across RGW hosts", + "fill": 1, + "gridPos": { + "h": 7, + "w": 6, + "x": 15, + "y": 1 + }, + "id": 5, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "label_replace(rate(ceph_rgw_get_initial_lat_sum[30s]),\"rgw_host\",\"$1\",\"ceph_daemon\",\"rgw.(.*)\") / \nlabel_replace(rate(ceph_rgw_get_initial_lat_count[30s]),\"rgw_host\",\"$1\",\"ceph_daemon\",\"rgw.(.*)\")", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{rgw_host}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "GET Latencies by RGW Instance", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Total bytes transferred in/out of all radosgw instances within the cluster", + "fill": 1, + "gridPos": { + "h": 6, + "w": 8, + "x": 0, + "y": 8 + }, + "id": 6, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(ceph_rgw_get_b[30s]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "GETs", + "refId": "A" + }, + { + "expr": "sum(rate(ceph_rgw_put_b[30s]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "PUTs", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Bandwidth Consumed by Type", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] }, - "yaxes": [ - { - "decimals": null, - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": "0", - "show": false - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false - } - ] - } - ], - "refresh": "15s", - "schemaVersion": 16, - "style": "dark", - "tags": [ - "overview" - ], - "templating": { - "list": [ { - "allValue": null, - "current": {}, - "datasource": "$datasource", - "hide": 2, - "includeAll": true, - "label": null, - "multi": false, - "name": "rgw_servers", - "options": [], - "query": "label_values(ceph_rgw_req, ceph_daemon)", - "refresh": 1, - "regex": "", - "sort": 1, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Total bytes transferred in/out through get/put operations, by radosgw instance", + "fill": 1, + "gridPos": { + "h": 6, + "w": 7, + "x": 8, + "y": 8 + }, + "id": 7, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(rgw_host) (\n (label_replace(rate(ceph_rgw_get_b[30s]), \"rgw_host\",\"$1\",\"ceph_daemon\",\"rgw.(.*)\")) + \n (label_replace(rate(ceph_rgw_put_b[30s]), \"rgw_host\",\"$1\",\"ceph_daemon\",\"rgw.(.*)\"))\n)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{rgw_host}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Bandwidth by RGW Instance", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] }, { - "current": { - "tags": [], - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data Source", - "name": "datasource", - "options": [], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Latencies are shown stacked, without a yaxis to provide a visual indication of PUT latency imbalance across RGW hosts", + "fill": 1, + "gridPos": { + "h": 6, + "w": 6, + "x": 15, + "y": 8 + }, + "id": 8, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "label_replace(rate(ceph_rgw_put_initial_lat_sum[30s]),\"rgw_host\",\"$1\",\"ceph_daemon\",\"rgw.(.*)\") / \nlabel_replace(rate(ceph_rgw_put_initial_lat_count[30s]),\"rgw_host\",\"$1\",\"ceph_daemon\",\"rgw.(.*)\")", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{rgw_host}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "PUT Latencies by RGW Instance", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "15s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "", - "title": "RGW Overview", - "uid": "WAkugZpiz", - "version": 2 + ], + "refresh": "15s", + "rows": [ ], + "schemaVersion": 16, + "style": "dark", + "tags": [ + "overview" + ], + "templating": { + "list": [ + { + "allValue": null, + "current": { }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "", + "multi": false, + "name": "rgw_servers", + "options": [ ], + "query": "label_values(ceph_rgw_req, ceph_daemon)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "15s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "RGW Overview", + "uid": "WAkugZpiz", + "version": 0 } diff --git a/monitoring/grafana/dashboards/radosgw-sync-overview.json b/monitoring/grafana/dashboards/radosgw-sync-overview.json index d6cd5e98a5e3c..442da57590652 100644 --- a/monitoring/grafana/dashboards/radosgw-sync-overview.json +++ b/monitoring/grafana/dashboards/radosgw-sync-overview.json @@ -29,6 +29,7 @@ } ] }, + "description": "", "editable": false, "gnetId": null, "graphTooltip": 0, @@ -42,6 +43,7 @@ "dashLength": 10, "dashes": false, "datasource": "$datasource", + "description": "", "fill": 1, "gridPos": { "h": 7, @@ -126,6 +128,7 @@ "dashLength": 10, "dashes": false, "datasource": "$datasource", + "description": "", "fill": 1, "gridPos": { "h": 7, @@ -210,6 +213,7 @@ "dashLength": 10, "dashes": false, "datasource": "$datasource", + "description": "", "fill": 1, "gridPos": { "h": 7, @@ -294,6 +298,7 @@ "dashLength": 10, "dashes": false, "datasource": "$datasource", + "description": "", "fill": 1, "gridPos": { "h": 7, @@ -386,9 +391,9 @@ "allValue": null, "current": { }, "datasource": "$datasource", - "hide": 2, + "hide": 0, "includeAll": true, - "label": null, + "label": "", "multi": false, "name": "rgw_servers", "options": [ ], -- 2.39.5