From 8d83126e5168d03b44ec476de08502e7a12d3bf2 Mon Sep 17 00:00:00 2001 From: Avan Thakkar Date: Thu, 18 Nov 2021 04:56:12 +0530 Subject: [PATCH] mgr/dashboard: introduce HAProxy metrics for RGW Fixes: https://tracker.ceph.com/issues/53311 Signed-off-by: Avan Thakkar --- .../jsonnet/grafana_dashboards.jsonnet | 82 ++- .../grafana/dashboards/radosgw-overview.json | 533 ++++++++++++++++++ .../tests/features/radosgw_overview.feature | 212 +++++++ 3 files changed, 821 insertions(+), 6 deletions(-) create mode 100644 monitoring/grafana/dashboards/tests/features/radosgw_overview.feature diff --git a/monitoring/grafana/dashboards/jsonnet/grafana_dashboards.jsonnet b/monitoring/grafana/dashboards/jsonnet/grafana_dashboards.jsonnet index b9f5bffaa808a..26a7a3c36edb5 100644 --- a/monitoring/grafana/dashboards/jsonnet/grafana_dashboards.jsonnet +++ b/monitoring/grafana/dashboards/jsonnet/grafana_dashboards.jsonnet @@ -3,8 +3,8 @@ local g = import 'grafana.libsonnet'; local dashboardSchema(title, description, uid, time_from, refresh, schemaVersion, tags, timezone, timepicker) = g.dashboard.new(title=title, description=description, uid=uid, time_from=time_from, refresh=refresh, schemaVersion=schemaVersion, tags=tags, timezone=timezone, timepicker=timepicker); -local graphPanelSchema(aliasColors, title, description, nullPointMode, stack, formatY1, formatY2, labelY1, labelY2, min, fill, datasource) = - g.graphPanel.new(aliasColors=aliasColors, title=title, description=description, nullPointMode=nullPointMode, stack=stack, formatY1=formatY1, formatY2=formatY2, labelY1=labelY1, labelY2=labelY2, min=min, fill=fill, datasource=datasource); +local graphPanelSchema(aliasColors, title, description, nullPointMode, stack, formatY1, formatY2, labelY1, labelY2, min, fill, datasource, legend_alignAsTable=false, legend_avg=false, legend_min=false, legend_max=false, legend_current=false, legend_values=false) = + g.graphPanel.new(aliasColors=aliasColors, title=title, description=description, nullPointMode=nullPointMode, stack=stack, formatY1=formatY1, formatY2=formatY2, labelY1=labelY1, labelY2=labelY2, min=min, fill=fill, datasource=datasource, legend_alignAsTable=legend_alignAsTable, legend_avg=legend_avg, legend_min=legend_min, legend_max=legend_max, legend_current=legend_current, legend_values=legend_values); local addTargetSchema(expr, intervalFactor, format, legendFormat) = g.prometheus.target(expr=expr, intervalFactor=intervalFactor, format=format, legendFormat=legendFormat); @@ -250,8 +250,8 @@ local addStyle(alias, colorMode, colors, dateFormat, decimals, mappingType, patt } { "radosgw-overview.json": - local RgwOverviewPanel(title, description, formatY1, formatY2, expr1, legendFormat1, x, y, w, h) = - graphPanelSchema({}, title, description, 'null', false, formatY1, formatY2, null, null, 0, 1, '$datasource') + local RgwOverviewPanel(title, description, formatY1, formatY2, expr1, legendFormat1, x, y, w, h, datasource='$datasource', legend_alignAsTable=false, legend_avg=false, legend_min=false, legend_max=false, legend_current=false, legend_values=false) = + graphPanelSchema({}, title, description, 'null', false, formatY1, formatY2, null, null, 0, 1, datasource, legend_alignAsTable, legend_avg, legend_min, legend_max, legend_current, legend_values) .addTargets( [addTargetSchema(expr1, 1, 'time_series', legendFormat1)]) + {gridPos: {x: x, y: y, w: w, h: h}}; @@ -271,6 +271,12 @@ local addStyle(alias, colorMode, colors, dateFormat, decimals, mappingType, patt .addTemplate( addTemplateSchema('rgw_servers', 
'$datasource', 'label_values(ceph_rgw_req, ceph_daemon)', 1, true, 1, '', '') ) + .addTemplate( + addTemplateSchema('code', '$datasource', 'label_values(haproxy_server_http_responses_total{instance=~"$ingress_service"}, code)', 1, true, 1, 'HTTP Code', '') + ) + .addTemplate( + addTemplateSchema('ingress_service', '$datasource', 'label_values(haproxy_server_status, instance)', 1, true, 1, 'Ingress Service', '') + ) .addTemplate( g.template.datasource('datasource', 'prometheus', 'default', label='Data Source') ) @@ -289,8 +295,72 @@ local addStyle(alias, colorMode, colors, dateFormat, decimals, mappingType, patt RgwOverviewPanel( 'Bandwidth by RGW Instance', 'Total bytes transferred in/out through get/put operations, by radosgw instance', 'bytes', 'short', 'sum by(rgw_host) (\n (label_replace(rate(ceph_rgw_get_b[30s]), \"rgw_host\",\"$1\",\"ceph_daemon\",\"rgw.(.*)\")) + \n (label_replace(rate(ceph_rgw_put_b[30s]), \"rgw_host\",\"$1\",\"ceph_daemon\",\"rgw.(.*)\"))\n)', '{{rgw_host}}', 8, 8, 7, 6), RgwOverviewPanel( - 'PUT Latencies by RGW Instance', 'Latencies are shown stacked, without a yaxis to provide a visual indication of PUT latency imbalance across RGW hosts', 's', 'short', 'label_replace(rate(ceph_rgw_put_initial_lat_sum[30s]),\"rgw_host\",\"$1\",\"ceph_daemon\",\"rgw.(.*)\") / \nlabel_replace(rate(ceph_rgw_put_initial_lat_count[30s]),\"rgw_host\",\"$1\",\"ceph_daemon\",\"rgw.(.*)\")', '{{rgw_host}}', 15, 8, 6, 6) - ]) + 'PUT Latencies by RGW Instance', 'Latencies are shown stacked, without a yaxis to provide a visual indication of PUT latency imbalance across RGW hosts', 's', 'short', 'label_replace(rate(ceph_rgw_put_initial_lat_sum[30s]),\"rgw_host\",\"$1\",\"ceph_daemon\",\"rgw.(.*)\") / \nlabel_replace(rate(ceph_rgw_put_initial_lat_count[30s]),\"rgw_host\",\"$1\",\"ceph_daemon\",\"rgw.(.*)\")', '{{rgw_host}}', 15, 8, 6, 6), + + addRowSchema(false, true, 'RGW Overview - HAProxy Metrics') + {gridPos: {x: 0, y: 12, w: 9, h: 12}}, + RgwOverviewPanel( + 'Total responses by HTTP code', '', 'short', 'short', 'sum(irate(haproxy_frontend_http_responses_total{code=~"$code",instance=~"$ingress_service",proxy=~"frontend"}[5m])) by (code)', 'Frontend {{ code }}', 0, 12, 5, 12, '$datasource', true, true, true, true, true, true) + .addTargets( + [addTargetSchema('sum(irate(haproxy_backend_http_responses_total{code=~"$code",instance=~"$ingress_service",proxy=~"backend"}[5m])) by (code)', 1, 'time_series', 'Backend {{ code }}')]) + .addSeriesOverride([ + { "alias": "/.*Back.*/", + "transform": "negative-Y" }, + { "alias": "/.*1.*/" }, + { "alias": "/.*2.*/" }, + { "alias": "/.*3.*/" }, + { "alias": "/.*4.*/" }, + { "alias": "/.*5.*/" }, + { "alias": "/.*other.*/" } + ]), + RgwOverviewPanel( + 'Total requests / responses', '', 'short', 'short', + 'sum(irate(haproxy_frontend_http_requests_total{proxy=~"frontend",instance=~"$ingress_service"}[5m])) by (instance)', 'Requests', 5, 12, 5, 12, '$datasource', true, true, true, true, true, true) + .addTargets( + [addTargetSchema('sum(irate(haproxy_backend_response_errors_total{proxy=~"backend",instance=~"$ingress_service"}[5m])) by (instance)', 2, 'time_series', 'Response errors'), + addTargetSchema('sum(irate(haproxy_frontend_request_errors_total{proxy=~"frontend",instance=~"$ingress_service"}[5m])) by (instance)', 1, 'time_series', 'Requests errors'), + addTargetSchema('sum(irate(haproxy_backend_redispatch_warnings_total{proxy=~"backend",instance=~"$ingress_service"}[5m])) by (instance)', 2, 'time_series', 'Backend redispatch'), + 
addTargetSchema('sum(irate(haproxy_backend_retry_warnings_total{proxy=~"backend",instance=~"$ingress_service"}[5m])) by (instance)', 2, 'time_series', 'Backend retry'), + addTargetSchema('sum(irate(haproxy_frontend_requests_denied_total{proxy=~"frontend",instance=~"$ingress_service"}[5m])) by (instance)', 2, 'time_series', 'Request denied'), + addTargetSchema('sum(haproxy_backend_current_queue{proxy=~"backend",instance=~"$ingress_service"}) by (instance)', 2, 'time_series', 'Backend Queued'), + ]) + .addSeriesOverride([ + { + "alias": "/.*Response.*/", + "transform": "negative-Y" + }, + { + "alias": "/.*Backend.*/", + "transform": "negative-Y" + } + ]), + RgwOverviewPanel( + 'Total number of connections', '', 'short', 'short', + 'sum(irate(haproxy_frontend_connections_total{proxy=~"frontend",instance=~"$ingress_service"}[5m])) by (instance)', 'Front', 10, 12, 5, 12, '$datasource', true, true, true, true, true, true) + .addTargets( + [addTargetSchema('sum(irate(haproxy_backend_connection_attempts_total{proxy=~"backend",instance=~"$ingress_service"}[5m])) by (instance)', 1, 'time_series', 'Back'), + addTargetSchema('sum(irate(haproxy_backend_connection_errors_total{proxy=~"backend",instance=~"$ingress_service"}[5m])) by (instance)', 1, 'time_series', 'Back errors'), + ]) + .addSeriesOverride([ + { + "alias": "/.*Back.*/", + "transform": "negative-Y" + } + ]), + RgwOverviewPanel( + 'Current total of incoming / outgoing bytes', '', 'short', 'short', + 'sum(irate(haproxy_frontend_bytes_in_total{proxy=~"frontend",instance=~"$ingress_service"}[5m])*8) by (instance)', 'IN Front', 15, 12, 6, 12, '$datasource', true, true, true, true, true, true) + .addTargets( + [addTargetSchema('sum(irate(haproxy_frontend_bytes_out_total{proxy=~"frontend",instance=~"$ingress_service"}[5m])*8) by (instance)', 2, 'time_series', 'OUT Front'), + addTargetSchema('sum(irate(haproxy_backend_bytes_in_total{proxy=~"backend",instance=~"$ingress_service"}[5m])*8) by (instance)', 2, 'time_series', 'IN Back'), + addTargetSchema('sum(irate(haproxy_backend_bytes_out_total{proxy=~"backend",instance=~"$ingress_service"}[5m])*8) by (instance)', 2, 'time_series', 'OUT Back') + ]) + .addSeriesOverride([ + { + "alias": "/.*OUT.*/", + "transform": "negative-Y" + } + ]) + ]) } { "radosgw-detail.json": diff --git a/monitoring/grafana/dashboards/radosgw-overview.json b/monitoring/grafana/dashboards/radosgw-overview.json index f996fed95e6e6..489f29a2fc783 100644 --- a/monitoring/grafana/dashboards/radosgw-overview.json +++ b/monitoring/grafana/dashboards/radosgw-overview.json @@ -579,6 +579,499 @@ "show": true } ] + }, + { + "collapse": false, + "collapsed": false, + "gridPos": { + "h": 12, + "w": 9, + "x": 0, + "y": 12 + }, + "id": 9, + "panels": [ ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "RGW Overview - HAProxy Metrics", + "titleSize": "h6", + "type": "row" + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "", + "fill": 1, + "gridPos": { + "h": 12, + "w": 5, + "x": 0, + "y": 12 + }, + "id": 10, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + 
[ + { + "alias": "/.*Back.*/", + "transform": "negative-Y" + }, + { + "alias": "/.*1.*/" + }, + { + "alias": "/.*2.*/" + }, + { + "alias": "/.*3.*/" + }, + { + "alias": "/.*4.*/" + }, + { + "alias": "/.*5.*/" + }, + { + "alias": "/.*other.*/" + } + ] + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(haproxy_frontend_http_responses_total{code=~\"$code\",instance=~\"$ingress_service\",proxy=~\"frontend\"}[5m])) by (code)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Frontend {{ code }}", + "refId": "A" + }, + { + "expr": "sum(irate(haproxy_backend_http_responses_total{code=~\"$code\",instance=~\"$ingress_service\",proxy=~\"backend\"}[5m])) by (code)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Backend {{ code }}", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Total responses by HTTP code", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "", + "fill": 1, + "gridPos": { + "h": 12, + "w": 5, + "x": 5, + "y": 12 + }, + "id": 11, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + [ + { + "alias": "/.*Response.*/", + "transform": "negative-Y" + }, + { + "alias": "/.*Backend.*/", + "transform": "negative-Y" + } + ] + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(haproxy_frontend_http_requests_total{proxy=~\"frontend\",instance=~\"$ingress_service\"}[5m])) by (instance)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Requests", + "refId": "A" + }, + { + "expr": "sum(irate(haproxy_backend_response_errors_total{proxy=~\"backend\",instance=~\"$ingress_service\"}[5m])) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Response errors", + "refId": "B" + }, + { + "expr": "sum(irate(haproxy_frontend_request_errors_total{proxy=~\"frontend\",instance=~\"$ingress_service\"}[5m])) by (instance)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Requests errors", + "refId": "C" + }, + { + "expr": "sum(irate(haproxy_backend_redispatch_warnings_total{proxy=~\"backend\",instance=~\"$ingress_service\"}[5m])) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Backend redispatch", + "refId": "D" + }, + { + "expr": "sum(irate(haproxy_backend_retry_warnings_total{proxy=~\"backend\",instance=~\"$ingress_service\"}[5m])) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Backend retry", + "refId": "E" + }, + { + "expr": 
"sum(irate(haproxy_frontend_requests_denied_total{proxy=~\"frontend\",instance=~\"$ingress_service\"}[5m])) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Request denied", + "refId": "F" + }, + { + "expr": "sum(haproxy_backend_current_queue{proxy=~\"backend\",instance=~\"$ingress_service\"}) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Backend Queued", + "refId": "G" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Total requests / responses", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "", + "fill": 1, + "gridPos": { + "h": 12, + "w": 5, + "x": 10, + "y": 12 + }, + "id": 12, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + [ + { + "alias": "/.*Back.*/", + "transform": "negative-Y" + } + ] + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(haproxy_frontend_connections_total{proxy=~\"frontend\",instance=~\"$ingress_service\"}[5m])) by (instance)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Front", + "refId": "A" + }, + { + "expr": "sum(irate(haproxy_backend_connection_attempts_total{proxy=~\"backend\",instance=~\"$ingress_service\"}[5m])) by (instance)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Back", + "refId": "B" + }, + { + "expr": "sum(irate(haproxy_backend_connection_errors_total{proxy=~\"backend\",instance=~\"$ingress_service\"}[5m])) by (instance)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Back errors", + "refId": "C" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Total number of connections", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "", + "fill": 1, + "gridPos": { + "h": 12, + "w": 6, + "x": 15, + "y": 12 + }, + "id": 13, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + 
"pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + [ + { + "alias": "/.*OUT.*/", + "transform": "negative-Y" + } + ] + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(haproxy_frontend_bytes_in_total{proxy=~\"frontend\",instance=~\"$ingress_service\"}[5m])*8) by (instance)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "IN Front", + "refId": "A" + }, + { + "expr": "sum(irate(haproxy_frontend_bytes_out_total{proxy=~\"frontend\",instance=~\"$ingress_service\"}[5m])*8) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "OUT Front", + "refId": "B" + }, + { + "expr": "sum(irate(haproxy_backend_bytes_in_total{proxy=~\"backend\",instance=~\"$ingress_service\"}[5m])*8) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "IN Back", + "refId": "C" + }, + { + "expr": "sum(irate(haproxy_backend_bytes_out_total{proxy=~\"backend\",instance=~\"$ingress_service\"}[5m])*8) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "OUT Back", + "refId": "D" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Current total of incoming / outgoing bytes", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] } ], "refresh": "15s", @@ -610,6 +1103,46 @@ "type": "query", "useTags": false }, + { + "allValue": null, + "current": { }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "HTTP Code", + "multi": false, + "name": "code", + "options": [ ], + "query": "label_values(haproxy_server_http_responses_total{instance=~\"$ingress_service\"}, code)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "Ingress Service", + "multi": false, + "name": "ingress_service", + "options": [ ], + "query": "label_values(haproxy_server_status, instance)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, { "current": { "text": "default", diff --git a/monitoring/grafana/dashboards/tests/features/radosgw_overview.feature b/monitoring/grafana/dashboards/tests/features/radosgw_overview.feature new file mode 100644 index 0000000000000..b77d56616bd9d --- /dev/null +++ b/monitoring/grafana/dashboards/tests/features/radosgw_overview.feature @@ -0,0 +1,212 @@ +Feature: RGW Overview Dashboard + +Scenario: "Test Average GET Latencies" + Given the following series: + | metrics | values | + | ceph_rgw_get_initial_lat_sum{ceph_daemon="rgw.foo",instance="127.0.0.1", job="ceph"} | 10 50 100 | + | ceph_rgw_get_initial_lat_count{ceph_daemon="rgw.foo", instance="127.0.0.1", job="ceph"} | 20 60 80 | + When interval is `30s` + Then Grafana panel `Average GET/PUT Latencies` with legend `GET AVG` shows: + | metrics | values | + | {ceph_daemon="rgw.foo",instance="127.0.0.1", job="ceph"} | 2.5000000000000004 | 
+ +Scenario: "Test Average PUT Latencies" + Given the following series: + | metrics | values | + | ceph_rgw_put_initial_lat_sum{ceph_daemon="rgw.foo",instance="127.0.0.1", job="ceph"} | 15 35 55 | + | ceph_rgw_put_initial_lat_count{ceph_daemon="rgw.foo", instance="127.0.0.1", job="ceph"} | 10 30 50 | + When interval is `30s` + Then Grafana panel `Average GET/PUT Latencies` with legend `PUT AVG` shows: + | metrics | values | + | {ceph_daemon="rgw.foo",instance="127.0.0.1", job="ceph"} | 1 | + +Scenario: "Test Total Requests/sec by RGW Instance" + Given the following series: + | metrics | values | + | ceph_rgw_req{ceph_daemon="rgw.1",instance="127.0.0.1",job="ceph"} | 10 50 100 | + When interval is `30s` + Then Grafana panel `Total Requests/sec by RGW Instance` with legend `{{rgw_host}}` shows: + | metrics | values | + | {rgw_host="1"} | 1.6666666666666667 | + +Scenario: "Test Bandwidth Consumed by Type- GET" + Given the following series: + | metrics | values | + | ceph_rgw_get_b{ceph_daemon="rgw.1",instance="127.0.0.1",job="ceph"} | 10 50 100 | + When evaluation time is `1m` + And interval is `30s` + Then Grafana panel `Bandwidth Consumed by Type` with legend `GETs` shows: + | metrics | values | + | {} | 1.6666666666666667 | + +Scenario: "Test Bandwidth Consumed by Type- PUT" + Given the following series: + | metrics | values | + | ceph_rgw_put_b{ceph_daemon="rgw.1",instance="127.0.0.1",job="ceph"} | 5 20 50 | + When evaluation time is `1m` + And interval is `30s` + Then Grafana panel `Bandwidth Consumed by Type` with legend `PUTs` shows: + | metrics | values | + | {} | 1 | + +Scenario: "Test Total backend responses by HTTP code" + Given the following series: + | metrics | values | + | haproxy_backend_http_responses_total{code="200",instance="ingress.rgw.1",proxy="backend"} | 10 100 | + | haproxy_backend_http_responses_total{code="404",instance="ingress.rgw.1",proxy="backend"} | 20 200 | + When variable `ingress_service` is `ingress.rgw.1` + When variable `code` is `200` + Then Grafana panel `Total responses by HTTP code` with legend `Backend {{ code }}` shows: + | metrics | values | + | {code="200"} | 1.5 | + +Scenario: "Test Total frontend responses by HTTP code" + Given the following series: + | metrics | values | + | haproxy_frontend_http_responses_total{code="200",instance="ingress.rgw.1",proxy="frontend"} | 10 100 | + | haproxy_frontend_http_responses_total{code="404",instance="ingress.rgw.1",proxy="frontend"} | 20 200 | + When variable `ingress_service` is `ingress.rgw.1` + When variable `code` is `200` + Then Grafana panel `Total responses by HTTP code` with legend `Frontend {{ code }}` shows: + | metrics | values | + | {code="200"} | 1.5 | + +Scenario: "Test Total http frontend requests by instance" + Given the following series: + | metrics | values | + | haproxy_frontend_http_requests_total{proxy="frontend",instance="ingress.rgw.1"} | 10 100 | + | haproxy_frontend_http_requests_total{proxy="frontend",instance="ingress.rgw.1"} | 20 200 | + When variable `ingress_service` is `ingress.rgw.1` + Then Grafana panel `Total requests / responses` with legend `Requests` shows: + | metrics | values | + | {instance="ingress.rgw.1"} | 3 | + +Scenario: "Test Total backend response errors by instance" + Given the following series: + | metrics | values | + | haproxy_backend_response_errors_total{proxy="backend",instance="ingress.rgw.1"} | 10 100 | + | haproxy_backend_response_errors_total{proxy="backend",instance="ingress.rgw.1"} | 20 200 | + When variable `ingress_service` is `ingress.rgw.1` + 
Then Grafana panel `Total requests / responses` with legend `Response errors` shows: + | metrics | values | + | {instance="ingress.rgw.1"} | 3 | + +Scenario: "Test Total frontend requests errors by instance" + Given the following series: + | metrics | values | + | haproxy_frontend_request_errors_total{proxy="frontend",instance="ingress.rgw.1"} | 10 100 | + | haproxy_frontend_request_errors_total{proxy="frontend",instance="ingress.rgw.1"} | 20 200 | + When variable `ingress_service` is `ingress.rgw.1` + Then Grafana panel `Total requests / responses` with legend `Requests errors` shows: + | metrics | values | + | {instance="ingress.rgw.1"} | 3 | + +Scenario: "Test Total backend redispatch warnings by instance" + Given the following series: + | metrics | values | + | haproxy_backend_redispatch_warnings_total{proxy="backend",instance="ingress.rgw.1"} | 10 100 | + | haproxy_backend_redispatch_warnings_total{proxy="backend",instance="ingress.rgw.1"} | 20 200 | + When variable `ingress_service` is `ingress.rgw.1` + Then Grafana panel `Total requests / responses` with legend `Backend redispatch` shows: + | metrics | values | + | {instance="ingress.rgw.1"} | 3 | + +Scenario: "Test Total backend retry warnings by instance" + Given the following series: + | metrics | values | + | haproxy_backend_retry_warnings_total{proxy="backend",instance="ingress.rgw.1"} | 10 100 | + | haproxy_backend_retry_warnings_total{proxy="backend",instance="ingress.rgw.1"} | 20 200 | + When variable `ingress_service` is `ingress.rgw.1` + Then Grafana panel `Total requests / responses` with legend `Backend retry` shows: + | metrics | values | + | {instance="ingress.rgw.1"} | 3 | + +Scenario: "Test Total frontend requests denied by instance" + Given the following series: + | metrics | values | + | haproxy_frontend_requests_denied_total{proxy="frontend",instance="ingress.rgw.1"} | 10 100 | + | haproxy_frontend_requests_denied_total{proxy="frontend",instance="ingress.rgw.1"} | 20 200 | + When variable `ingress_service` is `ingress.rgw.1` + Then Grafana panel `Total requests / responses` with legend `Request denied` shows: + | metrics | values | + | {instance="ingress.rgw.1"} | 3 | + +Scenario: "Test Total backend current queue by instance" + Given the following series: + | metrics | values | + | haproxy_backend_current_queue{proxy="backend",instance="ingress.rgw.1"} | 10 100 | + | haproxy_backend_current_queue{proxy="backend",instance="ingress.rgw.1"} | 20 200 | + When variable `ingress_service` is `ingress.rgw.1` + Then Grafana panel `Total requests / responses` with legend `Backend Queued` shows: + | metrics | values | + | {instance="ingress.rgw.1"} | 200 | + +Scenario: "Test Total frontend connections by instance" + Given the following series: + | metrics | values | + | haproxy_frontend_connections_total{proxy="frontend",instance="ingress.rgw.1"} | 10 100 | + | haproxy_frontend_connections_total{proxy="frontend",instance="ingress.rgw.1"} | 20 200 | + When variable `ingress_service` is `ingress.rgw.1` + Then Grafana panel `Total number of connections` with legend `Front` shows: + | metrics | values | + | {instance="ingress.rgw.1"} | 3 | + +Scenario: "Test Total backend connections attempts by instance" + Given the following series: + | metrics | values | + | haproxy_backend_connection_attempts_total{proxy="backend",instance="ingress.rgw.1"} | 10 100 | + | haproxy_backend_connection_attempts_total{proxy="backend",instance="ingress.rgw.1"} | 20 200 | + When variable `ingress_service` is `ingress.rgw.1` + Then Grafana panel 
`Total number of connections` with legend `Back` shows: + | metrics | values | + | {instance="ingress.rgw.1"} | 3 | + +Scenario: "Test Total backend connections error by instance" + Given the following series: + | metrics | values | + | haproxy_backend_connection_errors_total{proxy="backend",instance="ingress.rgw.1"} | 10 100 | + | haproxy_backend_connection_errors_total{proxy="backend",instance="ingress.rgw.1"} | 20 200 | + When variable `ingress_service` is `ingress.rgw.1` + Then Grafana panel `Total number of connections` with legend `Back errors` shows: + | metrics | values | + | {instance="ingress.rgw.1"} | 3 | + +Scenario: "Test Total frontend bytes incoming by instance" + Given the following series: + | metrics | values | + | haproxy_frontend_bytes_in_total{proxy="frontend",instance="ingress.rgw.1"} | 10 100 | + | haproxy_frontend_bytes_in_total{proxy="frontend",instance="ingress.rgw.1"} | 20 200 | + When variable `ingress_service` is `ingress.rgw.1` + Then Grafana panel `Current total of incoming / outgoing bytes` with legend `IN Front` shows: + | metrics | values | + | {instance="ingress.rgw.1"} | 24 | + +Scenario: "Test Total frontend bytes outgoing by instance" + Given the following series: + | metrics | values | + | haproxy_frontend_bytes_out_total{proxy="frontend",instance="ingress.rgw.1"} | 10 100 | + | haproxy_frontend_bytes_out_total{proxy="frontend",instance="ingress.rgw.1"} | 20 200 | + When variable `ingress_service` is `ingress.rgw.1` + Then Grafana panel `Current total of incoming / outgoing bytes` with legend `OUT Front` shows: + | metrics | values | + | {instance="ingress.rgw.1"} | 24 | + +Scenario: "Test Total backend bytes incoming by instance" + Given the following series: + | metrics | values | + | haproxy_backend_bytes_in_total{proxy="backend",instance="ingress.rgw.1"} | 10 100 | + | haproxy_backend_bytes_in_total{proxy="backend",instance="ingress.rgw.1"} | 20 200 | + When variable `ingress_service` is `ingress.rgw.1` + Then Grafana panel `Current total of incoming / outgoing bytes` with legend `IN Back` shows: + | metrics | values | + | {instance="ingress.rgw.1"} | 24 | + +Scenario: "Test Total backend bytes outgoing by instance" + Given the following series: + | metrics | values | + | haproxy_backend_bytes_out_total{proxy="backend",instance="ingress.rgw.1"} | 10 100 | + | haproxy_backend_bytes_out_total{proxy="backend",instance="ingress.rgw.1"} | 20 200 | + When variable `ingress_service` is `ingress.rgw.1` + Then Grafana panel `Current total of incoming / outgoing bytes` with legend `OUT Back` shows: + | metrics | values | + | {instance="ingress.rgw.1"} | 24 | -- 2.39.5
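
Note on the expected values in the new radosgw_overview.feature scenarios: each value follows from evaluating the panel's PromQL expression against the synthetic series given in the scenario. The spacing between the synthetic samples is not stated in the patch; the numbers are consistent with a 60-second step, which is assumed below. For instance, in "Test Total frontend responses by HTTP code", the code="200" series steps from 10 to 100, and irate() over the last two samples gives (100 - 10) / 60 = 1.5, the value the scenario asserts. A minimal sketch of that arithmetic (illustrative only; the 60-second step and the variable names are assumptions, not part of the patch):

    # Reproduces the expected value asserted in
    # "Test Total frontend responses by HTTP code".
    samples = [10, 100]   # haproxy_frontend_http_responses_total{code="200",...}
    step_seconds = 60     # assumed spacing between the synthetic samples

    # PromQL irate() takes the per-second rate from the last two samples
    irate = (samples[-1] - samples[-2]) / step_seconds
    print(irate)          # 1.5, matching the scenario's expected value

The same arithmetic, with the extra "* 8" in the bytes panels' expressions, accounts for the values asserted in the incoming/outgoing bytes scenarios.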