From 15fa97d49d28aebdd57a2eb9276865c4513e0cb2 Mon Sep 17 00:00:00 2001 From: Tatjana Dehler Date: Fri, 19 Aug 2022 12:08:17 +0200 Subject: [PATCH] monitoring/ceph-mixin: add RGW host to label info Add the missing information about the RGW instance to the labels of the "Average GET/PUT Latencies" panel on the "RGW Overview" dashboard. Fixes: https://tracker.ceph.com/issues/57166 Signed-off-by: Tatjana Dehler --- .../ceph-mixin/dashboards/rgw.libsonnet | 24 ++++++++++++------- .../dashboards_out/radosgw-overview.json | 10 ++++---- monitoring/ceph-mixin/jsonnetfile.lock.json | 4 ++-- .../features/radosgw_overview.feature | 8 +++---- 4 files changed, 26 insertions(+), 20 deletions(-) diff --git a/monitoring/ceph-mixin/dashboards/rgw.libsonnet b/monitoring/ceph-mixin/dashboards/rgw.libsonnet index 437eb783f18..892480d1ca0 100644 --- a/monitoring/ceph-mixin/dashboards/rgw.libsonnet +++ b/monitoring/ceph-mixin/dashboards/rgw.libsonnet @@ -257,16 +257,19 @@ local u = import 'utils.libsonnet'; gridPos: { x: 0, y: 0, w: 24, h: 1 }, }, RgwOverviewPanel( - 'Average GET/PUT Latencies', + 'Average GET/PUT Latencies by RGW Instance', '', 's', 'short', ||| - rate(ceph_rgw_get_initial_lat_sum{%(matchers)s}[$__rate_interval]) / - rate(ceph_rgw_get_initial_lat_count{%(matchers)s}[$__rate_interval]) * - on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s} + label_replace( + rate(ceph_rgw_get_initial_lat_sum{%(matchers)s}[$__rate_interval]) / + rate(ceph_rgw_get_initial_lat_count{%(matchers)s}[$__rate_interval]) * + on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s}, + "rgw_host", "$1", "ceph_daemon", "rgw.(.*)" + ) ||| % $.matchers(), - 'GET AVG', + 'GET {{rgw_host}}', 0, 1, 8, @@ -275,11 +278,14 @@ local u = import 'utils.libsonnet'; [ $.addTargetSchema( ||| - rate(ceph_rgw_put_initial_lat_sum{%(matchers)s}[$__rate_interval]) / - rate(ceph_rgw_put_initial_lat_count{%(matchers)s}[$__rate_interval]) * - on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s} + label_replace( + rate(ceph_rgw_put_initial_lat_sum{%(matchers)s}[$__rate_interval]) / + rate(ceph_rgw_put_initial_lat_count{%(matchers)s}[$__rate_interval]) * + on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s}, + "rgw_host", "$1", "ceph_daemon", "rgw.(.*)" + ) ||| % $.matchers(), - 'PUT AVG' + 'PUT {{rgw_host}}' ), ] ), diff --git a/monitoring/ceph-mixin/dashboards_out/radosgw-overview.json b/monitoring/ceph-mixin/dashboards_out/radosgw-overview.json index 4332aac5ee5..77d69e4f315 100644 --- a/monitoring/ceph-mixin/dashboards_out/radosgw-overview.json +++ b/monitoring/ceph-mixin/dashboards_out/radosgw-overview.json @@ -99,24 +99,24 @@ "steppedLine": false, "targets": [ { - "expr": "rate(ceph_rgw_get_initial_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n rate(ceph_rgw_get_initial_lat_count{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"}\n", + "expr": "label_replace(\n rate(ceph_rgw_get_initial_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n rate(ceph_rgw_get_initial_lat_count{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n", "format": "time_series", "intervalFactor": 1, - "legendFormat": "GET AVG", + "legendFormat": "GET {{rgw_host}}", "refId": "A" }, { - "expr": "rate(ceph_rgw_put_initial_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n rate(ceph_rgw_put_initial_lat_count{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"}\n", + "expr": "label_replace(\n rate(ceph_rgw_put_initial_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n rate(ceph_rgw_put_initial_lat_count{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n", "format": "time_series", "intervalFactor": 1, - "legendFormat": "PUT AVG", + "legendFormat": "PUT {{rgw_host}}", "refId": "B" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, - "title": "Average GET/PUT Latencies", + "title": "Average GET/PUT Latencies by RGW Instance", "tooltip": { "shared": true, "sort": 0, diff --git a/monitoring/ceph-mixin/jsonnetfile.lock.json b/monitoring/ceph-mixin/jsonnetfile.lock.json index 0430b39fc36..3c9d38d935c 100644 --- a/monitoring/ceph-mixin/jsonnetfile.lock.json +++ b/monitoring/ceph-mixin/jsonnetfile.lock.json @@ -8,8 +8,8 @@ "subdir": "grafonnet" } }, - "version": "3626fc4dc2326931c530861ac5bebe39444f6cbf", - "sum": "gF8foHByYcB25jcUOBqP6jxk0OPifQMjPvKY0HaCk6w=" + "version": "30280196507e0fe6fa978a3e0eaca3a62844f817", + "sum": "342u++/7rViR/zj2jeJOjshzglkZ1SY+hFNuyCBFMdc=" } ], "legacyImports": false diff --git a/monitoring/ceph-mixin/tests_dashboards/features/radosgw_overview.feature b/monitoring/ceph-mixin/tests_dashboards/features/radosgw_overview.feature index b095392a21b..642e4397878 100644 --- a/monitoring/ceph-mixin/tests_dashboards/features/radosgw_overview.feature +++ b/monitoring/ceph-mixin/tests_dashboards/features/radosgw_overview.feature @@ -7,9 +7,9 @@ Scenario: "Test Average GET Latencies" | ceph_rgw_get_initial_lat_count{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 20 60 80 | | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1 1 1 | When interval is `30s` - Then Grafana panel `Average GET/PUT Latencies` with legend `GET AVG` shows: + Then Grafana panel `Average GET/PUT Latencies by RGW Instance` with legend `GET {{rgw_host}}` shows: | metrics | values | - | {ceph_daemon="rgw.foo",instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1.5 | + | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph", rgw_host="foo"} | 1.5 | Scenario: "Test Average PUT Latencies" Given the following series: @@ -18,9 +18,9 @@ Scenario: "Test Average PUT Latencies" | ceph_rgw_put_initial_lat_count{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 10 30 50 | | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1 1 1 | When interval is `30s` - Then Grafana panel `Average GET/PUT Latencies` with legend `PUT AVG` shows: + Then Grafana panel `Average GET/PUT Latencies by RGW Instance` with legend `PUT {{rgw_host}}` shows: | metrics | values | - | {ceph_daemon="rgw.foo",instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1 | + | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph", rgw_host="foo"} | 1 | Scenario: "Test Total Requests/sec by RGW Instance" Given the following series: -- 2.39.5