]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/dashboard: Add ceph_daemon filter to rgw overview grafana panel 62268/head
authorAashish Sharma <aasharma@li-e74156cc-2f67-11b2-a85c-e98659a63c5c.ibm.com>
Thu, 28 Nov 2024 05:58:59 +0000 (11:28 +0530)
committerAashish Sharma <Aashish.Sharma1@ibm.com>
Thu, 13 Mar 2025 05:45:45 +0000 (11:15 +0530)
queries

Currently rgw_servers filtering is not working in RGW Overview garfana graphs.
It is showing data of all the RGW services, even though filter set to single service.
This PR intends to solve this issue

Fixes: https://tracker.ceph.com/issues/69074
Signed-off-by: Aashish Sharma <aasharma@redhat.com>
(cherry picked from commit 666f8faf11c0a639ce6ead06026850dd72a14d41)

Conflicts:
monitoring/ceph-mixin/dashboards/rgw.libsonnet (conflicts with
metric names like ceph_rgw_put_initial_lat_sum)
monitoring/ceph-mixin/dashboards_out/radosgw-overview.json
(conflicts with metrics names like ceph_rgw_put_initial_lat_sum)

monitoring/ceph-mixin/dashboards/rgw.libsonnet
monitoring/ceph-mixin/dashboards_out/radosgw-overview.json
monitoring/ceph-mixin/tests_dashboards/features/radosgw_overview.feature

index 557f1ddccb254581ba16951b2a7c100352cea55f..1e2533ecc4e2ef8738b5e288021d22ad64bf385c 100644 (file)
@@ -265,7 +265,7 @@ local u = import 'utils.libsonnet';
           label_replace(
             rate(ceph_rgw_get_initial_lat_sum{%(matchers)s}[$__rate_interval]) /
               rate(ceph_rgw_get_initial_lat_count{%(matchers)s}[$__rate_interval]) *
-              on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s},
+              on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s},
             "rgw_host", "$1", "ceph_daemon", "rgw.(.*)"
           )
         ||| % $.matchers(),
@@ -281,7 +281,7 @@ local u = import 'utils.libsonnet';
               label_replace(
                 rate(ceph_rgw_put_initial_lat_sum{%(matchers)s}[$__rate_interval]) /
                   rate(ceph_rgw_put_initial_lat_count{%(matchers)s}[$__rate_interval]) *
-                  on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s},
+                  on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s},
                 "rgw_host", "$1", "ceph_daemon", "rgw.(.*)"
               )
             ||| % $.matchers(),
@@ -298,7 +298,7 @@ local u = import 'utils.libsonnet';
           sum by (rgw_host) (
             label_replace(
               rate(ceph_rgw_req{%(matchers)s}[$__rate_interval]) *
-                on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s},
+                on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s},
               "rgw_host", "$1", "ceph_daemon", "rgw.(.*)"
             )
           )
@@ -318,7 +318,7 @@ local u = import 'utils.libsonnet';
           label_replace(
             rate(ceph_rgw_get_initial_lat_sum{%(matchers)s}[$__rate_interval]) /
               rate(ceph_rgw_get_initial_lat_count{%(matchers)s}[$__rate_interval]) *
-              on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s},
+              on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s},
             "rgw_host", "$1", "ceph_daemon", "rgw.(.*)"
           )
         ||| % $.matchers(),
@@ -352,7 +352,7 @@ local u = import 'utils.libsonnet';
           label_replace(sum by (instance_id) (
             rate(ceph_rgw_get_b{%(matchers)s}[$__rate_interval]) +
               rate(ceph_rgw_put_b{%(matchers)s}[$__rate_interval])) *
-              on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s},
+              on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s},
             "rgw_host", "$1", "ceph_daemon", "rgw.(.*)"
           )
         ||| % $.matchers(),
index 92e416764e7c01ad2730edfd61b69bb42f129425..8d83bf8bbc38c5e17703e5f0ba7163d7e91a1083 100644 (file)
          "steppedLine": false,
          "targets": [
             {
-               "expr": "label_replace(\n  rate(ceph_rgw_get_initial_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n    rate(ceph_rgw_get_initial_lat_count{job=~\"$job\"}[$__rate_interval]) *\n    on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"},\n  \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
+               "expr": "label_replace(\n  rate(ceph_rgw_get_initial_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n    rate(ceph_rgw_get_initial_lat_count{job=~\"$job\"}[$__rate_interval]) *\n    on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", job=~\"$job\"},\n  \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
                "format": "time_series",
                "intervalFactor": 1,
                "legendFormat": "GET {{rgw_host}}",
                "refId": "A"
             },
             {
-               "expr": "label_replace(\n  rate(ceph_rgw_put_initial_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n    rate(ceph_rgw_put_initial_lat_count{job=~\"$job\"}[$__rate_interval]) *\n    on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"},\n  \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
+               "expr": "label_replace(\n  rate(ceph_rgw_put_initial_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n    rate(ceph_rgw_put_initial_lat_count{job=~\"$job\"}[$__rate_interval]) *\n    on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", job=~\"$job\"},\n  \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
                "format": "time_series",
                "intervalFactor": 1,
                "legendFormat": "PUT {{rgw_host}}",
          "steppedLine": false,
          "targets": [
             {
-               "expr": "sum by (rgw_host) (\n  label_replace(\n    rate(ceph_rgw_req{job=~\"$job\"}[$__rate_interval]) *\n      on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"},\n    \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n  )\n)\n",
+               "expr": "sum by (rgw_host) (\n  label_replace(\n    rate(ceph_rgw_req{job=~\"$job\"}[$__rate_interval]) *\n      on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", job=~\"$job\"},\n    \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n  )\n)\n",
                "format": "time_series",
                "intervalFactor": 1,
                "legendFormat": "{{rgw_host}}",
          "steppedLine": false,
          "targets": [
             {
-               "expr": "label_replace(\n  rate(ceph_rgw_get_initial_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n    rate(ceph_rgw_get_initial_lat_count{job=~\"$job\"}[$__rate_interval]) *\n    on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"},\n  \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
+               "expr": "label_replace(\n  rate(ceph_rgw_get_initial_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n    rate(ceph_rgw_get_initial_lat_count{job=~\"$job\"}[$__rate_interval]) *\n    on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", job=~\"$job\"},\n  \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
                "format": "time_series",
                "intervalFactor": 1,
                "legendFormat": "{{rgw_host}}",
          "steppedLine": false,
          "targets": [
             {
-               "expr": "label_replace(sum by (instance_id) (\n  rate(ceph_rgw_get_b{job=~\"$job\"}[$__rate_interval]) +\n    rate(ceph_rgw_put_b{job=~\"$job\"}[$__rate_interval])) *\n    on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"},\n  \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
+               "expr": "label_replace(sum by (instance_id) (\n  rate(ceph_rgw_get_b{job=~\"$job\"}[$__rate_interval]) +\n    rate(ceph_rgw_put_b{job=~\"$job\"}[$__rate_interval])) *\n    on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", job=~\"$job\"},\n  \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
                "format": "time_series",
                "intervalFactor": 1,
                "legendFormat": "{{rgw_host}}",
          "steppedLine": false,
          "targets": [
             {
-               "expr": "label_replace(\n  rate(ceph_rgw_put_initial_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n    rate(ceph_rgw_put_initial_lat_count{job=~\"$job\"}[$__rate_interval]) *\n    on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"},\n  \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
+               "expr": "label_replace(\n  rate(ceph_rgw_put_initial_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n    rate(ceph_rgw_put_initial_lat_count{job=~\"$job\"}[$__rate_interval]) *\n    on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", job=~\"$job\"},\n  \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
                "format": "time_series",
                "intervalFactor": 1,
                "legendFormat": "{{rgw_host}}",
index 642e4397878287cd8a030cc44c1776b3a3009b26..c1527a32708ac686c244ba84e70970c64357ce4e 100644 (file)
@@ -7,6 +7,7 @@ Scenario: "Test Average GET Latencies"
     | ceph_rgw_get_initial_lat_count{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 20 60 80 |
     | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1 1 1 |
   When interval is `30s`
+  And variable `rgw_servers` is `rgw.foo`
   Then Grafana panel `Average GET/PUT Latencies by RGW Instance` with legend `GET {{rgw_host}}` shows:
     | metrics | values |
     | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph", rgw_host="foo"} | 1.5 |
@@ -18,6 +19,7 @@ Scenario: "Test Average PUT Latencies"
     | ceph_rgw_put_initial_lat_count{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 10 30 50 |
     | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1 1 1 |
   When interval is `30s`
+  And variable `rgw_servers` is `rgw.foo`
   Then Grafana panel `Average GET/PUT Latencies by RGW Instance` with legend `PUT {{rgw_host}}` shows:
     | metrics | values |
     | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph", rgw_host="foo"} | 1 |
@@ -28,6 +30,7 @@ Scenario: "Test Total Requests/sec by RGW Instance"
     | ceph_rgw_req{instance="127.0.0.1", instance_id="92806566", job="ceph"} | 10 50 100 |
     | ceph_rgw_metadata{ceph_daemon="rgw.1", hostname="localhost", instance="127.0.0.1", instance_id="92806566", job="ceph"} | 1 1 1 |
   When interval is `30s`
+  And variable `rgw_servers` is `rgw.1`
   Then Grafana panel `Total Requests/sec by RGW Instance` with legend `{{rgw_host}}` shows:
     | metrics | values |
     | {rgw_host="1"} | 1.5 |
@@ -39,6 +42,7 @@ Scenario: "Test GET Latencies by RGW Instance"
     | ceph_rgw_get_initial_lat_count{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 20 60 80 |
     | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1 1 1 |
   When interval is `30s`
+  And variable `rgw_servers` is `rgw.foo`
   Then Grafana panel `GET Latencies by RGW Instance` with legend `{{rgw_host}}` shows:
     | metrics | values |
     | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph", rgw_host="foo"} | 1.5 |
@@ -71,6 +75,7 @@ Scenario: "Test Bandwidth by RGW Instance"
     | ceph_rgw_metadata{ceph_daemon="rgw.1", hostname="localhost", instance="127.0.0.1", instance_id="92806566", job="ceph"} | 1 1 1 |
   When evaluation time is `1m`
   And interval is `30s`
+  And variable `rgw_servers` is `rgw.1`
   Then Grafana panel `Bandwidth by RGW Instance` with legend `{{rgw_host}}` shows:
     | metrics | values |
     | {ceph_daemon="rgw.1", instance_id="92806566", rgw_host="1"} | 2.25 |
@@ -83,6 +88,7 @@ Scenario: "Test PUT Latencies by RGW Instance"
     | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1 1 1 |
   When evaluation time is `1m`
   And interval is `30s`
+  And variable `rgw_servers` is `rgw.foo`
   Then Grafana panel `PUT Latencies by RGW Instance` with legend `{{rgw_host}}` shows:
     | metrics | values |
     | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph", rgw_host="foo"} | 1 |