From 0dcee7816d23fc6b8a6adb4e791e03e0bc10a186 Mon Sep 17 00:00:00 2001 From: Arthur Outhenin-Chalandre Date: Tue, 17 May 2022 09:41:21 +0200 Subject: [PATCH] ceph-mixin: don't add cluster matcher if showcluster is disabled Signed-off-by: Arthur Outhenin-Chalandre (cherry picked from commit 3b6356c8724ee2b299743d20ff5df0401181228b) --- .../ceph-mixin/dashboards/utils.libsonnet | 7 +-- .../dashboards_out/cephfs-overview.json | 10 ++-- .../dashboards_out/host-details.json | 18 +++---- .../dashboards_out/hosts-overview.json | 14 +++--- .../dashboards_out/osd-device-details.json | 30 +++++------ .../dashboards_out/osds-overview.json | 50 +++++++++---------- .../dashboards_out/pool-detail.json | 20 ++++---- .../dashboards_out/pool-overview.json | 48 +++++++++--------- .../dashboards_out/radosgw-detail.json | 28 +++++------ .../dashboards_out/radosgw-overview.json | 20 ++++---- .../dashboards_out/radosgw-sync-overview.json | 12 ++--- .../dashboards_out/rbd-details.json | 14 +++--- .../dashboards_out/rbd-overview.json | 20 ++++---- 13 files changed, 146 insertions(+), 145 deletions(-) diff --git a/monitoring/ceph-mixin/dashboards/utils.libsonnet b/monitoring/ceph-mixin/dashboards/utils.libsonnet index d0c007db142e4..f9468cce745a0 100644 --- a/monitoring/ceph-mixin/dashboards/utils.libsonnet +++ b/monitoring/ceph-mixin/dashboards/utils.libsonnet @@ -185,15 +185,16 @@ local g = import 'grafonnet/grafana.libsonnet'; { // Common labels jobMatcher: jobMatcher, - clusterMatcher: clusterMatcher, - matchers: '%s, %s' % [jobMatcher, clusterMatcher], + clusterMatcher: (if $._config.showMultiCluster then clusterMatcher else ''), + matchers: jobMatcher + + (if $._config.showMultiCluster then ', ' + clusterMatcher else ''), }, addClusterTemplate():: $.addTemplateSchema( 'cluster', '$datasource', - 'label_values(ceph_osd_metadata, cluster)', + 'label_values(ceph_osd_metadata, %s)' % $._config.clusterLabel, 1, true, 1, diff --git a/monitoring/ceph-mixin/dashboards_out/cephfs-overview.json b/monitoring/ceph-mixin/dashboards_out/cephfs-overview.json index 79dd870daa5a6..d76f92ae32a55 100644 --- a/monitoring/ceph-mixin/dashboards_out/cephfs-overview.json +++ b/monitoring/ceph-mixin/dashboards_out/cephfs-overview.json @@ -104,14 +104,14 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(ceph_objecter_op_r{job=~\"$job\", cluster=~\"$cluster\", ceph_daemon=~\"($mds_servers).*\"}[$__rate_interval]))", + "expr": "sum(rate(ceph_objecter_op_r{job=~\"$job\", ceph_daemon=~\"($mds_servers).*\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 1, "legendFormat": "Read Ops", "refId": "A" }, { - "expr": "sum(rate(ceph_objecter_op_w{job=~\"$job\", cluster=~\"$cluster\", ceph_daemon=~\"($mds_servers).*\"}[$__rate_interval]))", + "expr": "sum(rate(ceph_objecter_op_w{job=~\"$job\", ceph_daemon=~\"($mds_servers).*\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 1, "legendFormat": "Write Ops", @@ -197,7 +197,7 @@ "steppedLine": false, "targets": [ { - "expr": "ceph_mds_server_handle_client_request{job=~\"$job\", cluster=~\"$cluster\", ceph_daemon=~\"($mds_servers).*\"}", + "expr": "ceph_mds_server_handle_client_request{job=~\"$job\", ceph_daemon=~\"($mds_servers).*\"}", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{ceph_daemon}}", @@ -294,7 +294,7 @@ "multi": true, "name": "job", "options": [ ], - "query": "label_values(ceph_osd_metadata{cluster=~\"$cluster\"}, job)", + "query": "label_values(ceph_osd_metadata{}, job)", "refresh": 1, "regex": "(.*)", "sort": 1, @@ -314,7 +314,7 @@ "multi": false, "name": "mds_servers", "options": [ ], - "query": "label_values(ceph_mds_inodes{job=~\"$job\", cluster=~\"$cluster\"}, ceph_daemon)", + "query": "label_values(ceph_mds_inodes{job=~\"$job\"}, ceph_daemon)", "refresh": 1, "regex": "", "sort": 1, diff --git a/monitoring/ceph-mixin/dashboards_out/host-details.json b/monitoring/ceph-mixin/dashboards_out/host-details.json index defb80f207ebe..4a972e772b6b3 100644 --- a/monitoring/ceph-mixin/dashboards_out/host-details.json +++ b/monitoring/ceph-mixin/dashboards_out/host-details.json @@ -123,7 +123,7 @@ "tableColumn": "", "targets": [ { - "expr": "count(sum by (ceph_daemon) (ceph_osd_metadata{job=~\"$job\", cluster=~\"$cluster\", hostname='$ceph_hosts'}))", + "expr": "count(sum by (ceph_daemon) (ceph_osd_metadata{job=~\"$job\", hostname='$ceph_hosts'}))", "format": "time_series", "intervalFactor": 1, "legendFormat": "", @@ -615,7 +615,7 @@ "tableColumn": "", "targets": [ { - "expr": "sum(\n ceph_osd_stat_bytes{job=~\"$job\", cluster=~\"$cluster\"} and\n on (ceph_daemon) ceph_disk_occupation{job=~\"$job\", cluster=~\"$cluster\", instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}\n)\n", + "expr": "sum(\n ceph_osd_stat_bytes{job=~\"$job\"} and\n on (ceph_daemon) ceph_disk_occupation{job=~\"$job\", instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "", @@ -800,14 +800,14 @@ "steppedLine": false, "targets": [ { - "expr": "label_replace(\n (\n rate(node_disk_writes_completed{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) or\n rate(node_disk_writes_completed_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval])\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) * on(instance, device) group_left(ceph_daemon) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", cluster=~\"$cluster\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n", + "expr": "label_replace(\n (\n rate(node_disk_writes_completed{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) or\n rate(node_disk_writes_completed_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval])\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) * on(instance, device) group_left(ceph_daemon) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{device}}({{ceph_daemon}}) writes", "refId": "A" }, { - "expr": "label_replace(\n (\n rate(node_disk_reads_completed{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) or\n rate(node_disk_reads_completed_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval])\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) * on(instance, device) group_left(ceph_daemon) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", cluster=~\"$cluster\"},\"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n", + "expr": "label_replace(\n (\n rate(node_disk_reads_completed{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) or\n rate(node_disk_reads_completed_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval])\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) * on(instance, device) group_left(ceph_daemon) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\"},\"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{device}}({{ceph_daemon}}) reads", @@ -898,14 +898,14 @@ "steppedLine": false, "targets": [ { - "expr": "label_replace(\n (\n rate(node_disk_bytes_written{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) or\n rate(node_disk_written_bytes_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval])\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") * on(instance, device)\n group_left(ceph_daemon) label_replace(\n label_replace(ceph_disk_occupation_human{job=~\"$job\", cluster=~\"$cluster\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n )\n", + "expr": "label_replace(\n (\n rate(node_disk_bytes_written{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) or\n rate(node_disk_written_bytes_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval])\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") * on(instance, device)\n group_left(ceph_daemon) label_replace(\n label_replace(ceph_disk_occupation_human{job=~\"$job\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n )\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{device}}({{ceph_daemon}}) write", "refId": "A" }, { - "expr": "label_replace(\n (\n rate(node_disk_bytes_read{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) or\n rate(node_disk_read_bytes_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval])\n ),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") * on(instance, device)\n group_left(ceph_daemon) label_replace(\n label_replace(ceph_disk_occupation_human{job=~\"$job\", cluster=~\"$cluster\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n )\n", + "expr": "label_replace(\n (\n rate(node_disk_bytes_read{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) or\n rate(node_disk_read_bytes_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval])\n ),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") * on(instance, device)\n group_left(ceph_daemon) label_replace(\n label_replace(ceph_disk_occupation_human{job=~\"$job\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n )\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{device}}({{ceph_daemon}}) read", @@ -1077,7 +1077,7 @@ "steppedLine": false, "targets": [ { - "expr": "label_replace(\n (\n (rate(node_disk_io_time_ms{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) / 10) or\n rate(node_disk_io_time_seconds_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) * 100\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) * on(instance, device) group_left(ceph_daemon) label_replace(\n label_replace(ceph_disk_occupation_human{job=~\"$job\", cluster=~\"$cluster\", instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n", + "expr": "label_replace(\n (\n (rate(node_disk_io_time_ms{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) / 10) or\n rate(node_disk_io_time_seconds_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) * 100\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) * on(instance, device) group_left(ceph_daemon) label_replace(\n label_replace(ceph_disk_occupation_human{job=~\"$job\", instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{device}}({{ceph_daemon}})", @@ -1175,7 +1175,7 @@ "multi": true, "name": "job", "options": [ ], - "query": "label_values(ceph_osd_metadata{cluster=~\"$cluster\"}, job)", + "query": "label_values(ceph_osd_metadata{}, job)", "refresh": 1, "regex": "(.*)", "sort": 1, @@ -1195,7 +1195,7 @@ "multi": false, "name": "ceph_hosts", "options": [ ], - "query": "label_values({cluster=~\"$cluster\"}, instance)", + "query": "label_values({}, instance)", "refresh": 1, "regex": "([^.:]*).*", "sort": 3, diff --git a/monitoring/ceph-mixin/dashboards_out/hosts-overview.json b/monitoring/ceph-mixin/dashboards_out/hosts-overview.json index 89f65a0d350f0..61996aac77730 100644 --- a/monitoring/ceph-mixin/dashboards_out/hosts-overview.json +++ b/monitoring/ceph-mixin/dashboards_out/hosts-overview.json @@ -104,7 +104,7 @@ "tableColumn": "", "targets": [ { - "expr": "count(sum by (hostname) (ceph_osd_metadata{job=~\"$job\", cluster=~\"$cluster\"}))", + "expr": "count(sum by (hostname) (ceph_osd_metadata{job=~\"$job\"}))", "format": "time_series", "instant": true, "intervalFactor": 1, @@ -436,7 +436,7 @@ "tableColumn": "", "targets": [ { - "expr": "avg (\n label_replace(\n (rate(node_disk_io_time_ms[$__rate_interval]) / 10 ) or\n (rate(node_disk_io_time_seconds_total[$__rate_interval]) * 100),\n \"instance\", \"$1\", \"instance\", \"([^.:]*).*\"\n ) * on(instance, device) group_left(ceph_daemon) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", cluster=~\"$cluster\", instance=~\"($osd_hosts).*\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^.:]*).*\"\n )\n)\n", + "expr": "avg (\n label_replace(\n (rate(node_disk_io_time_ms[$__rate_interval]) / 10 ) or\n (rate(node_disk_io_time_seconds_total[$__rate_interval]) * 100),\n \"instance\", \"$1\", \"instance\", \"([^.:]*).*\"\n ) * on(instance, device) group_left(ceph_daemon) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", instance=~\"($osd_hosts).*\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^.:]*).*\"\n )\n)\n", "format": "time_series", "instant": true, "intervalFactor": 1, @@ -766,7 +766,7 @@ "multi": true, "name": "job", "options": [ ], - "query": "label_values(ceph_osd_metadata{cluster=~\"$cluster\"}, job)", + "query": "label_values(ceph_osd_metadata{}, job)", "refresh": 1, "regex": "(.*)", "sort": 1, @@ -786,7 +786,7 @@ "multi": false, "name": "osd_hosts", "options": [ ], - "query": "label_values(ceph_disk_occupation{job=~\"$job\", cluster=~\"$cluster\"}, exported_instance)", + "query": "label_values(ceph_disk_occupation{job=~\"$job\"}, exported_instance)", "refresh": 1, "regex": "([^.]*).*", "sort": 1, @@ -806,7 +806,7 @@ "multi": false, "name": "mon_hosts", "options": [ ], - "query": "label_values(ceph_mon_metadata{job=~\"$job\", cluster=~\"$cluster\"}, ceph_daemon)", + "query": "label_values(ceph_mon_metadata{job=~\"$job\"}, ceph_daemon)", "refresh": 1, "regex": "mon.(.*)", "sort": 1, @@ -826,7 +826,7 @@ "multi": false, "name": "mds_hosts", "options": [ ], - "query": "label_values(ceph_mds_inodes{job=~\"$job\", cluster=~\"$cluster\"}, ceph_daemon)", + "query": "label_values(ceph_mds_inodes{job=~\"$job\"}, ceph_daemon)", "refresh": 1, "regex": "mds.(.*)", "sort": 1, @@ -846,7 +846,7 @@ "multi": false, "name": "rgw_hosts", "options": [ ], - "query": "label_values(ceph_rgw_metadata{job=~\"$job\", cluster=~\"$cluster\"}, ceph_daemon)", + "query": "label_values(ceph_rgw_metadata{job=~\"$job\"}, ceph_daemon)", "refresh": 1, "regex": "rgw.(.*)", "sort": 1, diff --git a/monitoring/ceph-mixin/dashboards_out/osd-device-details.json b/monitoring/ceph-mixin/dashboards_out/osd-device-details.json index 8406b3451832e..822733fbba546 100644 --- a/monitoring/ceph-mixin/dashboards_out/osd-device-details.json +++ b/monitoring/ceph-mixin/dashboards_out/osd-device-details.json @@ -104,14 +104,14 @@ "steppedLine": false, "targets": [ { - "expr": "rate(ceph_osd_op_r_latency_sum{job=~\"$job\", cluster=~\"$cluster\", ceph_daemon=~\"$osd\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_r_latency_count{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval])\n", + "expr": "rate(ceph_osd_op_r_latency_sum{job=~\"$job\", ceph_daemon=~\"$osd\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_r_latency_count{job=~\"$job\"}[$__rate_interval])\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "read", "refId": "A" }, { - "expr": "rate(ceph_osd_op_w_latency_sum{job=~\"$job\", cluster=~\"$cluster\", ceph_daemon=~\"$osd\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_w_latency_count{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval])\n", + "expr": "rate(ceph_osd_op_w_latency_sum{job=~\"$job\", ceph_daemon=~\"$osd\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_w_latency_count{job=~\"$job\"}[$__rate_interval])\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "write", @@ -202,14 +202,14 @@ "steppedLine": false, "targets": [ { - "expr": "rate(ceph_osd_op_r{job=~\"$job\", cluster=~\"$cluster\", ceph_daemon=~\"$osd\"}[$__rate_interval])", + "expr": "rate(ceph_osd_op_r{job=~\"$job\", ceph_daemon=~\"$osd\"}[$__rate_interval])", "format": "time_series", "intervalFactor": 1, "legendFormat": "Reads", "refId": "A" }, { - "expr": "rate(ceph_osd_op_w{job=~\"$job\", cluster=~\"$cluster\", ceph_daemon=~\"$osd\"}[$__rate_interval])", + "expr": "rate(ceph_osd_op_w{job=~\"$job\", ceph_daemon=~\"$osd\"}[$__rate_interval])", "format": "time_series", "intervalFactor": 1, "legendFormat": "Writes", @@ -300,14 +300,14 @@ "steppedLine": false, "targets": [ { - "expr": "rate(ceph_osd_op_r_out_bytes{job=~\"$job\", cluster=~\"$cluster\", ceph_daemon=~\"$osd\"}[$__rate_interval])", + "expr": "rate(ceph_osd_op_r_out_bytes{job=~\"$job\", ceph_daemon=~\"$osd\"}[$__rate_interval])", "format": "time_series", "intervalFactor": 1, "legendFormat": "Read Bytes", "refId": "A" }, { - "expr": "rate(ceph_osd_op_w_in_bytes{job=~\"$job\", cluster=~\"$cluster\", ceph_daemon=~\"$osd\"}[$__rate_interval])", + "expr": "rate(ceph_osd_op_w_in_bytes{job=~\"$job\", ceph_daemon=~\"$osd\"}[$__rate_interval])", "format": "time_series", "intervalFactor": 1, "legendFormat": "Write Bytes", @@ -417,14 +417,14 @@ "steppedLine": false, "targets": [ { - "expr": "(\n label_replace(\n rate(node_disk_read_time_seconds_total{cluster=~\"$cluster\"}[$__rate_interval]) /\n rate(node_disk_reads_completed_total{cluster=~\"$cluster\"}[$__rate_interval]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n ) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", cluster=~\"$cluster\", ceph_daemon=~\"$osd\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n )\n)\n", + "expr": "(\n label_replace(\n rate(node_disk_read_time_seconds_total{}[$__rate_interval]) /\n rate(node_disk_reads_completed_total{}[$__rate_interval]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n ) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", ceph_daemon=~\"$osd\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n )\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{instance}}/{{device}} Reads", "refId": "A" }, { - "expr": "(\n label_replace(\n rate(node_disk_write_time_seconds_total{cluster=~\"$cluster\"}[$__rate_interval]) /\n rate(node_disk_writes_completed_total{cluster=~\"$cluster\"}[$__rate_interval]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") and on (instance, device)\n label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", cluster=~\"$cluster\", ceph_daemon=~\"$osd\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n )\n )\n", + "expr": "(\n label_replace(\n rate(node_disk_write_time_seconds_total{}[$__rate_interval]) /\n rate(node_disk_writes_completed_total{}[$__rate_interval]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") and on (instance, device)\n label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", ceph_daemon=~\"$osd\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n )\n )\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{instance}}/{{device}} Writes", @@ -515,14 +515,14 @@ "steppedLine": false, "targets": [ { - "expr": "label_replace(\n rate(node_disk_writes_completed_total{cluster=~\"$cluster\"}[$__rate_interval]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", cluster=~\"$cluster\", ceph_daemon=~\"$osd\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n", + "expr": "label_replace(\n rate(node_disk_writes_completed_total{}[$__rate_interval]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", ceph_daemon=~\"$osd\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{device}} on {{instance}} Writes", "refId": "A" }, { - "expr": "label_replace(\n rate(node_disk_reads_completed_total{cluster=~\"$cluster\"}[$__rate_interval]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", cluster=~\"$cluster\", ceph_daemon=~\"$osd\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n", + "expr": "label_replace(\n rate(node_disk_reads_completed_total{}[$__rate_interval]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", ceph_daemon=~\"$osd\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{device}} on {{instance}} Reads", @@ -613,14 +613,14 @@ "steppedLine": false, "targets": [ { - "expr": "label_replace(\n rate(node_disk_read_bytes_total{cluster=~\"$cluster\"}[$__rate_interval]), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", cluster=~\"$cluster\", ceph_daemon=~\"$osd\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n", + "expr": "label_replace(\n rate(node_disk_read_bytes_total{}[$__rate_interval]), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", ceph_daemon=~\"$osd\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{instance}} {{device}} Reads", "refId": "A" }, { - "expr": "label_replace(\n rate(node_disk_written_bytes_total{cluster=~\"$cluster\"}[$__rate_interval]), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", cluster=~\"$cluster\", ceph_daemon=~\"$osd\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n", + "expr": "label_replace(\n rate(node_disk_written_bytes_total{}[$__rate_interval]), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", ceph_daemon=~\"$osd\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{instance}} {{device}} Writes", @@ -706,7 +706,7 @@ "steppedLine": false, "targets": [ { - "expr": "label_replace(\n rate(node_disk_io_time_seconds_total{cluster=~\"$cluster\"}[$__rate_interval]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", cluster=~\"$cluster\", ceph_daemon=~\"$osd\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n", + "expr": "label_replace(\n rate(node_disk_io_time_seconds_total{}[$__rate_interval]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", ceph_daemon=~\"$osd\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{device}} on {{instance}}", @@ -803,7 +803,7 @@ "multi": true, "name": "job", "options": [ ], - "query": "label_values(ceph_osd_metadata{cluster=~\"$cluster\"}, job)", + "query": "label_values(ceph_osd_metadata{}, job)", "refresh": 1, "regex": "(.*)", "sort": 1, @@ -823,7 +823,7 @@ "multi": false, "name": "osd", "options": [ ], - "query": "label_values(ceph_osd_metadata{job=~\"$job\", cluster=~\"$cluster\"}, ceph_daemon)", + "query": "label_values(ceph_osd_metadata{job=~\"$job\"}, ceph_daemon)", "refresh": 1, "regex": "(.*)", "sort": 1, diff --git a/monitoring/ceph-mixin/dashboards_out/osds-overview.json b/monitoring/ceph-mixin/dashboards_out/osds-overview.json index 16e94ec8851de..0c5f968a950df 100644 --- a/monitoring/ceph-mixin/dashboards_out/osds-overview.json +++ b/monitoring/ceph-mixin/dashboards_out/osds-overview.json @@ -94,21 +94,21 @@ "steppedLine": false, "targets": [ { - "expr": "avg (\n rate(ceph_osd_op_r_latency_sum{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_r_latency_count{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) * 1000\n)\n", + "expr": "avg (\n rate(ceph_osd_op_r_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_r_latency_count{job=~\"$job\"}[$__rate_interval]) * 1000\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "AVG read", "refId": "A" }, { - "expr": "max(\n rate(ceph_osd_op_r_latency_sum{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_r_latency_count{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) * 1000\n)\n", + "expr": "max(\n rate(ceph_osd_op_r_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_r_latency_count{job=~\"$job\"}[$__rate_interval]) * 1000\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "MAX read", "refId": "B" }, { - "expr": "quantile(0.95,\n (\n rate(ceph_osd_op_r_latency_sum{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_r_latency_count{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval])\n * 1000\n )\n)\n", + "expr": "quantile(0.95,\n (\n rate(ceph_osd_op_r_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_r_latency_count{job=~\"$job\"}[$__rate_interval])\n * 1000\n )\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "@95%ile", @@ -222,7 +222,7 @@ ], "targets": [ { - "expr": "topk(10,\n (sort(\n (\n rate(ceph_osd_op_r_latency_sum{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_r_latency_count{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) *\n 1000\n )\n ))\n)\n", + "expr": "topk(10,\n (sort(\n (\n rate(ceph_osd_op_r_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_r_latency_count{job=~\"$job\"}[$__rate_interval]) *\n 1000\n )\n ))\n)\n", "format": "table", "instant": true, "intervalFactor": 1, @@ -281,21 +281,21 @@ "steppedLine": false, "targets": [ { - "expr": "avg(\n rate(ceph_osd_op_w_latency_sum{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_w_latency_count{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval])\n * 1000\n)\n", + "expr": "avg(\n rate(ceph_osd_op_w_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_w_latency_count{job=~\"$job\"}[$__rate_interval])\n * 1000\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "AVG write", "refId": "A" }, { - "expr": "max(\n rate(ceph_osd_op_w_latency_sum{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_w_latency_count{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) *\n 1000\n)\n", + "expr": "max(\n rate(ceph_osd_op_w_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_w_latency_count{job=~\"$job\"}[$__rate_interval]) *\n 1000\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "MAX write", "refId": "B" }, { - "expr": "quantile(0.95, (\n rate(ceph_osd_op_w_latency_sum{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_w_latency_count{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) *\n 1000\n))\n", + "expr": "quantile(0.95, (\n rate(ceph_osd_op_w_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_w_latency_count{job=~\"$job\"}[$__rate_interval]) *\n 1000\n))\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "@95%ile write", @@ -409,7 +409,7 @@ ], "targets": [ { - "expr": "topk(10,\n (sort(\n (rate(ceph_osd_op_w_latency_sum{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_w_latency_count{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) *\n 1000)\n ))\n)\n", + "expr": "topk(10,\n (sort(\n (rate(ceph_osd_op_w_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_w_latency_count{job=~\"$job\"}[$__rate_interval]) *\n 1000)\n ))\n)\n", "format": "table", "instant": true, "intervalFactor": 1, @@ -443,7 +443,7 @@ "pieType": "pie", "targets": [ { - "expr": "count by (device_class) (ceph_osd_metadata{job=~\"$job\", cluster=~\"$cluster\"})", + "expr": "count by (device_class) (ceph_osd_metadata{job=~\"$job\"})", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{device_class}}", @@ -476,14 +476,14 @@ "pieType": "pie", "targets": [ { - "expr": "count(ceph_bluefs_wal_total_bytes{job=~\"$job\", cluster=~\"$cluster\"})", + "expr": "count(ceph_bluefs_wal_total_bytes{job=~\"$job\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "bluestore", "refId": "A" }, { - "expr": "absent(ceph_bluefs_wal_total_bytesjob=~\"$job\", cluster=~\"$cluster\") * count(ceph_osd_metadata{job=~\"$job\", cluster=~\"$cluster\"})", + "expr": "absent(ceph_bluefs_wal_total_bytesjob=~\"$job\") * count(ceph_osd_metadata{job=~\"$job\"})", "format": "time_series", "intervalFactor": 2, "legendFormat": "filestore", @@ -514,63 +514,63 @@ "pieType": "pie", "targets": [ { - "expr": "count(ceph_osd_stat_bytes{job=~\"$job\", cluster=~\"$cluster\"} < 1099511627776)", + "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} < 1099511627776)", "format": "time_series", "intervalFactor": 2, "legendFormat": "<1TB", "refId": "A" }, { - "expr": "count(ceph_osd_stat_bytes{job=~\"$job\", cluster=~\"$cluster\"} >= 1099511627776 < 2199023255552)", + "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 1099511627776 < 2199023255552)", "format": "time_series", "intervalFactor": 2, "legendFormat": "<2TB", "refId": "B" }, { - "expr": "count(ceph_osd_stat_bytes{job=~\"$job\", cluster=~\"$cluster\"} >= 2199023255552 < 3298534883328)", + "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 2199023255552 < 3298534883328)", "format": "time_series", "intervalFactor": 2, "legendFormat": "<3TB", "refId": "C" }, { - "expr": "count(ceph_osd_stat_bytes{job=~\"$job\", cluster=~\"$cluster\"} >= 3298534883328 < 4398046511104)", + "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 3298534883328 < 4398046511104)", "format": "time_series", "intervalFactor": 2, "legendFormat": "<4TB", "refId": "D" }, { - "expr": "count(ceph_osd_stat_bytes{job=~\"$job\", cluster=~\"$cluster\"} >= 4398046511104 < 6597069766656)", + "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 4398046511104 < 6597069766656)", "format": "time_series", "intervalFactor": 2, "legendFormat": "<6TB", "refId": "E" }, { - "expr": "count(ceph_osd_stat_bytes{job=~\"$job\", cluster=~\"$cluster\"} >= 6597069766656 < 8796093022208)", + "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 6597069766656 < 8796093022208)", "format": "time_series", "intervalFactor": 2, "legendFormat": "<8TB", "refId": "F" }, { - "expr": "count(ceph_osd_stat_bytes{job=~\"$job\", cluster=~\"$cluster\"} >= 8796093022208 < 10995116277760)", + "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 8796093022208 < 10995116277760)", "format": "time_series", "intervalFactor": 2, "legendFormat": "<10TB", "refId": "G" }, { - "expr": "count(ceph_osd_stat_bytes{job=~\"$job\", cluster=~\"$cluster\"} >= 10995116277760 < 13194139533312)", + "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 10995116277760 < 13194139533312)", "format": "time_series", "intervalFactor": 2, "legendFormat": "<12TB", "refId": "H" }, { - "expr": "count(ceph_osd_stat_bytes{job=~\"$job\", cluster=~\"$cluster\"} >= 13194139533312)", + "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 13194139533312)", "format": "time_series", "intervalFactor": 2, "legendFormat": "<12TB+", @@ -623,7 +623,7 @@ "steppedLine": false, "targets": [ { - "expr": "ceph_osd_numpg{job=~\"$job\", cluster=~\"$cluster\"}", + "expr": "ceph_osd_numpg{job=~\"$job\"}", "format": "time_series", "instant": true, "intervalFactor": 1, @@ -728,7 +728,7 @@ "tableColumn": "", "targets": [ { - "expr": "sum(ceph_bluestore_onode_hits{job=~\"$job\", cluster=~\"$cluster\"}) / (\n sum(ceph_bluestore_onode_hits{job=~\"$job\", cluster=~\"$cluster\"}) +\n sum(ceph_bluestore_onode_misses{job=~\"$job\", cluster=~\"$cluster\"})\n)\n", + "expr": "sum(ceph_bluestore_onode_hits{job=~\"$job\"}) / (\n sum(ceph_bluestore_onode_hits{job=~\"$job\"}) +\n sum(ceph_bluestore_onode_misses{job=~\"$job\"})\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "", @@ -810,14 +810,14 @@ "steppedLine": false, "targets": [ { - "expr": "round(sum(rate(ceph_pool_rd{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval])))", + "expr": "round(sum(rate(ceph_pool_rd{job=~\"$job\"}[$__rate_interval])))", "format": "time_series", "intervalFactor": 1, "legendFormat": "Reads", "refId": "A" }, { - "expr": "round(sum(rate(ceph_pool_wr{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval])))", + "expr": "round(sum(rate(ceph_pool_wr{job=~\"$job\"}[$__rate_interval])))", "format": "time_series", "intervalFactor": 1, "legendFormat": "Writes", @@ -914,7 +914,7 @@ "multi": true, "name": "job", "options": [ ], - "query": "label_values(ceph_osd_metadata{cluster=~\"$cluster\"}, job)", + "query": "label_values(ceph_osd_metadata{}, job)", "refresh": 1, "regex": "(.*)", "sort": 1, diff --git a/monitoring/ceph-mixin/dashboards_out/pool-detail.json b/monitoring/ceph-mixin/dashboards_out/pool-detail.json index 216c09ed014e3..3af2ce50ea92e 100644 --- a/monitoring/ceph-mixin/dashboards_out/pool-detail.json +++ b/monitoring/ceph-mixin/dashboards_out/pool-detail.json @@ -104,7 +104,7 @@ "tableColumn": "", "targets": [ { - "expr": "(ceph_pool_stored{job=~\"$job\", cluster=~\"$cluster\"} / (ceph_pool_stored{job=~\"$job\", cluster=~\"$cluster\"} + ceph_pool_max_avail{job=~\"$job\", cluster=~\"$cluster\"})) *\n on(pool_id) group_left(instance, name) ceph_pool_metadata{job=~\"$job\", cluster=~\"$cluster\", name=~\"$pool_name\"}\n", + "expr": "(ceph_pool_stored{job=~\"$job\"} / (ceph_pool_stored{job=~\"$job\"} + ceph_pool_max_avail{job=~\"$job\"})) *\n on(pool_id) group_left(instance, name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"}\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "", @@ -186,7 +186,7 @@ "tableColumn": "", "targets": [ { - "expr": "(ceph_pool_max_avail{job=~\"$job\", cluster=~\"$cluster\"} / deriv(ceph_pool_stored{job=~\"$job\", cluster=~\"$cluster\"}[6h])) *\n on(pool_id) group_left(instance, name) ceph_pool_metadata{job=~\"$job\", cluster=~\"$cluster\", name=~\"$pool_name\"} > 0\n", + "expr": "(ceph_pool_max_avail{job=~\"$job\"} / deriv(ceph_pool_stored{job=~\"$job\"}[6h])) *\n on(pool_id) group_left(instance, name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"} > 0\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "", @@ -252,7 +252,7 @@ "steppedLine": false, "targets": [ { - "expr": "deriv(ceph_pool_objects{job=~\"$job\", cluster=~\"$cluster\"}[1m]) *\n on(pool_id) group_left(instance, name) ceph_pool_metadata{job=~\"$job\", cluster=~\"$cluster\", name=~\"$pool_name\"}\n", + "expr": "deriv(ceph_pool_objects{job=~\"$job\"}[1m]) *\n on(pool_id) group_left(instance, name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"}\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "Objects per second", @@ -346,14 +346,14 @@ "steppedLine": false, "targets": [ { - "expr": "rate(ceph_pool_rd{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) *\n on(pool_id) group_left(instance,name) ceph_pool_metadata{job=~\"$job\", cluster=~\"$cluster\", name=~\"$pool_name\"}\n", + "expr": "rate(ceph_pool_rd{job=~\"$job\"}[$__rate_interval]) *\n on(pool_id) group_left(instance,name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"}\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "reads", "refId": "A" }, { - "expr": "rate(ceph_pool_wr{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) *\n on(pool_id) group_left(instance, name) ceph_pool_metadata{job=~\"$job\", cluster=~\"$cluster\", name=~\"$pool_name\"}\n", + "expr": "rate(ceph_pool_wr{job=~\"$job\"}[$__rate_interval]) *\n on(pool_id) group_left(instance, name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"}\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "writes", @@ -447,14 +447,14 @@ "steppedLine": false, "targets": [ { - "expr": "rate(ceph_pool_rd_bytes{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) +\n on(pool_id) group_left(instance, name) ceph_pool_metadata{job=~\"$job\", cluster=~\"$cluster\", name=~\"$pool_name\"}\n", + "expr": "rate(ceph_pool_rd_bytes{job=~\"$job\"}[$__rate_interval]) +\n on(pool_id) group_left(instance, name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"}\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "reads", "refId": "A" }, { - "expr": "rate(ceph_pool_wr_bytes{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) +\n on(pool_id) group_left(instance,name) ceph_pool_metadata{job=~\"$job\", cluster=~\"$cluster\", name=~\"$pool_name\"}\n", + "expr": "rate(ceph_pool_wr_bytes{job=~\"$job\"}[$__rate_interval]) +\n on(pool_id) group_left(instance,name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"}\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "writes", @@ -543,7 +543,7 @@ "steppedLine": false, "targets": [ { - "expr": "ceph_pool_objects{job=~\"$job\", cluster=~\"$cluster\"} *\n on(pool_id) group_left(instance,name) ceph_pool_metadata{job=~\"$job\", cluster=~\"$cluster\", name=~\"$pool_name\"}\n", + "expr": "ceph_pool_objects{job=~\"$job\"} *\n on(pool_id) group_left(instance,name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"}\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "Number of Objects", @@ -640,7 +640,7 @@ "multi": true, "name": "job", "options": [ ], - "query": "label_values(ceph_osd_metadata{cluster=~\"$cluster\"}, job)", + "query": "label_values(ceph_osd_metadata{}, job)", "refresh": 1, "regex": "(.*)", "sort": 1, @@ -660,7 +660,7 @@ "multi": false, "name": "pool_name", "options": [ ], - "query": "label_values(ceph_pool_metadata{job=~\"$job\", cluster=~\"$cluster\"}, name)", + "query": "label_values(ceph_pool_metadata{job=~\"$job\"}, name)", "refresh": 1, "regex": "", "sort": 1, diff --git a/monitoring/ceph-mixin/dashboards_out/pool-overview.json b/monitoring/ceph-mixin/dashboards_out/pool-overview.json index bc2a49fd9cb0f..952f7d0bd4de5 100644 --- a/monitoring/ceph-mixin/dashboards_out/pool-overview.json +++ b/monitoring/ceph-mixin/dashboards_out/pool-overview.json @@ -85,7 +85,7 @@ "tableColumn": "", "targets": [ { - "expr": "count(ceph_pool_metadata{job=~\"$job\", cluster=~\"$cluster\"})", + "expr": "count(ceph_pool_metadata{job=~\"$job\"})", "format": "table", "instant": true, "intervalFactor": 1, @@ -168,7 +168,7 @@ "tableColumn": "", "targets": [ { - "expr": "count(ceph_pool_metadata{job=~\"$job\", cluster=~\"$cluster\", compression_mode!=\"none\"})", + "expr": "count(ceph_pool_metadata{job=~\"$job\", compression_mode!=\"none\"})", "format": "", "intervalFactor": 1, "legendFormat": "", @@ -250,7 +250,7 @@ "tableColumn": "", "targets": [ { - "expr": "sum(ceph_osd_stat_bytes{job=~\"$job\", cluster=~\"$cluster\"})", + "expr": "sum(ceph_osd_stat_bytes{job=~\"$job\"})", "format": "", "intervalFactor": 1, "legendFormat": "", @@ -332,7 +332,7 @@ "tableColumn": "", "targets": [ { - "expr": "sum(ceph_pool_bytes_used{job=~\"$job\", cluster=~\"$cluster\"})", + "expr": "sum(ceph_pool_bytes_used{job=~\"$job\"})", "format": "", "instant": true, "intervalFactor": 1, @@ -415,7 +415,7 @@ "tableColumn": "", "targets": [ { - "expr": "sum(ceph_pool_stored{job=~\"$job\", cluster=~\"$cluster\"})", + "expr": "sum(ceph_pool_stored{job=~\"$job\"})", "format": "", "instant": true, "intervalFactor": 1, @@ -498,7 +498,7 @@ "tableColumn": "", "targets": [ { - "expr": "sum(\n ceph_pool_compress_under_bytes{job=~\"$job\", cluster=~\"$cluster\"} -\n ceph_pool_compress_bytes_used{job=~\"$job\", cluster=~\"$cluster\"}\n)\n", + "expr": "sum(\n ceph_pool_compress_under_bytes{job=~\"$job\"} -\n ceph_pool_compress_bytes_used{job=~\"$job\"}\n)\n", "format": "", "intervalFactor": 1, "legendFormat": "", @@ -580,7 +580,7 @@ "tableColumn": "", "targets": [ { - "expr": "(\n sum(ceph_pool_compress_under_bytes{job=~\"$job\", cluster=~\"$cluster\"} > 0) /\n sum(ceph_pool_stored_raw{job=~\"$job\", cluster=~\"$cluster\"} and ceph_pool_compress_under_bytes{job=~\"$job\", cluster=~\"$cluster\"} > 0)\n) * 100\n", + "expr": "(\n sum(ceph_pool_compress_under_bytes{job=~\"$job\"} > 0) /\n sum(ceph_pool_stored_raw{job=~\"$job\"} and ceph_pool_compress_under_bytes{job=~\"$job\"} > 0)\n) * 100\n", "format": "table", "intervalFactor": 1, "legendFormat": "", @@ -662,7 +662,7 @@ "tableColumn": "", "targets": [ { - "expr": "sum(\n ceph_pool_compress_under_bytes{job=~\"$job\", cluster=~\"$cluster\"} > 0)\n / sum(ceph_pool_compress_bytes_used{job=~\"$job\", cluster=~\"$cluster\"} > 0\n)\n", + "expr": "sum(\n ceph_pool_compress_under_bytes{job=~\"$job\"} > 0)\n / sum(ceph_pool_compress_bytes_used{job=~\"$job\"} > 0\n)\n", "format": "", "intervalFactor": 1, "legendFormat": "", @@ -1053,7 +1053,7 @@ ], "targets": [ { - "expr": "(\n ceph_pool_compress_under_bytes{job=~\"$job\", cluster=~\"$cluster\"} /\n ceph_pool_compress_bytes_used{job=~\"$job\", cluster=~\"$cluster\"} > 0\n) and on(pool_id) (\n (\n (ceph_pool_compress_under_bytes{job=~\"$job\", cluster=~\"$cluster\"} > 0) /\n ceph_pool_stored_raw{job=~\"$job\", cluster=~\"$cluster\"}\n ) * 100 > 0.5\n)\n", + "expr": "(\n ceph_pool_compress_under_bytes{job=~\"$job\"} /\n ceph_pool_compress_bytes_used{job=~\"$job\"} > 0\n) and on(pool_id) (\n (\n (ceph_pool_compress_under_bytes{job=~\"$job\"} > 0) /\n ceph_pool_stored_raw{job=~\"$job\"}\n ) * 100 > 0.5\n)\n", "format": "table", "instant": true, "intervalFactor": 1, @@ -1061,7 +1061,7 @@ "refId": "A" }, { - "expr": "ceph_pool_max_avail{job=~\"$job\", cluster=~\"$cluster\"} *\n on(pool_id) group_left(name) ceph_pool_metadata{job=~\"$job\", cluster=~\"$cluster\"}\n", + "expr": "ceph_pool_max_avail{job=~\"$job\"} *\n on(pool_id) group_left(name) ceph_pool_metadata{job=~\"$job\"}\n", "format": "table", "instant": true, "intervalFactor": 1, @@ -1069,7 +1069,7 @@ "refId": "B" }, { - "expr": "(\n (ceph_pool_compress_under_bytes{job=~\"$job\", cluster=~\"$cluster\"} > 0) /\n ceph_pool_stored_raw{job=~\"$job\", cluster=~\"$cluster\"}\n) * 100\n", + "expr": "(\n (ceph_pool_compress_under_bytes{job=~\"$job\"} > 0) /\n ceph_pool_stored_raw{job=~\"$job\"}\n) * 100\n", "format": "table", "instant": true, "intervalFactor": 1, @@ -1077,7 +1077,7 @@ "refId": "C" }, { - "expr": "ceph_pool_percent_used{job=~\"$job\", cluster=~\"$cluster\"} *\n on(pool_id) group_left(name) ceph_pool_metadata{job=~\"$job\", cluster=~\"$cluster\"}\n", + "expr": "ceph_pool_percent_used{job=~\"$job\"} *\n on(pool_id) group_left(name) ceph_pool_metadata{job=~\"$job\"}\n", "format": "table", "instant": true, "intervalFactor": 1, @@ -1085,7 +1085,7 @@ "refId": "D" }, { - "expr": "ceph_pool_compress_under_bytes{job=~\"$job\", cluster=~\"$cluster\"} -\n ceph_pool_compress_bytes_used{job=~\"$job\", cluster=~\"$cluster\"} > 0\n", + "expr": "ceph_pool_compress_under_bytes{job=~\"$job\"} -\n ceph_pool_compress_bytes_used{job=~\"$job\"} > 0\n", "format": "table", "instant": true, "intervalFactor": 1, @@ -1093,7 +1093,7 @@ "refId": "E" }, { - "expr": "delta(ceph_pool_stored{job=~\"$job\", cluster=~\"$cluster\"}[5d])", + "expr": "delta(ceph_pool_stored{job=~\"$job\"}[5d])", "format": "table", "instant": true, "intervalFactor": 1, @@ -1101,7 +1101,7 @@ "refId": "F" }, { - "expr": "rate(ceph_pool_rd{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval])\n + rate(ceph_pool_wr{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval])\n", + "expr": "rate(ceph_pool_rd{job=~\"$job\"}[$__rate_interval])\n + rate(ceph_pool_wr{job=~\"$job\"}[$__rate_interval])\n", "format": "table", "instant": true, "intervalFactor": 1, @@ -1109,7 +1109,7 @@ "refId": "G" }, { - "expr": "rate(ceph_pool_rd_bytes{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) +\n rate(ceph_pool_wr_bytes{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval])\n", + "expr": "rate(ceph_pool_rd_bytes{job=~\"$job\"}[$__rate_interval]) +\n rate(ceph_pool_wr_bytes{job=~\"$job\"}[$__rate_interval])\n", "format": "table", "instant": true, "intervalFactor": 1, @@ -1117,7 +1117,7 @@ "refId": "H" }, { - "expr": "ceph_pool_metadata{job=~\"$job\", cluster=~\"$cluster\"}", + "expr": "ceph_pool_metadata{job=~\"$job\"}", "format": "table", "instant": true, "intervalFactor": 1, @@ -1125,7 +1125,7 @@ "refId": "I" }, { - "expr": "ceph_pool_stored{job=~\"$job\", cluster=~\"$cluster\"} * on(pool_id) group_left ceph_pool_metadata{job=~\"$job\", cluster=~\"$cluster\"}", + "expr": "ceph_pool_stored{job=~\"$job\"} * on(pool_id) group_left ceph_pool_metadata{job=~\"$job\"}", "format": "table", "instant": true, "intervalFactor": 1, @@ -1133,7 +1133,7 @@ "refId": "J" }, { - "expr": "ceph_pool_metadata{job=~\"$job\", cluster=~\"$cluster\", compression_mode!=\"none\"}", + "expr": "ceph_pool_metadata{job=~\"$job\", compression_mode!=\"none\"}", "format": "table", "instant": true, "intervalFactor": 1, @@ -1197,14 +1197,14 @@ "steppedLine": false, "targets": [ { - "expr": "topk($topk,\n round(\n (\n rate(ceph_pool_rd{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) +\n rate(ceph_pool_wr{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval])\n ), 1\n ) * on(pool_id) group_left(instance,name) ceph_pool_metadata{job=~\"$job\", cluster=~\"$cluster\"})\n", + "expr": "topk($topk,\n round(\n (\n rate(ceph_pool_rd{job=~\"$job\"}[$__rate_interval]) +\n rate(ceph_pool_wr{job=~\"$job\"}[$__rate_interval])\n ), 1\n ) * on(pool_id) group_left(instance,name) ceph_pool_metadata{job=~\"$job\"})\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{name}} ", "refId": "A" }, { - "expr": "topk($topk,\n rate(ceph_pool_wr{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) +\n on(pool_id) group_left(instance,name) ceph_pool_metadata{job=~\"$job\", cluster=~\"$cluster\"}\n)\n", + "expr": "topk($topk,\n rate(ceph_pool_wr{job=~\"$job\"}[$__rate_interval]) +\n on(pool_id) group_left(instance,name) ceph_pool_metadata{job=~\"$job\"}\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{name}} - write", @@ -1290,7 +1290,7 @@ "steppedLine": false, "targets": [ { - "expr": "topk($topk,\n (\n rate(ceph_pool_rd_bytes{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) +\n rate(ceph_pool_wr_bytes{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval])\n ) * on(pool_id) group_left(instance, name) ceph_pool_metadata{job=~\"$job\", cluster=~\"$cluster\"}\n)\n", + "expr": "topk($topk,\n (\n rate(ceph_pool_rd_bytes{job=~\"$job\"}[$__rate_interval]) +\n rate(ceph_pool_wr_bytes{job=~\"$job\"}[$__rate_interval])\n ) * on(pool_id) group_left(instance, name) ceph_pool_metadata{job=~\"$job\"}\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{name}}", @@ -1376,7 +1376,7 @@ "steppedLine": false, "targets": [ { - "expr": "ceph_pool_bytes_used{job=~\"$job\", cluster=~\"$cluster\"} * on(pool_id) group_right ceph_pool_metadata{job=~\"$job\", cluster=~\"$cluster\"}", + "expr": "ceph_pool_bytes_used{job=~\"$job\"} * on(pool_id) group_right ceph_pool_metadata{job=~\"$job\"}", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{name}}", @@ -1473,7 +1473,7 @@ "multi": true, "name": "job", "options": [ ], - "query": "label_values(ceph_osd_metadata{cluster=~\"$cluster\"}, job)", + "query": "label_values(ceph_osd_metadata{}, job)", "refresh": 1, "regex": "(.*)", "sort": 1, diff --git a/monitoring/ceph-mixin/dashboards_out/radosgw-detail.json b/monitoring/ceph-mixin/dashboards_out/radosgw-detail.json index ea45b685a1f49..c2b090f9045e4 100644 --- a/monitoring/ceph-mixin/dashboards_out/radosgw-detail.json +++ b/monitoring/ceph-mixin/dashboards_out/radosgw-detail.json @@ -105,14 +105,14 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (instance_id) (\n rate(ceph_rgw_get_initial_lat_sum{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) /\n rate(ceph_rgw_get_initial_lat_count{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval])\n) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", cluster=~\"$cluster\", ceph_daemon=~\"$rgw_servers\"}\n", + "expr": "sum by (instance_id) (\n rate(ceph_rgw_get_initial_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n rate(ceph_rgw_get_initial_lat_count{job=~\"$job\"}[$__rate_interval])\n) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "GET {{ceph_daemon}}", "refId": "A" }, { - "expr": "sum by (instance_id) (\n rate(ceph_rgw_put_initial_lat_sum{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) /\n rate(ceph_rgw_put_initial_lat_count{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval])\n) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", cluster=~\"$cluster\", ceph_daemon=~\"$rgw_servers\"}\n", + "expr": "sum by (instance_id) (\n rate(ceph_rgw_put_initial_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n rate(ceph_rgw_put_initial_lat_count{job=~\"$job\"}[$__rate_interval])\n) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "PUT {{ceph_daemon}}", @@ -198,14 +198,14 @@ "steppedLine": false, "targets": [ { - "expr": "rate(ceph_rgw_get_b{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", cluster=~\"$cluster\", ceph_daemon=~\"$rgw_servers\"}\n", + "expr": "rate(ceph_rgw_get_b{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "GETs {{ceph_daemon}}", "refId": "A" }, { - "expr": "rate(ceph_rgw_put_b{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon)\n ceph_rgw_metadata{job=~\"$job\", cluster=~\"$cluster\", ceph_daemon=~\"$rgw_servers\"}\n", + "expr": "rate(ceph_rgw_put_b{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon)\n ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "PUTs {{ceph_daemon}}", @@ -297,28 +297,28 @@ "steppedLine": false, "targets": [ { - "expr": "rate(ceph_rgw_failed_req{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", cluster=~\"$cluster\",ceph_daemon=~\"$rgw_servers\"}\n", + "expr": "rate(ceph_rgw_failed_req{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\",ceph_daemon=~\"$rgw_servers\"}\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "Requests Failed {{ceph_daemon}}", "refId": "A" }, { - "expr": "rate(ceph_rgw_get{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", cluster=~\"$cluster\", ceph_daemon=~\"$rgw_servers\"}\n", + "expr": "rate(ceph_rgw_get{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "GETs {{ceph_daemon}}", "refId": "B" }, { - "expr": "rate(ceph_rgw_put{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", cluster=~\"$cluster\", ceph_daemon=~\"$rgw_servers\"}\n", + "expr": "rate(ceph_rgw_put{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "PUTs {{ceph_daemon}}", "refId": "C" }, { - "expr": "(\n rate(ceph_rgw_req{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) -\n (\n rate(ceph_rgw_get{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) +\n rate(ceph_rgw_put{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval])\n )\n) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", cluster=~\"$cluster\", ceph_daemon=~\"$rgw_servers\"}\n", + "expr": "(\n rate(ceph_rgw_req{job=~\"$job\"}[$__rate_interval]) -\n (\n rate(ceph_rgw_get{job=~\"$job\"}[$__rate_interval]) +\n rate(ceph_rgw_put{job=~\"$job\"}[$__rate_interval])\n )\n) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "Other {{ceph_daemon}}", @@ -387,28 +387,28 @@ "pieType": "pie", "targets": [ { - "expr": "rate(ceph_rgw_failed_req{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", cluster=~\"$cluster\", ceph_daemon=~\"$rgw_servers\"}\n", + "expr": "rate(ceph_rgw_failed_req{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "Failures {{ceph_daemon}}", "refId": "A" }, { - "expr": "rate(ceph_rgw_get{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", cluster=~\"$cluster\", ceph_daemon=~\"$rgw_servers\"}\n", + "expr": "rate(ceph_rgw_get{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "GETs {{ceph_daemon}}", "refId": "B" }, { - "expr": "rate(ceph_rgw_put{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", cluster=~\"$cluster\", ceph_daemon=~\"$rgw_servers\"}\n", + "expr": "rate(ceph_rgw_put{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "PUTs {{ceph_daemon}}", "refId": "C" }, { - "expr": "(\n rate(ceph_rgw_req{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) -\n (\n rate(ceph_rgw_get{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) +\n rate(ceph_rgw_put{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval])\n )\n) * on (instance_id) group_left (ceph_daemon)\n ceph_rgw_metadata{job=~\"$job\", cluster=~\"$cluster\", ceph_daemon=~\"$rgw_servers\"}\n", + "expr": "(\n rate(ceph_rgw_req{job=~\"$job\"}[$__rate_interval]) -\n (\n rate(ceph_rgw_get{job=~\"$job\"}[$__rate_interval]) +\n rate(ceph_rgw_put{job=~\"$job\"}[$__rate_interval])\n )\n) * on (instance_id) group_left (ceph_daemon)\n ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "Other (DELETE,LIST) {{ceph_daemon}}", @@ -474,7 +474,7 @@ "multi": true, "name": "job", "options": [ ], - "query": "label_values(ceph_osd_metadata{cluster=~\"$cluster\"}, job)", + "query": "label_values(ceph_osd_metadata{}, job)", "refresh": 1, "regex": "(.*)", "sort": 1, @@ -494,7 +494,7 @@ "multi": false, "name": "rgw_servers", "options": [ ], - "query": "label_values(ceph_rgw_metadata{job=~\"$job\", cluster=~\"$cluster\"}, ceph_daemon)", + "query": "label_values(ceph_rgw_metadata{job=~\"$job\"}, ceph_daemon)", "refresh": 1, "regex": "", "sort": 1, diff --git a/monitoring/ceph-mixin/dashboards_out/radosgw-overview.json b/monitoring/ceph-mixin/dashboards_out/radosgw-overview.json index 7ba8861b47e7e..aa24a5161ee5f 100644 --- a/monitoring/ceph-mixin/dashboards_out/radosgw-overview.json +++ b/monitoring/ceph-mixin/dashboards_out/radosgw-overview.json @@ -99,14 +99,14 @@ "steppedLine": false, "targets": [ { - "expr": "rate(ceph_rgw_get_initial_lat_sum{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) /\n rate(ceph_rgw_get_initial_lat_count{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", cluster=~\"$cluster\"}\n", + "expr": "rate(ceph_rgw_get_initial_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n rate(ceph_rgw_get_initial_lat_count{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"}\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "GET AVG", "refId": "A" }, { - "expr": "rate(ceph_rgw_put_initial_lat_sum{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) /\n rate(ceph_rgw_put_initial_lat_count{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", cluster=~\"$cluster\"}\n", + "expr": "rate(ceph_rgw_put_initial_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n rate(ceph_rgw_put_initial_lat_count{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"}\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "PUT AVG", @@ -192,7 +192,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (rgw_host) (\n label_replace(\n rate(ceph_rgw_req{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", cluster=~\"$cluster\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n )\n)\n", + "expr": "sum by (rgw_host) (\n label_replace(\n rate(ceph_rgw_req{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n )\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{rgw_host}}", @@ -278,7 +278,7 @@ "steppedLine": false, "targets": [ { - "expr": "label_replace(\n rate(ceph_rgw_get_initial_lat_sum{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) /\n rate(ceph_rgw_get_initial_lat_count{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", cluster=~\"$cluster\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n", + "expr": "label_replace(\n rate(ceph_rgw_get_initial_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n rate(ceph_rgw_get_initial_lat_count{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{rgw_host}}", @@ -364,14 +364,14 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(ceph_rgw_get_b{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]))", + "expr": "sum(rate(ceph_rgw_get_b{job=~\"$job\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 1, "legendFormat": "GETs", "refId": "A" }, { - "expr": "sum(rate(ceph_rgw_put_b{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]))", + "expr": "sum(rate(ceph_rgw_put_b{job=~\"$job\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 1, "legendFormat": "PUTs", @@ -457,7 +457,7 @@ "steppedLine": false, "targets": [ { - "expr": "label_replace(sum by (instance_id) (\n rate(ceph_rgw_get_b{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) +\n rate(ceph_rgw_put_b{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval])) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", cluster=~\"$cluster\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n", + "expr": "label_replace(sum by (instance_id) (\n rate(ceph_rgw_get_b{job=~\"$job\"}[$__rate_interval]) +\n rate(ceph_rgw_put_b{job=~\"$job\"}[$__rate_interval])) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{rgw_host}}", @@ -543,7 +543,7 @@ "steppedLine": false, "targets": [ { - "expr": "label_replace(\n rate(ceph_rgw_put_initial_lat_sum{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) /\n rate(ceph_rgw_put_initial_lat_count{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", cluster=~\"$cluster\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n", + "expr": "label_replace(\n rate(ceph_rgw_put_initial_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n rate(ceph_rgw_put_initial_lat_count{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{rgw_host}}", @@ -1138,7 +1138,7 @@ "multi": true, "name": "job", "options": [ ], - "query": "label_values(ceph_osd_metadata{cluster=~\"$cluster\"}, job)", + "query": "label_values(ceph_osd_metadata{}, job)", "refresh": 1, "regex": "(.*)", "sort": 1, @@ -1158,7 +1158,7 @@ "multi": false, "name": "rgw_servers", "options": [ ], - "query": "label_values(ceph_rgw_metadata{job=~\"$job\", cluster=~\"$cluster\"}, ceph_daemon)", + "query": "label_values(ceph_rgw_metadata{job=~\"$job\"}, ceph_daemon)", "refresh": 1, "regex": "RGW Server", "sort": 1, diff --git a/monitoring/ceph-mixin/dashboards_out/radosgw-sync-overview.json b/monitoring/ceph-mixin/dashboards_out/radosgw-sync-overview.json index 8cbab9a0c15d4..b6dd774cf81b3 100644 --- a/monitoring/ceph-mixin/dashboards_out/radosgw-sync-overview.json +++ b/monitoring/ceph-mixin/dashboards_out/radosgw-sync-overview.json @@ -80,7 +80,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_fetch_bytes_sum{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]))", + "expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_fetch_bytes_sum{job=~\"$job\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{source_zone}}", @@ -166,7 +166,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_fetch_bytes_count{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]))", + "expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_fetch_bytes_count{job=~\"$job\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{source_zone}}", @@ -252,7 +252,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_poll_latency_sum{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]))", + "expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_poll_latency_sum{job=~\"$job\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{source_zone}}", @@ -338,7 +338,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_fetch_errors{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]))", + "expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_fetch_errors{job=~\"$job\"}[$__rate_interval]))", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{source_zone}}", @@ -436,7 +436,7 @@ "multi": true, "name": "job", "options": [ ], - "query": "label_values(ceph_osd_metadata{cluster=~\"$cluster\"}, job)", + "query": "label_values(ceph_osd_metadata{}, job)", "refresh": 1, "regex": "(.*)", "sort": 1, @@ -456,7 +456,7 @@ "multi": false, "name": "rgw_servers", "options": [ ], - "query": "label_values(ceph_rgw_metadata{job=~\"$job\", cluster=~\"$cluster\"}, ceph_daemon)", + "query": "label_values(ceph_rgw_metadata{job=~\"$job\"}, ceph_daemon)", "refresh": 1, "regex": "RGW Server", "sort": 1, diff --git a/monitoring/ceph-mixin/dashboards_out/rbd-details.json b/monitoring/ceph-mixin/dashboards_out/rbd-details.json index cb25f8ec89b32..2477e30501283 100644 --- a/monitoring/ceph-mixin/dashboards_out/rbd-details.json +++ b/monitoring/ceph-mixin/dashboards_out/rbd-details.json @@ -80,14 +80,14 @@ "steppedLine": false, "targets": [ { - "expr": "rate(ceph_rbd_write_ops{job=~\"$job\", cluster=~\"$cluster\", pool=\"$pool\", image=\"$image\"}[$__rate_interval])", + "expr": "rate(ceph_rbd_write_ops{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval])", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{pool}} Write", "refId": "A" }, { - "expr": "rate(ceph_rbd_read_ops{job=~\"$job\", cluster=~\"$cluster\", pool=\"$pool\", image=\"$image\"}[$__rate_interval])", + "expr": "rate(ceph_rbd_read_ops{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval])", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{pool}} Read", @@ -173,14 +173,14 @@ "steppedLine": false, "targets": [ { - "expr": "rate(ceph_rbd_write_bytes{job=~\"$job\", cluster=~\"$cluster\", pool=\"$pool\", image=\"$image\"}[$__rate_interval])", + "expr": "rate(ceph_rbd_write_bytes{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval])", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{pool}} Write", "refId": "A" }, { - "expr": "rate(ceph_rbd_read_bytes{job=~\"$job\", cluster=~\"$cluster\", pool=\"$pool\", image=\"$image\"}[$__rate_interval])", + "expr": "rate(ceph_rbd_read_bytes{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval])", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{pool}} Read", @@ -266,14 +266,14 @@ "steppedLine": false, "targets": [ { - "expr": "rate(ceph_rbd_write_latency_sum{job=~\"$job\", cluster=~\"$cluster\", pool=\"$pool\", image=\"$image\"}[$__rate_interval]) /\n rate(ceph_rbd_write_latency_count{job=~\"$job\", cluster=~\"$cluster\", pool=\"$pool\", image=\"$image\"}[$__rate_interval])\n", + "expr": "rate(ceph_rbd_write_latency_sum{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval]) /\n rate(ceph_rbd_write_latency_count{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval])\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{pool}} Write", "refId": "A" }, { - "expr": "rate(ceph_rbd_read_latency_sum{job=~\"$job\", cluster=~\"$cluster\", pool=\"$pool\", image=\"$image\"}[$__rate_interval]) /\n rate(ceph_rbd_read_latency_count{job=~\"$job\", cluster=~\"$cluster\", pool=\"$pool\", image=\"$image\"}[$__rate_interval])\n", + "expr": "rate(ceph_rbd_read_latency_sum{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval]) /\n rate(ceph_rbd_read_latency_count{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval])\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{pool}} Read", @@ -370,7 +370,7 @@ "multi": true, "name": "job", "options": [ ], - "query": "label_values(ceph_osd_metadata{cluster=~\"$cluster\"}, job)", + "query": "label_values(ceph_osd_metadata{}, job)", "refresh": 1, "regex": "(.*)", "sort": 1, diff --git a/monitoring/ceph-mixin/dashboards_out/rbd-overview.json b/monitoring/ceph-mixin/dashboards_out/rbd-overview.json index 10facbee58cce..5b0b726ff52e1 100644 --- a/monitoring/ceph-mixin/dashboards_out/rbd-overview.json +++ b/monitoring/ceph-mixin/dashboards_out/rbd-overview.json @@ -92,14 +92,14 @@ "steppedLine": false, "targets": [ { - "expr": "round(sum(rate(ceph_rbd_write_ops{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval])))", + "expr": "round(sum(rate(ceph_rbd_write_ops{job=~\"$job\"}[$__rate_interval])))", "format": "time_series", "intervalFactor": 1, "legendFormat": "Writes", "refId": "A" }, { - "expr": "round(sum(rate(ceph_rbd_read_ops{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval])))", + "expr": "round(sum(rate(ceph_rbd_read_ops{job=~\"$job\"}[$__rate_interval])))", "format": "time_series", "intervalFactor": 1, "legendFormat": "Reads", @@ -185,14 +185,14 @@ "steppedLine": false, "targets": [ { - "expr": "round(sum(rate(ceph_rbd_write_bytes{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval])))", + "expr": "round(sum(rate(ceph_rbd_write_bytes{job=~\"$job\"}[$__rate_interval])))", "format": "time_series", "intervalFactor": 1, "legendFormat": "Write", "refId": "A" }, { - "expr": "round(sum(rate(ceph_rbd_read_bytes{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval])))", + "expr": "round(sum(rate(ceph_rbd_read_bytes{job=~\"$job\"}[$__rate_interval])))", "format": "time_series", "intervalFactor": 1, "legendFormat": "Read", @@ -278,14 +278,14 @@ "steppedLine": false, "targets": [ { - "expr": "round(\n sum(rate(ceph_rbd_write_latency_sum{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval])) /\n sum(rate(ceph_rbd_write_latency_count{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]))\n)\n", + "expr": "round(\n sum(rate(ceph_rbd_write_latency_sum{job=~\"$job\"}[$__rate_interval])) /\n sum(rate(ceph_rbd_write_latency_count{job=~\"$job\"}[$__rate_interval]))\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "Write", "refId": "A" }, { - "expr": "round(\n sum(rate(ceph_rbd_read_latency_sum{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval])) /\n sum(rate(ceph_rbd_read_latency_count{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]))\n)\n", + "expr": "round(\n sum(rate(ceph_rbd_read_latency_sum{job=~\"$job\"}[$__rate_interval])) /\n sum(rate(ceph_rbd_read_latency_count{job=~\"$job\"}[$__rate_interval]))\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "Read", @@ -416,7 +416,7 @@ ], "targets": [ { - "expr": "topk(10,\n (\n sort((\n rate(ceph_rbd_write_ops{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) +\n on (image, pool, namespace) rate(ceph_rbd_read_ops{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval])\n ))\n )\n)\n", + "expr": "topk(10,\n (\n sort((\n rate(ceph_rbd_write_ops{job=~\"$job\"}[$__rate_interval]) +\n on (image, pool, namespace) rate(ceph_rbd_read_ops{job=~\"$job\"}[$__rate_interval])\n ))\n )\n)\n", "format": "table", "instant": true, "intervalFactor": 1, @@ -518,7 +518,7 @@ ], "targets": [ { - "expr": "topk(10,\n sort(\n sum(\n rate(ceph_rbd_read_bytes{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) +\n rate(ceph_rbd_write_bytes{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval])\n ) by (pool, image, namespace)\n )\n)\n", + "expr": "topk(10,\n sort(\n sum(\n rate(ceph_rbd_read_bytes{job=~\"$job\"}[$__rate_interval]) +\n rate(ceph_rbd_write_bytes{job=~\"$job\"}[$__rate_interval])\n ) by (pool, image, namespace)\n )\n)\n", "format": "table", "instant": true, "intervalFactor": 1, @@ -620,7 +620,7 @@ ], "targets": [ { - "expr": "topk(10,\n sum(\n rate(ceph_rbd_write_latency_sum{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) /\n clamp_min(rate(ceph_rbd_write_latency_count{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]), 1) +\n rate(ceph_rbd_read_latency_sum{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]) /\n clamp_min(rate(ceph_rbd_read_latency_count{job=~\"$job\", cluster=~\"$cluster\"}[$__rate_interval]), 1)\n ) by (pool, image, namespace)\n)\n", + "expr": "topk(10,\n sum(\n rate(ceph_rbd_write_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n clamp_min(rate(ceph_rbd_write_latency_count{job=~\"$job\"}[$__rate_interval]), 1) +\n rate(ceph_rbd_read_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n clamp_min(rate(ceph_rbd_read_latency_count{job=~\"$job\"}[$__rate_interval]), 1)\n ) by (pool, image, namespace)\n)\n", "format": "table", "instant": true, "intervalFactor": 1, @@ -689,7 +689,7 @@ "multi": true, "name": "job", "options": [ ], - "query": "label_values(ceph_osd_metadata{cluster=~\"$cluster\"}, job)", + "query": "label_values(ceph_osd_metadata{}, job)", "refresh": 1, "regex": "(.*)", "sort": 1, -- 2.39.5