From: Boris Ranto Date: Tue, 19 Nov 2019 22:17:47 +0000 (+0100) Subject: dashboards: Ignore wal_device label X-Git-Tag: v2.0.9^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=refs%2Fpull%2F249%2Fhead;p=cephmetrics.git dashboards: Ignore wal_device label The wal_device label was added to ceph_disk_occupation. We need to ignore it in these queries to provide proper matching between values. Otherwise, the query won't return any data. This is backwards-compatible, if you ignore a non-existing label, nothing will change. Signed-off-by: Boris Ranto --- diff --git a/dashboards/mgr-prometheus/ceph-at-a-glance.json b/dashboards/mgr-prometheus/ceph-at-a-glance.json index bbf59c4..37edab6 100644 --- a/dashboards/mgr-prometheus/ceph-at-a-glance.json +++ b/dashboards/mgr-prometheus/ceph-at-a-glance.json @@ -2599,7 +2599,7 @@ "tableColumn": "", "targets": [ { - "expr": "sum(\n sum(\n rate(\n node_disk_reads_completed[$__interval]\n )\n ) by (instance,device)\n + ignoring(ceph_daemon,job) group_right(instance) ceph_disk_occupation\n)\n+\nsum(\n sum(\n rate(\n node_disk_writes_completed[$__interval]\n )\n ) by (instance,device)\n + ignoring(ceph_daemon,job) group_right(instance) ceph_disk_occupation\n)", + "expr": "sum(\n sum(\n rate(\n node_disk_reads_completed[$__interval]\n )\n ) by (instance,device)\n + ignoring(ceph_daemon,job,wal_device) group_right(instance) ceph_disk_occupation\n)\n+\nsum(\n sum(\n rate(\n node_disk_writes_completed[$__interval]\n )\n ) by (instance,device)\n + ignoring(ceph_daemon,job,wal_device) group_right(instance) ceph_disk_occupation\n)", "format": "time_series", "groupBy": [], "hide": false, @@ -2708,7 +2708,7 @@ "tableColumn": "", "targets": [ { - "expr": "# should only include OSD hosts\nsum(\n sum(\n rate(\n node_disk_bytes_read[$__interval]\n )\n ) by (instance,device)\n + ignoring(ceph_daemon,job) group_right(instance) ceph_disk_occupation\n)\n+\nsum(\n sum(\n rate(\n node_disk_bytes_written[$__interval]\n )\n ) by (instance,device)\n + ignoring(ceph_daemon,job) group_right(instance) ceph_disk_occupation\n)", + "expr": "# should only include OSD hosts\nsum(\n sum(\n rate(\n node_disk_bytes_read[$__interval]\n )\n ) by (instance,device)\n + ignoring(ceph_daemon,job,wal_device) group_right(instance) ceph_disk_occupation\n)\n+\nsum(\n sum(\n rate(\n node_disk_bytes_written[$__interval]\n )\n ) by (instance,device)\n + ignoring(ceph_daemon,job,wal_device) group_right(instance) ceph_disk_occupation\n)", "format": "time_series", "groupBy": [], "hide": false, @@ -2934,21 +2934,21 @@ "tableColumn": "", "targets": [ { - "expr": "quantile(\n 5 / 100,\n (irate(node_disk_read_time_ms[1m]) + irate(node_disk_write_time_ms[1m]) / \n (irate(node_disk_reads_completed[1m]) + irate(node_disk_writes_completed[1m])) \n +\n ignoring(ceph_daemon,job) ceph_disk_occupation))", + "expr": "quantile(\n 5 / 100,\n (irate(node_disk_read_time_ms[1m]) + irate(node_disk_write_time_ms[1m]) / \n (irate(node_disk_reads_completed[1m]) + irate(node_disk_writes_completed[1m])) \n +\n ignoring(ceph_daemon,job,wal_device) ceph_disk_occupation))", "format": "time_series", "hide": false, "intervalFactor": 1, "refId": "C" }, { - "expr": "max(\n irate(node_disk_read_time_ms[30s]) / irate(node_disk_reads_completed[30s])\n +\n irate(node_disk_write_time_ms[30s]) / irate(node_disk_writes_completed[30s])\n + ignoring(ceph_daemon,job) ceph_disk_occupation\n)", + "expr": "max(\n irate(node_disk_read_time_ms[30s]) / irate(node_disk_reads_completed[30s])\n +\n irate(node_disk_write_time_ms[30s]) / irate(node_disk_writes_completed[30s])\n + ignoring(ceph_daemon,job,wal_device) ceph_disk_occupation\n)", "format": "time_series", "hide": true, "intervalFactor": 2, "refId": "B" }, { - "expr": "sum(\n (irate(node_disk_read_time_ms[1m]) + irate(node_disk_write_time_ms[1m]) / \n (irate(node_disk_reads_completed[1m]) + irate(node_disk_writes_completed[1m])) \n +\n ignoring(ceph_daemon,job) ceph_disk_occupation)) / count(ceph_osd_up)", + "expr": "sum(\n (irate(node_disk_read_time_ms[1m]) + irate(node_disk_write_time_ms[1m]) / \n (irate(node_disk_reads_completed[1m]) + irate(node_disk_writes_completed[1m])) \n +\n ignoring(ceph_daemon,job,wal_device) ceph_disk_occupation)) / count(ceph_osd_up)", "format": "time_series", "hide": true, "intervalFactor": 1, diff --git a/dashboards/mgr-prometheus/iops-by-server.json b/dashboards/mgr-prometheus/iops-by-server.json index b4da67b..eb6db6b 100644 --- a/dashboards/mgr-prometheus/iops-by-server.json +++ b/dashboards/mgr-prometheus/iops-by-server.json @@ -93,7 +93,7 @@ "refId": "B" }, { - "expr": "sum(\n sum(\n irate(node_disk_reads_completed{job=\"node\" }[1m]) + \n irate(node_disk_writes_completed{job=\"node\"}[1m]))\n by(instance, device) + ignoring(ceph_daemon,job) group_right(instance) ceph_disk_occupation)\n by(instance)", + "expr": "sum(\n sum(\n irate(node_disk_reads_completed{job=\"node\" }[1m]) + \n irate(node_disk_writes_completed{job=\"node\"}[1m]))\n by(instance, device) + ignoring(ceph_daemon,job,wal_device) group_right(instance) ceph_disk_occupation)\n by(instance)", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{instance}}", @@ -181,7 +181,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(\n sum(\n irate(node_disk_reads_completed{instance=~\"[[osd_servers]].*\"}[1m]) + irate(node_disk_writes_completed[1m]))\n by(instance,device) +\n ignoring(ceph_daemon,job) group_right(instance) ceph_disk_occupation)\n \n \n", + "expr": "sum(\n sum(\n irate(node_disk_reads_completed{instance=~\"[[osd_servers]].*\"}[1m]) + irate(node_disk_writes_completed[1m]))\n by(instance,device) +\n ignoring(ceph_daemon,job,wal_device) group_right(instance) ceph_disk_occupation)\n \n \n", "format": "time_series", "intervalFactor": 2, "refId": "A" diff --git a/dashboards/mgr-prometheus/latency-by-server.json b/dashboards/mgr-prometheus/latency-by-server.json index 7e9510f..61260e5 100644 --- a/dashboards/mgr-prometheus/latency-by-server.json +++ b/dashboards/mgr-prometheus/latency-by-server.json @@ -90,7 +90,7 @@ "steppedLine": false, "targets": [ { - "expr": "max(\n (irate(node_disk_read_time_ms[30s]) + irate(node_disk_write_time_ms[30s]) / \n (irate(node_disk_reads_completed[30s]) + irate(node_disk_writes_completed[30s])) +\n ignoring(ceph_daemon,job) ceph_disk_occupation))\n by(instance)", + "expr": "max(\n (irate(node_disk_read_time_ms[30s]) + irate(node_disk_write_time_ms[30s]) / \n (irate(node_disk_reads_completed[30s]) + irate(node_disk_writes_completed[30s])) +\n ignoring(ceph_daemon,job,wal_device) ceph_disk_occupation))\n by(instance)", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{instance}}",