From: Patrick Seidensal Date: Tue, 15 Jun 2021 12:43:50 +0000 (+0200) Subject: monitoring: remove instance label from ceph-cluster.json completely X-Git-Tag: v16.2.6~120^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=refs%2Fpull%2F42299%2Fhead;p=ceph.git monitoring: remove instance label from ceph-cluster.json completely The `instance` label is only useful if - the exporter returns only data about its node or instance - the exporter provides an instance label and then may return data about other nodes In this case, it's about the Prometheus mgr module, which is a single exporter providing data about a whole cluster, so not only data related to the node (or instance) the mgr module is running on. It is completely irrelevant which node the exporter runs on; the data provided doesn't change. The exporter also doesn't provide `instance` labels (which Prometheus wouldn't change due to our configuration, see "honor_labels" setting). (Actually there's one exception where `instance` labels are provided by the Ceph mgr module, but that doesn't affect the Ceph Cluster dashboard.) Note that keeping that instance label on this particular dashboard would enable the user to switch between a previously failed mgr instance and the data collected from there and the currently running mgr instance (on which the Prometheus mgr module runs). That'd split the data, which I don't think is a useful feature, but rather looks broken. 
Fixes: https://tracker.ceph.com/issues/51212 Signed-off-by: Patrick Seidensal (cherry picked from commit 037410713f032c0a2a25243e411ae67dffcc1d1a) --- diff --git a/monitoring/grafana/dashboards/ceph-cluster.json b/monitoring/grafana/dashboards/ceph-cluster.json index 5603b064af68..61a425d09f2e 100644 --- a/monitoring/grafana/dashboards/ceph-cluster.json +++ b/monitoring/grafana/dashboards/ceph-cluster.json @@ -107,7 +107,7 @@ "tableColumn": "", "targets": [ { - "expr": "ceph_health_status{instance=~'$instance'}", + "expr": "ceph_health_status", "format": "time_series", "instant": true, "interval": "$interval", @@ -175,7 +175,7 @@ "displayAliasType": "Always", "displayType": "Regular", "displayValueWithAlias": "When Alias Displayed", - "expr": "count(ceph_osd_metadata{instance=~\"$instance\"})", + "expr": "count(ceph_osd_metadata)", "format": "time_series", "intervalFactor": 1, "legendFormat": "All", @@ -190,7 +190,7 @@ "displayAliasType": "Always", "displayType": "Regular", "displayValueWithAlias": "When Alias Displayed", - "expr": "sum(ceph_osds_in{instance=~\"$instance\"})", + "expr": "sum(ceph_osds_in)", "format": "time_series", "intervalFactor": 1, "legendFormat": "In", @@ -205,7 +205,7 @@ "displayAliasType": "Warning / Critical", "displayType": "Regular", "displayValueWithAlias": "When Alias Displayed", - "expr": "sum(ceph_osd_in{instance=~\"$instance\"} == bool 0)", + "expr": "sum(ceph_osd_in == bool 0)", "format": "time_series", "interval": "", "intervalFactor": 1, @@ -222,7 +222,7 @@ "displayAliasType": "Always", "displayType": "Regular", "displayValueWithAlias": "When Alias Displayed", - "expr": "sum(ceph_osd_up{instance=~\"$instance\"})", + "expr": "sum(ceph_osd_up)", "format": "time_series", "intervalFactor": 1, "legendFormat": "Up", @@ -238,7 +238,7 @@ "displayAliasType": "Warning / Critical", "displayType": "Regular", "displayValueWithAlias": "When Alias Displayed", - "expr": "sum(ceph_osd_up{instance=~\"$instance\"} == bool 0)", + "expr": 
"sum(ceph_osd_up == bool 0)", "format": "time_series", "intervalFactor": 1, "legendFormat": "Down", @@ -313,7 +313,7 @@ "tableColumn": "", "targets": [ { - "expr": "sum(ceph_osd_stat_bytes_used{instance=~\"$instance\"})/sum(ceph_osd_stat_bytes{instance=~\"$instance\"})", + "expr": "sum(ceph_osd_stat_bytes_used)/sum(ceph_osd_stat_bytes)", "format": "time_series", "intervalFactor": 1, "legendFormat": "Used", @@ -531,28 +531,28 @@ "steppedLine": false, "targets": [ { - "expr": "quantile(0.95, ceph_osd_apply_latency_ms{instance=~\"$instance\"})", + "expr": "quantile(0.95, ceph_osd_apply_latency_ms)", "format": "time_series", "intervalFactor": 1, "legendFormat": "Apply Latency P_95", "refId": "A" }, { - "expr": "quantile(0.95, ceph_osd_commit_latency_ms{instance=~\"$instance\"})", + "expr": "quantile(0.95, ceph_osd_commit_latency_ms)", "format": "time_series", "intervalFactor": 1, "legendFormat": "Commit Latency P_95", "refId": "B" }, { - "expr": "avg(ceph_osd_apply_latency_ms{instance=~\"$instance\"})", + "expr": "avg(ceph_osd_apply_latency_ms)", "format": "time_series", "intervalFactor": 1, "legendFormat": "Avg Apply Latency", "refId": "C" }, { - "expr": "avg(ceph_osd_commit_latency_ms{instance=~\"$instance\"})", + "expr": "avg(ceph_osd_commit_latency_ms)", "format": "time_series", "intervalFactor": 1, "legendFormat": "Avg Commit Latency", @@ -630,7 +630,7 @@ "displayAliasType": "Always", "displayType": "Regular", "displayValueWithAlias": "When Alias Displayed", - "expr": "sum(ceph_mon_quorum_status{instance=~\"$instance\"})", + "expr": "sum(ceph_mon_quorum_status)", "format": "time_series", "interval": "", "intervalFactor": 1, @@ -647,7 +647,7 @@ "displayAliasType": "Always", "displayType": "Regular", "displayValueWithAlias": "When Alias Displayed", - "expr": "count(ceph_mon_quorum_status{instance=~\"$instance\"})", + "expr": "count(ceph_mon_quorum_status)", "format": "time_series", "intervalFactor": 1, "legendFormat": "Total", @@ -664,7 +664,7 @@ "displayAliasType": 
"Warning / Critical", "displayType": "Annotation", "displayValueWithAlias": "Never", - "expr": "count(ceph_mon_quorum_status{instance=~\"$instance\"}) / sum(ceph_mon_quorum_status{instance=~\"$instance\"})", + "expr": "count(ceph_mon_quorum_status) / sum(ceph_mon_quorum_status)", "format": "time_series", "intervalFactor": 1, "legendFormat": "MONs out of Quorum", @@ -711,7 +711,7 @@ "displayAliasType": "Always", "displayType": "Regular", "displayValueWithAlias": "When Alias Displayed", - "expr": "ceph_mds_server_handle_client_session{instance=~\"$instance\"}", + "expr": "ceph_mds_server_handle_client_session", "format": "time_series", "intervalFactor": 1, "legendFormat": "Clients", @@ -765,14 +765,14 @@ "steppedLine": false, "targets": [ { - "expr": "sum(irate(ceph_osd_op_w_in_bytes{instance=~\"$instance\"}[1m]))", + "expr": "sum(irate(ceph_osd_op_w_in_bytes[1m]))", "format": "time_series", "intervalFactor": 1, "legendFormat": "Writes", "refId": "A" }, { - "expr": "sum(irate(ceph_osd_op_r_out_bytes{instance=~\"$instance\"}[1m]))", + "expr": "sum(irate(ceph_osd_op_r_out_bytes[1m]))", "format": "time_series", "intervalFactor": 1, "legendFormat": "Reads", @@ -852,7 +852,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(deriv(ceph_pool_stored{instance=~\"$instance\"}[1m]))", + "expr": "sum(deriv(ceph_pool_stored[1m]))", "format": "time_series", "intervalFactor": 1, "refId": "A" @@ -925,7 +925,7 @@ "span": 12, "targets": [ { - "expr": "ceph_osd_stat_bytes_used{instance=~'$instance'} / ceph_osd_stat_bytes{instance=~'$instance'}", + "expr": "ceph_osd_stat_bytes_used / ceph_osd_stat_bytes", "format": "time_series", "interval": "1m", "intervalFactor": 1, @@ -987,7 +987,7 @@ "links": [], "targets": [ { - "expr": "ceph_osd_numpg{instance=~\"$instance\"}", + "expr": "ceph_osd_numpg", "format": "time_series", "intervalFactor": 1, "legendFormat": "#PGs", @@ -1191,29 +1191,6 @@ "query": "1m,10m,30m,1h,6h,12h,1d,7d,14d,30d", "refresh": 2, "type": "interval" - }, - { - 
"allFormat": "glob", - "allValue": null, - "current": {}, - "datasource": "$datasource", - "hide": 0, - "hideLabel": false, - "includeAll": true, - "label": "Exporter Instance", - "multi": false, - "multiFormat": "glob", - "name": "instance", - "options": [], - "query": "label_values(ceph_health_status, instance)", - "refresh": 1, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "type": "query", - "useTags": false } ] },