git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
monitoring: remove instance label from ceph-cluster.json completely 42299/head
author: Patrick Seidensal <pseidensal@suse.com>
Tue, 15 Jun 2021 12:43:50 +0000 (14:43 +0200)
committer: Laura Paduano <lpaduano@suse.com>
Tue, 13 Jul 2021 08:40:41 +0000 (10:40 +0200)
The `instance` label is only useful if either

- the exporter returns only data about its node or instance, or
- the exporter provides an instance label and then may return data about
  other nodes

In this case, it's about the Prometheus mgr module, which is a single
exporter providing data about a whole cluster, so not only data related
to the node (or instance) the mgr module is running on.  It is
completely irrelevant which node the exporter runs on; the data
provided doesn't change.  The exporter also doesn't provide `instance`
labels (which Prometheus wouldn't change due to our configuration, see
"honor_labels" setting).

(Actually there's one exception where `instance` labels are provided by
the Ceph mgr module, but that doesn't affect the Ceph Cluster
dashboard.)

Note that keeping the instance label on this particular dashboard would
let the user switch between the data collected from a previously failed
mgr instance and the data from the currently running mgr instance (on
which the Prometheus mgr module runs).  That'd split the data, which
I don't think is a useful feature; it rather looks broken.

Fixes: https://tracker.ceph.com/issues/51212
Signed-off-by: Patrick Seidensal <pseidensal@suse.com>
(cherry picked from commit 037410713f032c0a2a25243e411ae67dffcc1d1a)

monitoring/grafana/dashboards/ceph-cluster.json

index 5603b064af680d2f08ddb4f36dd1786a7896cb99..61a425d09f2eb7187a1461c7495b58ae6124d493 100644 (file)
       "tableColumn": "",
       "targets": [
         {
-          "expr": "ceph_health_status{instance=~'$instance'}",
+          "expr": "ceph_health_status",
           "format": "time_series",
           "instant": true,
           "interval": "$interval",
           "displayAliasType": "Always",
           "displayType": "Regular",
           "displayValueWithAlias": "When Alias Displayed",
-          "expr": "count(ceph_osd_metadata{instance=~\"$instance\"})",
+          "expr": "count(ceph_osd_metadata)",
           "format": "time_series",
           "intervalFactor": 1,
           "legendFormat": "All",
           "displayAliasType": "Always",
           "displayType": "Regular",
           "displayValueWithAlias": "When Alias Displayed",
-          "expr": "sum(ceph_osds_in{instance=~\"$instance\"})",
+          "expr": "sum(ceph_osds_in)",
           "format": "time_series",
           "intervalFactor": 1,
           "legendFormat": "In",
           "displayAliasType": "Warning / Critical",
           "displayType": "Regular",
           "displayValueWithAlias": "When Alias Displayed",
-          "expr": "sum(ceph_osd_in{instance=~\"$instance\"} == bool 0)",
+          "expr": "sum(ceph_osd_in == bool 0)",
           "format": "time_series",
           "interval": "",
           "intervalFactor": 1,
           "displayAliasType": "Always",
           "displayType": "Regular",
           "displayValueWithAlias": "When Alias Displayed",
-          "expr": "sum(ceph_osd_up{instance=~\"$instance\"})",
+          "expr": "sum(ceph_osd_up)",
           "format": "time_series",
           "intervalFactor": 1,
           "legendFormat": "Up",
           "displayAliasType": "Warning / Critical",
           "displayType": "Regular",
           "displayValueWithAlias": "When Alias Displayed",
-          "expr": "sum(ceph_osd_up{instance=~\"$instance\"} == bool 0)",
+          "expr": "sum(ceph_osd_up == bool 0)",
           "format": "time_series",
           "intervalFactor": 1,
           "legendFormat": "Down",
       "tableColumn": "",
       "targets": [
         {
-          "expr": "sum(ceph_osd_stat_bytes_used{instance=~\"$instance\"})/sum(ceph_osd_stat_bytes{instance=~\"$instance\"})",
+          "expr": "sum(ceph_osd_stat_bytes_used)/sum(ceph_osd_stat_bytes)",
           "format": "time_series",
           "intervalFactor": 1,
           "legendFormat": "Used",
       "steppedLine": false,
       "targets": [
         {
-          "expr": "quantile(0.95, ceph_osd_apply_latency_ms{instance=~\"$instance\"})",
+          "expr": "quantile(0.95, ceph_osd_apply_latency_ms)",
           "format": "time_series",
           "intervalFactor": 1,
           "legendFormat": "Apply Latency P_95",
           "refId": "A"
         },
         {
-          "expr": "quantile(0.95, ceph_osd_commit_latency_ms{instance=~\"$instance\"})",
+          "expr": "quantile(0.95, ceph_osd_commit_latency_ms)",
           "format": "time_series",
           "intervalFactor": 1,
           "legendFormat": "Commit Latency P_95",
           "refId": "B"
         },
         {
-          "expr": "avg(ceph_osd_apply_latency_ms{instance=~\"$instance\"})",
+          "expr": "avg(ceph_osd_apply_latency_ms)",
           "format": "time_series",
           "intervalFactor": 1,
           "legendFormat": "Avg Apply Latency",
           "refId": "C"
         },
         {
-          "expr": "avg(ceph_osd_commit_latency_ms{instance=~\"$instance\"})",
+          "expr": "avg(ceph_osd_commit_latency_ms)",
           "format": "time_series",
           "intervalFactor": 1,
           "legendFormat": "Avg Commit Latency",
           "displayAliasType": "Always",
           "displayType": "Regular",
           "displayValueWithAlias": "When Alias Displayed",
-          "expr": "sum(ceph_mon_quorum_status{instance=~\"$instance\"})",
+          "expr": "sum(ceph_mon_quorum_status)",
           "format": "time_series",
           "interval": "",
           "intervalFactor": 1,
           "displayAliasType": "Always",
           "displayType": "Regular",
           "displayValueWithAlias": "When Alias Displayed",
-          "expr": "count(ceph_mon_quorum_status{instance=~\"$instance\"})",
+          "expr": "count(ceph_mon_quorum_status)",
           "format": "time_series",
           "intervalFactor": 1,
           "legendFormat": "Total",
           "displayAliasType": "Warning / Critical",
           "displayType": "Annotation",
           "displayValueWithAlias": "Never",
-          "expr": "count(ceph_mon_quorum_status{instance=~\"$instance\"}) / sum(ceph_mon_quorum_status{instance=~\"$instance\"})",
+          "expr": "count(ceph_mon_quorum_status) / sum(ceph_mon_quorum_status)",
           "format": "time_series",
           "intervalFactor": 1,
           "legendFormat": "MONs out of Quorum",
           "displayAliasType": "Always",
           "displayType": "Regular",
           "displayValueWithAlias": "When Alias Displayed",
-          "expr": "ceph_mds_server_handle_client_session{instance=~\"$instance\"}",
+          "expr": "ceph_mds_server_handle_client_session",
           "format": "time_series",
           "intervalFactor": 1,
           "legendFormat": "Clients",
       "steppedLine": false,
       "targets": [
         {
-          "expr": "sum(irate(ceph_osd_op_w_in_bytes{instance=~\"$instance\"}[1m]))",
+          "expr": "sum(irate(ceph_osd_op_w_in_bytes[1m]))",
           "format": "time_series",
           "intervalFactor": 1,
           "legendFormat": "Writes",
           "refId": "A"
         },
         {
-          "expr": "sum(irate(ceph_osd_op_r_out_bytes{instance=~\"$instance\"}[1m]))",
+          "expr": "sum(irate(ceph_osd_op_r_out_bytes[1m]))",
           "format": "time_series",
           "intervalFactor": 1,
           "legendFormat": "Reads",
       "steppedLine": false,
       "targets": [
         {
-          "expr": "sum(deriv(ceph_pool_stored{instance=~\"$instance\"}[1m]))",
+          "expr": "sum(deriv(ceph_pool_stored[1m]))",
           "format": "time_series",
           "intervalFactor": 1,
           "refId": "A"
       "span": 12,
       "targets": [
         {
-          "expr": "ceph_osd_stat_bytes_used{instance=~'$instance'} / ceph_osd_stat_bytes{instance=~'$instance'}",
+          "expr": "ceph_osd_stat_bytes_used / ceph_osd_stat_bytes",
           "format": "time_series",
           "interval": "1m",
           "intervalFactor": 1,
       "links": [],
       "targets": [
         {
-          "expr": "ceph_osd_numpg{instance=~\"$instance\"}",
+          "expr": "ceph_osd_numpg",
           "format": "time_series",
           "intervalFactor": 1,
           "legendFormat": "#PGs",
         "query": "1m,10m,30m,1h,6h,12h,1d,7d,14d,30d",
         "refresh": 2,
         "type": "interval"
-      },
-      {
-        "allFormat": "glob",
-        "allValue": null,
-        "current": {},
-        "datasource": "$datasource",
-        "hide": 0,
-        "hideLabel": false,
-        "includeAll": true,
-        "label": "Exporter Instance",
-        "multi": false,
-        "multiFormat": "glob",
-        "name": "instance",
-        "options": [],
-        "query": "label_values(ceph_health_status, instance)",
-        "refresh": 1,
-        "regex": "",
-        "sort": 0,
-        "tagValuesQuery": "",
-        "tags": [],
-        "tagsQuery": "",
-        "type": "query",
-        "useTags": false
       }
     ]
   },