git-server-git.apps.pok.os.sepia.ceph.com Git - cephmetrics.git/commitdiff
Multiple fixes to OSD information dashboard 204/head
author Paul Cuzner <pcuzner@redhat.com>
Tue, 10 Jul 2018 23:41:30 +0000 (11:41 +1200)
committer Paul Cuzner <pcuzner@redhat.com>
Tue, 10 Jul 2018 23:41:30 +0000 (11:41 +1200)
Bluestore tables and charts updated, including:
- switched units from ms to secs, which shows µs (microseconds) too
- changed metric from commit to KV latency
- updated thresholds in bluestore tables
- switched from rate to irate for bluestore metrics
- updated bluestore text box description

Signed-off-by: Paul Cuzner <pcuzner@redhat.com>
dashboards/mgr-prometheus/ceph-osd-information.json

index f26899541cba3b9c6a34b39b82f5204f0ba24d1b..b0cb210d7a6b5a0296ddc84b6c4cc554f5558756 100644 (file)
@@ -1,33 +1,33 @@
 {
   "__requires": [
     {
+      "type": "grafana",
       "id": "grafana",
       "name": "Grafana",
-      "type": "grafana",
       "version": "5.0.0"
     },
     {
+      "type": "panel",
       "id": "grafana-piechart-panel",
       "name": "Pie Chart",
-      "type": "panel",
       "version": "1.3.3"
     },
     {
-      "id": "prometheus",
-      "name": "Prometheus",
       "type": "datasource",
+      "id": "prometheus",
+      "name": "Local",
       "version": "5.0.0"
     },
     {
+      "type": "panel",
       "id": "singlestat",
       "name": "Singlestat",
-      "type": "panel",
       "version": "5.0.0"
     },
     {
+      "type": "panel",
       "id": "table",
       "name": "Table",
-      "type": "panel",
       "version": "5.0.0"
     }
   ],
@@ -48,7 +48,7 @@
   "gnetId": null,
   "graphTooltip": 0,
   "id": null,
-  "iteration": 1530217097188,
+  "iteration": 1531263612973,
   "links": [
     {
       "asDropdown": true,
       "id": 26,
       "panels": [
         {
-          "content": "<h1>Ceph Bluestore I/O Process</h1>\n<p style=\"text-align: justify;\">\nUnlike filestore, bluestore does not suffer from a double-write penalty (i.e write to journal then write to HDD). With bluestore, once a write is scheduled (<b>submit</b> and <b>throttle</b> latencies), it is done directly to the disk (<b>AIO wait</b>), and then the metadata relating to the object is changed (<b>kv_commit</b>). Writes are not considered complete until the kv store is updated. <p> The tables on the right focus on  the top 10 Bluestore OSDs with the highest latencies.\n",
+          "content": "<h1>Ceph Bluestore I/O Process</h1>\n<p style=\"text-align: justify;\">\nUnlike filestore, bluestore does not suffer from a double-write penalty (i.e write to journal then write to HDD). With bluestore, once a write is scheduled (<b>submit</b> and <b>throttle</b> latencies), it is done directly to the disk (<b>AIO wait</b>), and then the metadata relating to the object is changed (<b>kv_latency</b>). Writes are not considered complete until the kv store is updated. <p> The tables on the right focus on  the top 10 Bluestore OSDs with the highest latencies.\n",
           "gridPos": {
             "h": 8,
             "w": 6,
               "decimals": 0,
               "pattern": "osd_num",
               "thresholds": [],
-              "type": "number",
-              "unit": "short"
+              "type": "string",
+              "unit": "s"
             },
             {
               "alias": "Submit Latency",
                 ".003"
               ],
               "type": "number",
-              "unit": "ms"
+              "unit": "s"
             },
             {
               "alias": "",
           ],
           "targets": [
             {
-              "expr": "label_replace(\n  (\n    topk($max_devices,\n      rate(ceph_bluestore_submit_lat_sum{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) / \n      (rate(ceph_bluestore_submit_lat_count{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) != 0)\n    )\n  ),\n  \"osd_num\",\n  \"$1\",\n  \"ceph_daemon\",\n  \"osd.(.*)\"\n)",
+              "expr": "label_replace(\n  (\n    topk($max_devices,\n      irate(ceph_bluestore_submit_lat_sum{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) / \n      (irate(ceph_bluestore_submit_lat_count{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) != 0)\n    )\n  ),\n  \"osd_num\",\n  \"$1\",\n  \"ceph_daemon\",\n  \"osd.(.*)\"\n)",
               "format": "table",
               "instant": true,
               "intervalFactor": 2,
               "decimals": 0,
               "pattern": "osd_num",
               "thresholds": [],
-              "type": "number",
+              "type": "string",
               "unit": "short"
             },
             {
               "decimals": 2,
               "pattern": "Value",
               "thresholds": [
-                ".002",
-                ".005"
+                ".001",
+                ".003"
               ],
               "type": "number",
-              "unit": "ms"
+              "unit": "s"
             },
             {
               "alias": "",
           ],
           "targets": [
             {
-              "expr": "label_replace(\n  (\n    topk($max_devices,\n      rate(ceph_bluestore_throttle_lat_sum{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) / \n      (rate(ceph_bluestore_throttle_lat_count{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) != 0)\n    )\n  ),\n  \"osd_num\",\n  \"$1\",\n  \"ceph_daemon\",\n  \"osd.(.*)\"\n)",
+              "expr": "label_replace(\n  (\n    topk($max_devices,\n      irate(ceph_bluestore_throttle_lat_sum{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) / \n      (irate(ceph_bluestore_throttle_lat_count{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) != 0)\n    )\n  ),\n  \"osd_num\",\n  \"$1\",\n  \"ceph_daemon\",\n  \"osd.(.*)\"\n)",
               "format": "table",
               "instant": true,
               "intervalFactor": 2,
               "decimals": 0,
               "pattern": "osd_num",
               "thresholds": [],
-              "type": "number",
+              "type": "string",
               "unit": "short"
             },
             {
                 ".050"
               ],
               "type": "number",
-              "unit": "ms"
+              "unit": "s"
             },
             {
               "alias": "",
           ],
           "targets": [
             {
-              "expr": "label_replace(\n  (\n    topk($max_devices,\n      rate(ceph_bluestore_state_aio_wait_lat_sum{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) / \n      (rate(ceph_bluestore_state_aio_wait_lat_count{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) != 0)\n    )\n  ),\n  \"osd_num\",\n  \"$1\",\n  \"ceph_daemon\",\n  \"osd.(.*)\"\n)",
+              "expr": "label_replace(\n  (\n    topk($max_devices,\n      irate(ceph_bluestore_state_aio_wait_lat_sum{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) / \n      (irate(ceph_bluestore_state_aio_wait_lat_count{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) != 0)\n    )\n  ),\n  \"osd_num\",\n  \"$1\",\n  \"ceph_daemon\",\n  \"osd.(.*)\"\n)",
               "format": "table",
               "instant": true,
               "intervalFactor": 2,
               "decimals": 2,
               "pattern": "osd_num",
               "thresholds": [],
-              "type": "number",
+              "type": "string",
               "unit": "short"
             },
             {
-              "alias": "KV Commit Latency",
+              "alias": "KV Latency",
               "colorMode": "row",
               "colors": [
                 "rgba(50, 172, 45, 0.97)",
               "decimals": 2,
               "pattern": "Value",
               "thresholds": [
-                ".003",
-                ".005"
+                ".020",
+                ".050"
               ],
               "type": "number",
-              "unit": "ms"
+              "unit": "s"
             },
             {
               "alias": "",
           ],
           "targets": [
             {
-              "expr": "label_replace(\n  (\n    topk($max_devices,\n      rate(ceph_bluestore_commit_lat_sum{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) / \n      (rate(ceph_bluestore_commit_lat_count{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) != 0)\n    )\n  ),\n  \"osd_num\",\n  \"$1\",\n  \"ceph_daemon\",\n  \"osd.(.*)\"\n)",
+              "expr": "label_replace(\n  (\n    topk($max_devices,\n      irate(ceph_bluestore_kv_lat_sum{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) / \n      (irate(ceph_bluestore_kv_lat_count{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) != 0)\n    )\n  ),\n  \"osd_num\",\n  \"$1\",\n  \"ceph_daemon\",\n  \"osd.(.*)\"\n)",
               "format": "table",
               "instant": true,
               "intervalFactor": 2,
           "steppedLine": false,
           "targets": [
             {
-              "expr": "avg(\n  rate(ceph_bluestore_submit_lat_sum{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) / \n  (rate(ceph_bluestore_submit_lat_count{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) != 0)\n)",
+              "expr": "avg(\n  irate(ceph_bluestore_submit_lat_sum{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) / \n  (irate(ceph_bluestore_submit_lat_count{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) != 0)\n)",
               "format": "time_series",
               "hide": false,
               "intervalFactor": 2,
               "textEditor": true
             },
             {
-              "expr": "avg(\n  rate(ceph_bluestore_throttle_lat_sum{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) / \n  (rate(ceph_bluestore_throttle_lat_count{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) != 0)\n)",
+              "expr": "avg(\n  irate(ceph_bluestore_throttle_lat_sum{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) / \n  (irate(ceph_bluestore_throttle_lat_count{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) != 0)\n)",
               "format": "time_series",
               "hide": false,
               "intervalFactor": 2,
               "textEditor": true
             },
             {
-              "expr": "avg(\n  rate(ceph_bluestore_state_aio_wait_lat_sum{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) / \n  (rate(ceph_bluestore_state_aio_wait_lat_count{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) != 0)\n)",
+              "expr": "avg(\n  irate(ceph_bluestore_state_aio_wait_lat_sum{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) / \n  (irate(ceph_bluestore_state_aio_wait_lat_count{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) != 0)\n)",
               "format": "time_series",
               "hide": false,
               "intervalFactor": 2,
               "textEditor": true
             },
             {
-              "expr": "avg(\n    rate(ceph_bluestore_commit_lat_sum{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) / \n    (rate(ceph_bluestore_commit_lat_count{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) != 0)\n)",
+              "expr": "avg(\n    irate(ceph_bluestore_kv_lat_sum{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) / \n    (irate(ceph_bluestore_kv_lat_count{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) != 0)\n)",
               "format": "time_series",
               "hide": false,
               "intervalFactor": 2,
-              "legendFormat": "KV Commit",
+              "legendFormat": "KV Latency",
               "refId": "D",
               "textEditor": true
             }
           },
           "yaxes": [
             {
-              "format": "ms",
+              "format": "s",
               "label": null,
               "logBase": 1,
               "max": null,
           "steppedLine": false,
           "targets": [
             {
-              "expr": "quantile(\n  $percentile/100,\n    rate(ceph_bluestore_submit_lat_sum[$__interval]) / \n    (rate(ceph_bluestore_submit_lat_count[$__interval]) != 0)\n)",
+              "expr": "quantile(\n  $percentile/100,\n    irate(ceph_bluestore_submit_lat_sum[$__interval]) / \n    (irate(ceph_bluestore_submit_lat_count[$__interval]) != 0)\n)",
               "format": "time_series",
               "intervalFactor": 2,
               "legendFormat": "Submit",
               "textEditor": true
             },
             {
-              "expr": "quantile(\n  $percentile/100,\n    rate(ceph_bluestore_throttle_lat_sum[$__interval]) / \n    (rate(ceph_bluestore_throttle_lat_count[$__interval]) != 0)\n)",
+              "expr": "quantile(\n  $percentile/100,\n    irate(ceph_bluestore_throttle_lat_sum[$__interval]) / \n    (irate(ceph_bluestore_throttle_lat_count[$__interval]) != 0)\n)",
               "format": "time_series",
               "intervalFactor": 2,
               "legendFormat": "Throttle",
               "textEditor": true
             },
             {
-              "expr": "quantile(\n  $percentile/100,\n    rate(ceph_bluestore_state_aio_wait_lat_sum[$__interval]) / \n    (rate(ceph_bluestore_state_aio_wait_lat_count[$__interval]) != 0)\n)",
+              "expr": "quantile(\n  $percentile/100,\n    irate(ceph_bluestore_state_aio_wait_lat_sum[$__interval]) / \n    (irate(ceph_bluestore_state_aio_wait_lat_count[$__interval]) != 0)\n)",
               "format": "time_series",
               "intervalFactor": 2,
               "legendFormat": "AIO Wait",
               "textEditor": true
             },
             {
-              "expr": "quantile(\n  $percentile/100,\n    rate(ceph_bluestore_commit_lat_sum[$__interval]) / \n    (rate(ceph_bluestore_commit_lat_count[$__interval]) != 0)\n)",
+              "expr": "quantile(\n  $percentile/100,\n    irate(ceph_bluestore_kv_lat_sum[$__interval]) / \n    (irate(ceph_bluestore_kv_lat_count[$__interval]) != 0)\n)",
               "format": "time_series",
               "intervalFactor": 2,
-              "legendFormat": "KV Commit",
+              "legendFormat": "KV Latency",
               "refId": "D",
               "textEditor": true
             }
           },
           "yaxes": [
             {
-              "format": "ms",
+              "format": "s",
               "label": null,
               "logBase": 1,
               "max": null,
   },
   "timezone": "browser",
   "title": "Ceph OSD Information",
-  "version": 28
+  "version": 31
 }