{
"__requires": [
{
+ "type": "grafana",
"id": "grafana",
"name": "Grafana",
- "type": "grafana",
"version": "5.0.0"
},
{
+ "type": "panel",
"id": "grafana-piechart-panel",
"name": "Pie Chart",
- "type": "panel",
"version": "1.3.3"
},
{
- "id": "prometheus",
- "name": "Prometheus",
"type": "datasource",
+ "id": "prometheus",
+ "name": "Local",
"version": "5.0.0"
},
{
+ "type": "panel",
"id": "singlestat",
"name": "Singlestat",
- "type": "panel",
"version": "5.0.0"
},
{
+ "type": "panel",
"id": "table",
"name": "Table",
- "type": "panel",
"version": "5.0.0"
}
],
"gnetId": null,
"graphTooltip": 0,
"id": null,
- "iteration": 1530217097188,
+ "iteration": 1531263612973,
"links": [
{
"asDropdown": true,
"id": 26,
"panels": [
{
- "content": "<h1>Ceph Bluestore I/O Process</h1>\n<p style=\"text-align: justify;\">\nUnlike filestore, bluestore does not suffer from a double-write penalty (i.e write to journal then write to HDD). With bluestore, once a write is scheduled (<b>submit</b> and <b>throttle</b> latencies), it is done directly to the disk (<b>AIO wait</b>), and then the metadata relating to the object is changed (<b>kv_commit</b>). Writes are not considered complete until the kv store is updated. <p> The tables on the right focus on the top 10 Bluestore OSDs with the highest latencies.\n",
+ "content": "<h1>Ceph Bluestore I/O Process</h1>\n<p style=\"text-align: justify;\">\nUnlike filestore, bluestore does not suffer from a double-write penalty (i.e., write to journal then write to HDD). With bluestore, once a write is scheduled (<b>submit</b> and <b>throttle</b> latencies), it is done directly to the disk (<b>AIO wait</b>), and then the metadata relating to the object is changed (<b>kv_latency</b>). Writes are not considered complete until the kv store is updated. <p> The tables on the right focus on the top 10 Bluestore OSDs with the highest latencies.\n",
"gridPos": {
"h": 8,
"w": 6,
"decimals": 0,
"pattern": "osd_num",
"thresholds": [],
- "type": "number",
- "unit": "short"
+ "type": "string",
+ "unit": "s"
},
{
"alias": "Submit Latency",
".003"
],
"type": "number",
- "unit": "ms"
+ "unit": "s"
},
{
"alias": "",
],
"targets": [
{
- "expr": "label_replace(\n (\n topk($max_devices,\n rate(ceph_bluestore_submit_lat_sum{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) / \n (rate(ceph_bluestore_submit_lat_count{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) != 0)\n )\n ),\n \"osd_num\",\n \"$1\",\n \"ceph_daemon\",\n \"osd.(.*)\"\n)",
+ "expr": "label_replace(\n (\n topk($max_devices,\n irate(ceph_bluestore_submit_lat_sum{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) / \n (irate(ceph_bluestore_submit_lat_count{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) != 0)\n )\n ),\n \"osd_num\",\n \"$1\",\n \"ceph_daemon\",\n \"osd.(.*)\"\n)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"decimals": 0,
"pattern": "osd_num",
"thresholds": [],
- "type": "number",
+ "type": "string",
"unit": "short"
},
{
"decimals": 2,
"pattern": "Value",
"thresholds": [
- ".002",
- ".005"
+ ".001",
+ ".003"
],
"type": "number",
- "unit": "ms"
+ "unit": "s"
},
{
"alias": "",
],
"targets": [
{
- "expr": "label_replace(\n (\n topk($max_devices,\n rate(ceph_bluestore_throttle_lat_sum{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) / \n (rate(ceph_bluestore_throttle_lat_count{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) != 0)\n )\n ),\n \"osd_num\",\n \"$1\",\n \"ceph_daemon\",\n \"osd.(.*)\"\n)",
+ "expr": "label_replace(\n (\n topk($max_devices,\n irate(ceph_bluestore_throttle_lat_sum{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) / \n (irate(ceph_bluestore_throttle_lat_count{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) != 0)\n )\n ),\n \"osd_num\",\n \"$1\",\n \"ceph_daemon\",\n \"osd.(.*)\"\n)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"decimals": 0,
"pattern": "osd_num",
"thresholds": [],
- "type": "number",
+ "type": "string",
"unit": "short"
},
{
".050"
],
"type": "number",
- "unit": "ms"
+ "unit": "s"
},
{
"alias": "",
],
"targets": [
{
- "expr": "label_replace(\n (\n topk($max_devices,\n rate(ceph_bluestore_state_aio_wait_lat_sum{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) / \n (rate(ceph_bluestore_state_aio_wait_lat_count{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) != 0)\n )\n ),\n \"osd_num\",\n \"$1\",\n \"ceph_daemon\",\n \"osd.(.*)\"\n)",
+ "expr": "label_replace(\n (\n topk($max_devices,\n irate(ceph_bluestore_state_aio_wait_lat_sum{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) / \n (irate(ceph_bluestore_state_aio_wait_lat_count{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) != 0)\n )\n ),\n \"osd_num\",\n \"$1\",\n \"ceph_daemon\",\n \"osd.(.*)\"\n)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"decimals": 2,
"pattern": "osd_num",
"thresholds": [],
- "type": "number",
+ "type": "string",
"unit": "short"
},
{
- "alias": "KV Commit Latency",
+ "alias": "KV Latency",
"colorMode": "row",
"colors": [
"rgba(50, 172, 45, 0.97)",
"decimals": 2,
"pattern": "Value",
"thresholds": [
- ".003",
- ".005"
+ ".020",
+ ".050"
],
"type": "number",
- "unit": "ms"
+ "unit": "s"
},
{
"alias": "",
],
"targets": [
{
- "expr": "label_replace(\n (\n topk($max_devices,\n rate(ceph_bluestore_commit_lat_sum{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) / \n (rate(ceph_bluestore_commit_lat_count{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) != 0)\n )\n ),\n \"osd_num\",\n \"$1\",\n \"ceph_daemon\",\n \"osd.(.*)\"\n)",
+ "expr": "label_replace(\n (\n topk($max_devices,\n irate(ceph_bluestore_kv_lat_sum{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) / \n (irate(ceph_bluestore_kv_lat_count{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) != 0)\n )\n ),\n \"osd_num\",\n \"$1\",\n \"ceph_daemon\",\n \"osd.(.*)\"\n)",
"format": "table",
"instant": true,
"intervalFactor": 2,
"steppedLine": false,
"targets": [
{
- "expr": "avg(\n rate(ceph_bluestore_submit_lat_sum{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) / \n (rate(ceph_bluestore_submit_lat_count{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) != 0)\n)",
+ "expr": "avg(\n irate(ceph_bluestore_submit_lat_sum{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) / \n (irate(ceph_bluestore_submit_lat_count{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) != 0)\n)",
"format": "time_series",
"hide": false,
"intervalFactor": 2,
"textEditor": true
},
{
- "expr": "avg(\n rate(ceph_bluestore_throttle_lat_sum{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) / \n (rate(ceph_bluestore_throttle_lat_count{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) != 0)\n)",
+ "expr": "avg(\n irate(ceph_bluestore_throttle_lat_sum{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) / \n (irate(ceph_bluestore_throttle_lat_count{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) != 0)\n)",
"format": "time_series",
"hide": false,
"intervalFactor": 2,
"textEditor": true
},
{
- "expr": "avg(\n rate(ceph_bluestore_state_aio_wait_lat_sum{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) / \n (rate(ceph_bluestore_state_aio_wait_lat_count{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) != 0)\n)",
+ "expr": "avg(\n irate(ceph_bluestore_state_aio_wait_lat_sum{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) / \n (irate(ceph_bluestore_state_aio_wait_lat_count{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) != 0)\n)",
"format": "time_series",
"hide": false,
"intervalFactor": 2,
"textEditor": true
},
{
- "expr": "avg(\n rate(ceph_bluestore_commit_lat_sum{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) / \n (rate(ceph_bluestore_commit_lat_count{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) != 0)\n)",
+ "expr": "avg(\n irate(ceph_bluestore_kv_lat_sum{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) / \n (irate(ceph_bluestore_kv_lat_count{ceph_daemon=~\"osd.[[osd_id]]\"}[$__interval]) != 0)\n)",
"format": "time_series",
"hide": false,
"intervalFactor": 2,
- "legendFormat": "KV Commit",
+ "legendFormat": "KV Latency",
"refId": "D",
"textEditor": true
}
},
"yaxes": [
{
- "format": "ms",
+ "format": "s",
"label": null,
"logBase": 1,
"max": null,
"steppedLine": false,
"targets": [
{
- "expr": "quantile(\n $percentile/100,\n rate(ceph_bluestore_submit_lat_sum[$__interval]) / \n (rate(ceph_bluestore_submit_lat_count[$__interval]) != 0)\n)",
+ "expr": "quantile(\n $percentile/100,\n irate(ceph_bluestore_submit_lat_sum[$__interval]) / \n (irate(ceph_bluestore_submit_lat_count[$__interval]) != 0)\n)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Submit",
"textEditor": true
},
{
- "expr": "quantile(\n $percentile/100,\n rate(ceph_bluestore_throttle_lat_sum[$__interval]) / \n (rate(ceph_bluestore_throttle_lat_count[$__interval]) != 0)\n)",
+ "expr": "quantile(\n $percentile/100,\n irate(ceph_bluestore_throttle_lat_sum[$__interval]) / \n (irate(ceph_bluestore_throttle_lat_count[$__interval]) != 0)\n)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Throttle",
"textEditor": true
},
{
- "expr": "quantile(\n $percentile/100,\n rate(ceph_bluestore_state_aio_wait_lat_sum[$__interval]) / \n (rate(ceph_bluestore_state_aio_wait_lat_count[$__interval]) != 0)\n)",
+ "expr": "quantile(\n $percentile/100,\n irate(ceph_bluestore_state_aio_wait_lat_sum[$__interval]) / \n (irate(ceph_bluestore_state_aio_wait_lat_count[$__interval]) != 0)\n)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "AIO Wait",
"textEditor": true
},
{
- "expr": "quantile(\n $percentile/100,\n rate(ceph_bluestore_commit_lat_sum[$__interval]) / \n (rate(ceph_bluestore_commit_lat_count[$__interval]) != 0)\n)",
+ "expr": "quantile(\n $percentile/100,\n irate(ceph_bluestore_kv_lat_sum[$__interval]) / \n (irate(ceph_bluestore_kv_lat_count[$__interval]) != 0)\n)",
"format": "time_series",
"intervalFactor": 2,
- "legendFormat": "KV Commit",
+ "legendFormat": "KV Latency",
"refId": "D",
"textEditor": true
}
},
"yaxes": [
{
- "format": "ms",
+ "format": "s",
"label": null,
"logBase": 1,
"max": null,
},
"timezone": "browser",
"title": "Ceph OSD Information",
- "version": 28
+ "version": 31
}