From: Paul Cuzner Date: Wed, 2 Aug 2017 02:13:00 +0000 (+1200) Subject: ceph-osd-information : osd dashboard now provides summary and performance data X-Git-Tag: v1.0~36^2~4 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=30550554930dddefc2559f397ac192d8fde628ca;p=cephmetrics.git ceph-osd-information : osd dashboard now provides summary and performance data Summary row shows; - osd count - osd up count - osd's down - disk size summary (pie chart showing what sizes of disk are in the cluster - table of osd to disk size - OSD encryption summary (how many of my OSDs are encrypted?) - OSD type status (how many OSDs are filestore vs bluestore Panel includes an OSD id which is used as a filter for the filestore performance row The performance row now shows average OSD performance for a single OSD or all OSDs. This can then be used for side-by-side comparison with OSD performance across the cluster at the 95%ile. --- diff --git a/dashboards/current/ceph-osd-information.json b/dashboards/current/ceph-osd-information.json new file mode 100644 index 0000000..ba06609 --- /dev/null +++ b/dashboards/current/ceph-osd-information.json @@ -0,0 +1,1330 @@ +{ + "dashboard": { + "annotations": { + "list": [] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": true, + "id": 61, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "tags": [ + "overview" + ], + "title": "Shortcuts", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "220px", + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "Local", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 11, + "interval": null, + "links": [], + 
"mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "refId": "A", + "target": "keepLastValue(consolidateBy(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.num_osd), \"max\"))", + "textEditor": true + } + ], + "thresholds": "", + "title": "OSDs", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "Local", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 12, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "refId": "A", + "target": 
"keepLastValue(consolidateBy(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.num_osd_up),\"max\"))", + "textEditor": true + } + ], + "thresholds": "", + "title": "OSDs UP", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "rgba(251,251,251,0.97)", + "rgba(255,165,0, 1)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "Local", + "decimals": 0, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 16, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 1, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 1, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "hide": true, + "refId": "A", + "target": "keepLastValue(consolidateBy(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.num_osd_up),\"max\"))", + "textEditor": true + }, + { + "hide": true, + "refId": "B", + "target": "keepLastValue(consolidateBy(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.num_osd), \"max\"))", + "textEditor": true + }, + { + "refId": "C", + "target": "diffSeries(#B,#A)", + "targetFull": "diffSeries(keepLastValue(consolidateBy(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.num_osd), 
\"max\")),keepLastValue(consolidateBy(maxSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.mon.num_osd_up),\"max\")))", + "textEditor": true + } + ], + "thresholds": "1,3", + "title": "OSDs DOWN", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "aliasColors": {}, + "cacheTimeout": null, + "combine": { + "label": "Others", + "threshold": 0 + }, + "datasource": "Local", + "description": "The pie chart shows the various disk sizes used within the cluster", + "fontSize": "80%", + "format": "none", + "height": "220", + "id": 13, + "interval": null, + "legend": { + "percentage": false, + "show": true, + "sortDesc": true, + "values": true + }, + "legendType": "Right side", + "links": [], + "maxDataPoints": "1", + "minSpan": 3, + "nullPointMode": "connected", + "pieType": "pie", + "span": 3, + "strokeWidth": "1", + "targets": [ + { + "hide": true, + "refId": "A", + "target": "keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.stat_bytes, 6)", + "textEditor": true + }, + { + "hide": true, + "refId": "B", + "target": "alias(currentBelow(#A,1099511627776),\"<1TB\")", + "targetFull": "alias(currentBelow(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.stat_bytes, 6),1099511627776),\"<1TB\")", + "textEditor": true + }, + { + "hide": true, + "refId": "C", + "target": "alias(currentBelow(currentAbove(#A,1099511627776),2199023255552),\"2TB\")", + "targetFull": "alias(currentBelow(currentAbove(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.stat_bytes, 6),1099511627776),2199023255552),\"2TB\")", + "textEditor": true + }, + { + "hide": true, + "refId": "D", + "target": "alias(currentBelow(currentAbove(#A,2199023255552),4398046511104),\"4TB\")", + "targetFull": "alias(currentBelow(currentAbove(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.stat_bytes, 
6),2199023255552),4398046511104),\"4TB\")", + "textEditor": true + }, + { + "hide": true, + "refId": "E", + "target": "alias(currentBelow(currentAbove(#A,4398046511104),6597069766656),\"6TB\")", + "targetFull": "alias(currentBelow(currentAbove(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.stat_bytes, 6),4398046511104),6597069766656),\"6TB\")", + "textEditor": true + }, + { + "hide": true, + "refId": "F", + "target": "alias(currentBelow(currentAbove(#A,6597069766656),8796093022208),\"8TB\")", + "targetFull": "alias(currentBelow(currentAbove(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.stat_bytes, 6),6597069766656),8796093022208),\"8TB\")", + "textEditor": true + }, + { + "hide": true, + "refId": "G", + "target": "alias(currentBelow(currentAbove(#A,8796093022208),10995116277760),\"10TB\")", + "targetFull": "alias(currentBelow(currentAbove(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.stat_bytes, 6),8796093022208),10995116277760),\"10TB\")", + "textEditor": true + }, + { + "hide": true, + "refId": "H", + "target": "alias(currentBelow(currentAbove(#A,10995116277760),13194139533312),\"12TB\")", + "targetFull": "alias(currentBelow(currentAbove(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.stat_bytes, 6),10995116277760),13194139533312),\"12TB\")", + "textEditor": true + }, + { + "refId": "I", + "target": "alias(countSeries(#B), \"<1TB\")", + "targetFull": "alias(countSeries(alias(currentBelow(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.stat_bytes, 6),1099511627776),\"<1TB\")), \"<1TB\")", + "textEditor": true + }, + { + "refId": "J", + "target": "alias(countSeries(#C), \"2TB\")", + "targetFull": "alias(countSeries(alias(currentBelow(currentAbove(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.stat_bytes, 6),1099511627776),2199023255552),\"2TB\")), \"2TB\")", + "textEditor": true + }, + { + "refId": "K", + "target": 
"alias(countSeries(#D), \"4TB\")", + "targetFull": "alias(countSeries(alias(currentBelow(currentAbove(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.stat_bytes, 6),2199023255552),4398046511104),\"4TB\")), \"4TB\")", + "textEditor": true + }, + { + "refId": "L", + "target": "alias(countSeries(#E), \"6TB\")", + "targetFull": "alias(countSeries(alias(currentBelow(currentAbove(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.stat_bytes, 6),4398046511104),6597069766656),\"6TB\")), \"6TB\")", + "textEditor": true + }, + { + "refId": "M", + "target": "alias(countSeries(#F), \"8TB\")", + "targetFull": "alias(countSeries(alias(currentBelow(currentAbove(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.stat_bytes, 6),6597069766656),8796093022208),\"8TB\")), \"8TB\")", + "textEditor": true + }, + { + "refId": "N", + "target": "alias(countSeries(#G), \"10TB\")", + "targetFull": "alias(countSeries(alias(currentBelow(currentAbove(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.stat_bytes, 6),8796093022208),10995116277760),\"10TB\")), \"10TB\")", + "textEditor": true + }, + { + "refId": "O", + "target": "alias(countSeries(#H), \"12TB\")", + "targetFull": "alias(countSeries(alias(currentBelow(currentAbove(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.stat_bytes, 6),10995116277760),13194139533312),\"12TB\")), \"12TB\")", + "textEditor": true + } + ], + "title": "OSD Disk Size Summary", + "type": "grafana-piechart-panel", + "valueName": "current" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "fontSize": "100%", + "id": 18, + "links": [], + "maxDataPoints": "1", + "minSpan": 2, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": false + }, + "span": 2, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "alias": "Disk 
Size", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "pattern": "Current", + "thresholds": [], + "type": "number", + "unit": "decbytes" + }, + { + "alias": "Hostname.OSD Id", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Metric", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "refId": "A", + "target": "aliasByNode(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.$osd_id.stat_bytes),1,-2)", + "textEditor": true + } + ], + "title": "OSD Size", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "aliasColors": { + "Non-Encrypted": "#E5AC0E" + }, + "cacheTimeout": null, + "combine": { + "label": "Others", + "threshold": 0 + }, + "datasource": null, + "fontSize": "80%", + "format": "none", + "height": "200px", + "id": 19, + "interval": null, + "legend": { + "percentage": false, + "show": true, + "values": true + }, + "legendType": "Under graph", + "links": [], + "maxDataPoints": "1", + "minSpan": 2, + "nullPointMode": "connected", + "pieType": "pie", + "span": 2, + "strokeWidth": 1, + "targets": [ + { + "refId": "A", + "target": "alias(countSeries(maximumAbove(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.encrypted),0.5)),\"Encrypted\")", + "textEditor": true + }, + { + "refId": "B", + "target": "alias(countSeries(maximumBelow(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.encrypted),0.5)),\"Non-Encrypted\")", + 
"textEditor": true + } + ], + "title": "OSD Encryption Summary", + "type": "grafana-piechart-panel", + "valueName": "current" + }, + { + "aliasColors": { + "Non-Encrypted": "#E5AC0E" + }, + "cacheTimeout": null, + "combine": { + "label": "Others", + "threshold": 0 + }, + "datasource": null, + "fontSize": "80%", + "format": "none", + "height": "200px", + "id": 20, + "interval": null, + "legend": { + "percentage": false, + "show": true, + "values": true + }, + "legendType": "Under graph", + "links": [], + "maxDataPoints": "1", + "minSpan": 2, + "nullPointMode": "connected", + "pieType": "pie", + "span": 2, + "strokeWidth": 1, + "targets": [ + { + "refId": "A", + "target": "alias(countSeries(maximumAbove(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.osd_type),0.5)),\"Bluestore\")", + "textEditor": true + }, + { + "refId": "B", + "target": "alias(countSeries(maximumBelow(keepLastValue(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.osd_type),0.5)),\"Filestore\")", + "textEditor": true + } + ], + "title": "Summary of OSD Types", + "type": "grafana-piechart-panel", + "valueName": "current" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "OSD Summary", + "titleSize": "h5" + }, + { + "collapse": true, + "height": "", + "panels": [ + { + "content": "

Ceph Filestore I/O Process

\n

\nA write request is issued to a 'primary' OSD and committed to a journal using direct-io (apply). Once this write is complete, the data is persisted to HDD by a second 'buffered' write operation (commit). The commit operation is basically a measure of time taken to perform a syncfs call to flush dirty pages to disk, and is therefore not a time associated with any specific client-initiated operation.

The tables on the right show the top 10 OSDs with the highest latencies.\n", + "height": "300", + "id": 10, + "links": [], + "minSpan": 4, + "mode": "html", + "span": 4, + "title": "", + "type": "text" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "description": "Time spent in the queue for the journal. Excessive times here may indicate OSD throttling is happening. In this scenario you should review the OSD specific settings in \"ceph.conf\"; filestore_queue_max_ops or filestore_queue_max_bytes", + "fontSize": "100%", + "height": "300", + "id": 3, + "links": [], + "minSpan": 2, + "pageSize": 6, + "scroll": true, + "showHeader": true, + "sort": { + "col": 1, + "desc": true + }, + "span": 2, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "alias": "Journal Queue Time", + "colorMode": "row", + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Current", + "thresholds": [ + ".001", + ".003" + ], + "type": "number", + "unit": "s" + }, + { + "alias": "OSD Id", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Metric", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "refId": "A", + "target": "aliasByNode(limit(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.queue_transaction_latency_avg,$max_devices),-2)", + "textEditor": true + } + ], + "title": "", + "transform": "timeseries_aggregations", + "type": "table" 
+ }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "description": "Time taken for the write request to be safely committed to the journal device", + "fontSize": "100%", + "height": "300", + "id": 4, + "links": [], + "minSpan": 2, + "pageSize": 6, + "scroll": true, + "showHeader": true, + "sort": { + "col": 1, + "desc": true + }, + "span": 2, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "alias": "Journal Latency", + "colorMode": "row", + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Current", + "thresholds": [ + "0.01", + "0.1" + ], + "type": "number", + "unit": "s" + }, + { + "alias": "OSD Id", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Metric", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "refId": "A", + "target": "aliasByNode(limit(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.journal_latency,$max_devices),-2)", + "textEditor": true + } + ], + "title": "", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "description": "Apply latency covers the time taken to commit to the journal and complete the transaction", + "fontSize": "100%", + "height": "300", + "id": 5, + "links": [], + "minSpan": 2, + "pageSize": 6, + "scroll": true, + "showHeader": true, + "sort": { + "col": 1, + "desc": true + }, + 
"span": 2, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "alias": "Apply Latency", + "colorMode": "row", + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Current", + "thresholds": [ + "100", + "500" + ], + "type": "number", + "unit": "s" + }, + { + "alias": "OSD Id", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Metric", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "refId": "A", + "target": "aliasByNode(limit(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.apply_latency,$max_devices),-2)", + "textEditor": true + } + ], + "title": "", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "columns": [ + { + "text": "Current", + "value": "current" + } + ], + "description": "Commit latency is the time taken for writes to be flushed to disk as part of async kernel activity", + "fontSize": "100%", + "height": "300", + "id": 6, + "links": [], + "minSpan": 2, + "pageSize": 6, + "scroll": true, + "showHeader": true, + "sort": { + "col": 1, + "desc": true + }, + "span": 2, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "alias": "Commit Latency", + "colorMode": "row", + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": 
"Current", + "thresholds": [ + "1", + "3" + ], + "type": "number", + "unit": "s" + }, + { + "alias": "OSD Id", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "Metric", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "refId": "A", + "target": "aliasByNode(limit(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.commitcycle_latency,$max_devices),-2)", + "textEditor": true + } + ], + "title": "", + "transform": "timeseries_aggregations", + "type": "table" + }, + { + "aliasColors": { + "95%ile Commit Latency": "#447EBC", + "Apply Latency Max": "#890F02" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "fill": 0, + "height": "300px", + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxDataPoints": "", + "minSpan": 6, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "Apply Latency Max", + "fill": 0 + }, + { + "alias": "95%ile Apply Latency", + "fill": 2 + } + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "alias(averageSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.$osd_id.queue_transaction_latency_avg),\"Journal queue avg\")", + "textEditor": true + }, + { + "refId": "B", + "target": 
"alias(averageSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.$osd_id.journal_latency),\"Journal latency avg\")", + "textEditor": true + }, + { + "refId": "C", + "target": "alias(averageSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.$osd_id.apply_latency), \"Apply latency avg\")", + "textEditor": true + }, + { + "refId": "D", + "target": "alias(averageSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.$osd_id.commitcycle_latency),\"Commit latency avg\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Latency Averages for OSD Id - '$osd_id'", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Local", + "fill": 1, + "height": "300px", + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "minSpan": 6, + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "refId": "A", + "target": "alias(percentileOfSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.queue_transaction_latency_avg,$percentile), \"journal Queue time\")", + "textEditor": true + }, + { + "refId": "B", + "target": 
"alias(percentileOfSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.journal_latency,$percentile), \"journal Latency\")", + "textEditor": true + }, + { + "refId": "C", + "target": "alias(percentileOfSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.apply_latency,$percentile), \"apply Latency\")", + "textEditor": true + }, + { + "refId": "D", + "target": "alias(percentileOfSeries(collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*.commitcycle_latency,$percentile), \"commit/flush Latency\")", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "IO Summary across all OSD's @ $percentile%ile", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Filestore OSD Latencies", + "titleSize": "h5" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "overview" + ], + "templating": { + "list": [ + { + "allValue": null, + "current": { + "selected": true, + "text": "storage.lab", + "value": "storage.lab" + }, + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "domain", + "options": [ + { + "selected": true, + "text": "storage.lab", + "value": "storage.lab" + } + ], + "query": "storage.lab", + "type": "custom" + }, + { + "allValue": null, + "current": { + "selected": true, + "text": "ceph", + "value": "ceph" + }, + "datasource": "Local", + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "cluster_name", + "options": [], + "query": 
"collectd.*.$domain.cephmetrics.gauge.*", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "selected": true, + "text": "95", + "value": "95" + }, + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "percentile", + "options": [ + { + "selected": true, + "text": "95", + "value": "95" + }, + { + "selected": false, + "text": "96", + "value": "96" + }, + { + "selected": false, + "text": "97", + "value": "97" + }, + { + "selected": false, + "text": "98", + "value": "98" + }, + { + "selected": false, + "text": "99", + "value": "99" + } + ], + "query": "95,96,97,98,99", + "type": "custom" + }, + { + "allValue": null, + "current": { + "selected": true, + "text": "10", + "value": "10" + }, + "hide": 2, + "includeAll": false, + "label": null, + "multi": false, + "name": "max_devices", + "options": [ + { + "selected": true, + "text": "10", + "value": "10" + } + ], + "query": "10", + "type": "custom" + }, + { + "allValue": null, + "current": { + "text": "All", + "value": "$__all" + }, + "datasource": "Local", + "hide": 0, + "includeAll": true, + "label": "OSD Id", + "multi": false, + "name": "osd_id", + "options": [], + "query": "collectd.*.$domain.cephmetrics.gauge.$cluster_name.osd.*", + "refresh": 1, + "regex": "/^\\d+$/", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Ceph OSD Information", + "version": 3 + }, + "meta": { + "canEdit": true, + "canSave": true, + "canStar": true, + "created": "2017-08-01T20:50:51Z", + 
"createdBy": "admin@localhost", + "expires": "0001-01-01T00:00:00Z", + "slug": "ceph-osd-information", + "type": "db", + "updated": "2017-08-01T21:01:21Z", + "updatedBy": "admin@localhost", + "version": 3 + } +} \ No newline at end of file