{
- "meta" : {
- "expires" : "0001-01-01T00:00:00Z",
- "canSave" : true,
- "created" : "2017-08-03T21:42:28Z",
- "canStar" : true,
- "slug" : "alert-status",
- "createdBy" : "admin",
- "canEdit" : true,
- "updated" : "2017-08-18T05:26:10Z",
- "version" : 15,
- "updatedBy" : "admin",
- "type" : "db"
- },
- "dashboard" : {
- "version" : 15,
- "timepicker" : {
- "time_options" : [
- "5m",
- "15m",
- "1h",
- "6h",
- "12h",
- "24h",
- "2d",
- "7d",
- "30d"
- ],
- "refresh_intervals" : [
- "5s",
- "10s",
- "30s",
- "1m",
- "5m",
- "15m",
- "30m",
- "1h",
- "2h",
- "1d"
- ]
- },
- "refresh" : "10s",
- "hideControls" : true,
- "id" : 24,
- "annotations" : {
- "list" : []
- },
- "style" : "dark",
- "gnetId" : null,
- "timezone" : "browser",
- "schemaVersion" : 14,
- "time" : {
- "from" : "now-1h",
- "to" : "now"
- },
- "rows" : [
+ "meta": {
+ "canSave": true,
+ "created": "2017-08-03T21:42:28Z",
+ "canStar": true,
+ "expires": "0001-01-01T00:00:00Z",
+ "updated": "2017-08-18T05:26:10Z",
+ "slug": "alert-status",
+ "version": 15,
+ "createdBy": "admin",
+ "updatedBy": "admin",
+ "type": "db",
+ "canEdit": true
+ },
+ "dashboard": {
+ "style": "dark",
+ "rows": [
{
- "panels" : [
+ "repeat": null,
+ "titleSize": "h6",
+ "collapse": false,
+ "title": "Dashboard Row",
+ "height": "250px",
+ "repeatRowId": null,
+ "panels": [
{
- "limit" : "20",
- "title" : "Active Ceph Alert List",
- "span" : 12,
- "id" : 1,
- "sortOrder" : 3,
- "onlyAlertsOnDashboard" : true,
- "links" : [],
- "show" : "current",
- "type" : "alertlist",
- "stateFilter" : [
+ "span": 12,
+ "stateFilter": [
"alerting"
- ]
+ ],
+ "links": [],
+ "show": "current",
+ "title": "Active Ceph Alert List",
+ "onlyAlertsOnDashboard": true,
+ "limit": "20",
+ "sortOrder": 3,
+ "type": "alertlist",
+ "id": 1
}
- ],
- "repeatIteration" : null,
- "repeat" : null,
- "showTitle" : false,
- "collapse" : false,
- "title" : "Dashboard Row",
- "repeatRowId" : null,
- "height" : "250px",
- "titleSize" : "h6"
- },
+ ],
+ "showTitle": false,
+ "repeatIteration": null
+ },
{
- "height" : 250,
- "titleSize" : "h5",
- "title" : "Health Checks",
- "collapse" : false,
- "repeatRowId" : null,
- "showTitle" : true,
- "panels" : [
+ "repeat": null,
+ "titleSize": "h5",
+ "collapse": false,
+ "title": "Health Checks",
+ "height": 250,
+ "repeatRowId": null,
+ "panels": [
{
- "legend" : {
- "min" : false,
- "values" : false,
- "current" : false,
- "show" : true,
- "total" : false,
- "avg" : false,
- "max" : false
- },
- "dashes" : false,
- "hideTimeOverride" : false,
- "percentage" : false,
- "maxDataPoints" : "360",
- "alert" : {
- "notifications" : [
- {
- "id" : 1
- }
- ],
- "name" : "Overall Ceph Health",
- "conditions" : [
- {
- "query" : {
- "params" : [
- "A",
- "20s",
- "now"
- ]
- },
- "reducer" : {
- "params" : [],
- "type" : "last"
- },
- "operator" : {
- "type" : "and"
- },
- "evaluator" : {
- "type" : "gt",
- "params" : [
- 0
- ]
- },
- "type" : "query"
- }
- ],
- "message" : "Cluster Health is not OK",
- "handler" : 1,
- "frequency" : "10s",
- "executionErrorState" : "keep_state",
- "noDataState" : "no_data"
- },
- "id" : 2,
- "points" : false,
- "spaceLength" : 10,
- "renderer" : "flot",
- "minSpan" : 2,
- "dashLength" : 10,
- "pointradius" : 5,
- "xaxis" : {
- "values" : [],
- "name" : null,
- "show" : true,
- "buckets" : null,
- "mode" : "time"
- },
- "yaxes" : [
+ "bars": false,
+ "timeFrom": null,
+ "links": [],
+ "thresholds": [
{
- "format" : "short",
- "max" : "10",
- "label" : "",
- "show" : true,
- "min" : "0",
- "logBase" : 1
- },
- {
- "max" : null,
- "format" : "short",
- "show" : false,
- "label" : null,
- "logBase" : 1,
- "min" : null
+ "colorMode": "critical",
+ "line": true,
+ "fill": true,
+ "value": 0,
+ "op": "gt"
}
- ],
- "bars" : false,
- "description" : "The chart plots the clusters health, over time. Health is depicted as a integer; 0, 4 or 8 where 0 is OK, 4 is WARN and 8 represents an ERROR state.",
- "thresholds" : [
+ ],
+ "spaceLength": 10,
+ "nullPointMode": "null",
+ "renderer": "flot",
+ "linewidth": 2,
+ "steppedLine": true,
+ "id": 2,
+ "maxDataPoints": "360",
+ "fill": 1,
+ "span": 2,
+ "title": "Overall Ceph Health",
+ "tooltip": {
+ "sort": 1,
+ "shared": false,
+ "value_type": "individual"
+ },
+ "targets": [
{
- "fill" : true,
- "colorMode" : "critical",
- "op" : "gt",
- "line" : true,
- "value" : 0
+ "textEditor": true,
+ "target": "alias(maxSeries(consolidateBy(keepLastValue(transformNull(collectd.*.$domain.cephmetrics.gauge.*.mon.health,0)),\"max\")),\"Ceph Health\")",
+ "refId": "A"
}
- ],
- "tooltip" : {
- "shared" : false,
- "value_type" : "individual",
- "sort" : 1
- },
- "targets" : [
+ ],
+ "yaxes": [
+ {
+ "logBase": 1,
+ "format": "short",
+ "max": "10",
+ "min": "0",
+ "label": "",
+ "show": true
+ },
{
- "target" : "alias(maxSeries(consolidateBy(keepLastValue(transformNull(collectd.*.$domain.cephmetrics.gauge.*.mon.health,0)),\"max\")),\"Ceph Health\")",
- "textEditor" : true,
- "refId" : "A"
+ "logBase": 1,
+ "show": false,
+ "max": null,
+ "format": "short",
+ "label": null,
+ "min": null
}
- ],
- "linewidth" : 2,
- "stack" : false,
- "title" : "Overall Ceph Health",
- "nullPointMode" : "null",
- "span" : 2,
- "seriesOverrides" : [],
- "lines" : true,
- "datasource" : "Local",
- "fill" : 1,
- "timeShift" : null,
- "links" : [],
- "type" : "graph",
- "timeFrom" : null,
- "steppedLine" : true,
- "aliasColors" : {
- "Ceph Health" : "#890F02",
- "Ceph Health (0:OK, 4:Warning,8:Error)" : "#DEDAF7",
- "ceph health" : "#890F02"
- }
- },
- {
- "percentage" : false,
- "dashes" : false,
- "legend" : {
- "current" : false,
- "show" : false,
- "min" : false,
- "values" : false,
- "max" : false,
- "total" : false,
- "avg" : false
- },
- "spaceLength" : 10,
- "id" : 3,
- "points" : false,
- "alert" : {
- "name" : "Disks Near Full",
- "notifications" : [
+ ],
+ "xaxis": {
+ "buckets": null,
+ "show": true,
+ "values": [],
+ "mode": "time",
+ "name": null
+ },
+ "seriesOverrides": [],
+ "percentage": false,
+ "type": "graph",
+ "dashes": false,
+ "description": "The chart plots the cluster's health over time. Health is depicted as an integer: 0, 4 or 8, where 0 is OK, 4 is WARN and 8 represents an ERROR state.",
+ "alert": {
+ "noDataState": "no_data",
+ "name": "Overall Ceph Health",
+ "frequency": "10s",
+ "notifications": [
{
- "id" : 1
+ "id": 1
}
- ],
- "message" : "DIsks Near full detected within the cluster. Warning threshold is 80% full.",
- "conditions" : [
+ ],
+ "handler": 1,
+ "executionErrorState": "keep_state",
+ "message": "Cluster Health is not OK",
+ "conditions": [
{
- "operator" : {
- "type" : "and"
- },
- "reducer" : {
- "type" : "max",
- "params" : []
- },
- "query" : {
- "params" : [
- "A",
- "1m",
+ "operator": {
+ "type": "and"
+ },
+ "query": {
+ "params": [
+ "A",
+ "20s",
"now"
]
- },
- "type" : "query",
- "evaluator" : {
- "params" : [
+ },
+ "evaluator": {
+ "type": "gt",
+ "params": [
0
- ],
- "type" : "gt"
- }
+ ]
+ },
+ "reducer": {
+ "params": [],
+ "type": "last"
+ },
+ "type": "query"
}
- ],
- "frequency" : "60s",
- "executionErrorState" : "keep_state",
- "handler" : 1,
- "noDataState" : "ok"
- },
- "pointradius" : 5,
- "renderer" : "flot",
- "minSpan" : 2,
- "dashLength" : 10,
- "yaxes" : [
- {
- "min" : "0",
- "logBase" : 1,
- "label" : null,
- "show" : true,
- "format" : "short",
- "max" : null
- },
+ ]
+ },
+ "hideTimeOverride": false,
+ "dashLength": 10,
+ "stack": false,
+ "timeShift": null,
+ "aliasColors": {
+ "Ceph Health (0:OK, 4:Warning,8:Error)": "#DEDAF7",
+ "Ceph Health": "#890F02",
+ "ceph health": "#890F02"
+ },
+ "lines": true,
+ "legend": {
+ "total": false,
+ "min": false,
+ "max": false,
+ "show": true,
+ "current": false,
+ "values": false,
+ "avg": false
+ },
+ "points": false,
+ "datasource": "Local",
+ "pointradius": 5,
+ "minSpan": 2
+ },
+ {
+ "bars": false,
+ "timeFrom": null,
+ "links": [],
+ "thresholds": [
{
- "label" : null,
- "show" : false,
- "min" : null,
- "logBase" : 1,
- "format" : "short",
- "max" : null
+ "colorMode": "critical",
+ "line": true,
+ "fill": true,
+ "value": 0,
+ "op": "gt"
}
- ],
- "xaxis" : {
- "mode" : "time",
- "values" : [],
- "name" : null,
- "show" : true,
- "buckets" : null
- },
- "description" : "This shows how many disks are at or above 80% full. Performance may degrade beyond this threshold on filestore (XFS) backed OSD's.",
- "bars" : false,
- "tooltip" : {
- "value_type" : "individual",
- "sort" : 0,
- "shared" : true
- },
- "targets" : [
+ ],
+ "spaceLength": 10,
+ "nullPointMode": "null",
+ "renderer": "flot",
+ "linewidth": 1,
+ "steppedLine": false,
+ "targets": [
{
- "textEditor" : true,
- "target" : "currentAbove(transformNull(keepLastValue(collectd.*.$domain.cephmetrics.gauge.*.osd.*.osd_percent_used),0),85)",
- "refId" : "A",
- "hide" : true
- },
+ "textEditor": true,
+ "hide": true,
+ "target": "currentAbove(transformNull(keepLastValue(collectd.*.$domain.cephmetrics.gauge.*.osd.*.osd_percent_used),0),85)",
+ "refId": "A"
+ },
{
- "target" : "alias(countSeries(#A),\"OSDs Near Full\")",
- "textEditor" : true,
- "refId" : "B",
- "targetFull" : "alias(countSeries(currentAbove(transformNull(keepLastValue(collectd.*.$domain.cephmetrics.gauge.*.osd.*.osd_percent_used),0),85)),\"OSDs Near Full\")"
+ "targetFull": "alias(countSeries(currentAbove(transformNull(keepLastValue(collectd.*.$domain.cephmetrics.gauge.*.osd.*.osd_percent_used),0),85)),\"OSDs Near Full\")",
+ "textEditor": true,
+ "target": "alias(countSeries(#A),\"OSDs Near Full\")",
+ "refId": "B"
}
- ],
- "linewidth" : 1,
- "thresholds" : [
+ ],
+ "fill": 1,
+ "span": 2,
+ "title": "Disks Near Full",
+ "tooltip": {
+ "sort": 0,
+ "shared": true,
+ "value_type": "individual"
+ },
+ "id": 3,
+ "yaxes": [
+ {
+ "logBase": 1,
+ "min": "0",
+ "max": null,
+ "format": "short",
+ "label": null,
+ "show": true
+ },
{
- "colorMode" : "critical",
- "fill" : true,
- "value" : 0,
- "op" : "gt",
- "line" : true
+ "logBase": 1,
+ "min": null,
+ "max": null,
+ "format": "short",
+ "show": false,
+ "label": null
}
- ],
- "datasource" : "Local",
- "lines" : true,
- "title" : "Disks Near Full",
- "stack" : false,
- "nullPointMode" : "null",
- "span" : 2,
- "seriesOverrides" : [],
- "aliasColors" : {},
- "steppedLine" : false,
- "timeShift" : null,
- "links" : [],
- "fill" : 1,
- "type" : "graph",
- "timeFrom" : null
- },
- {
- "hideTimeOverride" : true,
- "percentage" : false,
- "dashes" : false,
- "legend" : {
- "current" : false,
- "min" : false,
- "alignAsTable" : false,
- "max" : false,
- "total" : false,
- "show" : false,
- "rightSide" : false,
- "values" : false,
- "avg" : false,
- "hideZero" : false
- },
- "spaceLength" : 10,
- "points" : false,
- "id" : 4,
- "alert" : {
- "name" : "OSDs Down",
- "notifications" : [
+ ],
+ "xaxis": {
+ "buckets": null,
+ "values": [],
+ "mode": "time",
+ "name": null,
+ "show": true
+ },
+ "seriesOverrides": [],
+ "percentage": false,
+ "type": "graph",
+ "dashes": false,
+ "description": "This shows how many disks are at or above 80% full. Performance may degrade beyond this threshold on filestore (XFS) backed OSDs.",
+ "alert": {
+ "noDataState": "ok",
+ "name": "Disks Near Full",
+ "frequency": "60s",
+ "notifications": [
{
- "id" : 1
+ "id": 1
}
- ],
- "conditions" : [
+ ],
+ "handler": 1,
+ "executionErrorState": "keep_state",
+ "message": "Disks near full detected within the cluster. Warning threshold is 80% full.",
+ "conditions": [
{
- "evaluator" : {
- "type" : "gt",
- "params" : [
- 0
- ]
- },
- "type" : "query",
- "query" : {
- "params" : [
- "C",
- "30s",
+ "operator": {
+ "type": "and"
+ },
+ "query": {
+ "params": [
+ "A",
+ "1m",
"now"
]
- },
- "reducer" : {
- "params" : [],
- "type" : "max"
- },
- "operator" : {
- "type" : "and"
- }
+ },
+ "evaluator": {
+ "params": [
+ 0
+ ],
+ "type": "gt"
+ },
+ "reducer": {
+ "type": "max",
+ "params": []
+ },
+ "type": "query"
}
- ],
- "message" : "OSD Down event",
- "handler" : 1,
- "executionErrorState" : "alerting",
- "frequency" : "10s",
- "noDataState" : "ok"
- },
- "pointradius" : 5,
- "dashLength" : 10,
- "minSpan" : 2,
- "renderer" : "flot",
- "yaxes" : [
- {
- "format" : "short",
- "max" : null,
- "label" : null,
- "show" : true,
- "min" : "0",
- "logBase" : 1
- },
+ ]
+ },
+ "dashLength": 10,
+ "stack": false,
+ "timeShift": null,
+ "aliasColors": {},
+ "lines": true,
+ "legend": {
+ "total": false,
+ "show": false,
+ "max": false,
+ "min": false,
+ "current": false,
+ "values": false,
+ "avg": false
+ },
+ "points": false,
+ "datasource": "Local",
+ "pointradius": 5,
+ "minSpan": 2
+ },
+ {
+ "bars": true,
+ "timeFrom": "5m",
+ "links": [],
+ "thresholds": [
{
- "logBase" : 1,
- "min" : null,
- "show" : false,
- "label" : null,
- "max" : null,
- "format" : "short"
+ "colorMode": "critical",
+ "line": true,
+ "op": "gt",
+ "value": 0,
+ "fill": true
}
- ],
- "xaxis" : {
- "mode" : "time",
- "show" : true,
- "buckets" : null,
- "name" : null,
- "values" : []
- },
- "description" : "Count of OSDs currently in a DOWN state",
- "bars" : true,
- "linewidth" : 2,
- "tooltip" : {
- "shared" : true,
- "value_type" : "individual",
- "sort" : 0
- },
- "targets" : [
+ ],
+ "spaceLength": 10,
+ "nullPointMode": "null",
+ "renderer": "flot",
+ "linewidth": 2,
+ "steppedLine": true,
+ "targets": [
{
- "target" : "alias(keepLastValue(consolidateBy(maxSeries(collectd.*.$domain.cephmetrics.gauge.*.mon.num_osd), \"max\")),\"total\")",
- "textEditor" : true,
- "hide" : true,
- "refId" : "A"
- },
+ "textEditor": true,
+ "hide": true,
+ "target": "alias(keepLastValue(consolidateBy(maxSeries(collectd.*.$domain.cephmetrics.gauge.*.mon.num_osd), \"max\")),\"total\")",
+ "refId": "A"
+ },
{
- "hide" : true,
- "refId" : "B",
- "target" : "alias(keepLastValue(consolidateBy(maxSeries(collectd.*.$domain.cephmetrics.gauge.*.mon.num_osd_up), \"max\")),\"up\")",
- "textEditor" : true
- },
+ "hide": true,
+ "textEditor": true,
+ "refId": "B",
+ "target": "alias(keepLastValue(consolidateBy(maxSeries(collectd.*.$domain.cephmetrics.gauge.*.mon.num_osd_up), \"max\")),\"up\")"
+ },
{
- "targetFull" : "alias(diffSeries(alias(keepLastValue(consolidateBy(maxSeries(collectd.*.$domain.cephmetrics.gauge.*.mon.num_osd), \"max\")),\"total\"),alias(keepLastValue(consolidateBy(maxSeries(collectd.*.$domain.cephmetrics.gauge.*.mon.num_osd_up), \"max\")),\"up\")), \"down\")",
- "refId" : "C",
- "hide" : false,
- "textEditor" : true,
- "target" : "alias(diffSeries(#A,#B), \"down\")"
+ "hide": false,
+ "targetFull": "alias(diffSeries(alias(keepLastValue(consolidateBy(maxSeries(collectd.*.$domain.cephmetrics.gauge.*.mon.num_osd), \"max\")),\"total\"),alias(keepLastValue(consolidateBy(maxSeries(collectd.*.$domain.cephmetrics.gauge.*.mon.num_osd_up), \"max\")),\"up\")), \"down\")",
+ "textEditor": true,
+ "refId": "C",
+ "target": "alias(diffSeries(#A,#B), \"down\")"
}
- ],
- "thresholds" : [
+ ],
+ "fill": 2,
+ "span": 2,
+ "title": "OSDs Down",
+ "tooltip": {
+ "sort": 0,
+ "shared": true,
+ "value_type": "individual"
+ },
+ "id": 4,
+ "yaxes": [
+ {
+ "logBase": 1,
+ "format": "short",
+ "max": null,
+ "min": "0",
+ "label": null,
+ "show": true
+ },
{
- "op" : "gt",
- "line" : true,
- "value" : 0,
- "fill" : true,
- "colorMode" : "critical"
+ "logBase": 1,
+ "min": null,
+ "max": null,
+ "format": "short",
+ "show": false,
+ "label": null
}
- ],
- "datasource" : "Local",
- "lines" : false,
- "nullPointMode" : "null",
- "title" : "OSDs Down",
- "stack" : false,
- "span" : 2,
- "seriesOverrides" : [],
- "steppedLine" : true,
- "aliasColors" : {},
- "timeFrom" : "5m",
- "type" : "graph",
- "fill" : 2,
- "timeShift" : null,
- "links" : []
- },
+ ],
+ "xaxis": {
+ "buckets": null,
+ "show": true,
+ "values": [],
+ "mode": "time",
+ "name": null
+ },
+ "seriesOverrides": [],
+ "percentage": false,
+ "type": "graph",
+ "dashes": false,
+ "description": "Count of OSDs currently in a DOWN state",
+ "alert": {
+ "noDataState": "ok",
+ "name": "OSDs Down",
+ "frequency": "10s",
+ "notifications": [
+ {
+ "id": 1
+ }
+ ],
+ "handler": 1,
+ "executionErrorState": "alerting",
+ "message": "OSD Down event",
+ "conditions": [
+ {
+ "operator": {
+ "type": "and"
+ },
+ "query": {
+ "params": [
+ "C",
+ "30s",
+ "now"
+ ]
+ },
+ "evaluator": {
+ "type": "gt",
+ "params": [
+ 0
+ ]
+ },
+ "reducer": {
+ "params": [],
+ "type": "max"
+ },
+ "type": "query"
+ }
+ ]
+ },
+ "hideTimeOverride": true,
+ "dashLength": 10,
+ "stack": false,
+ "timeShift": null,
+ "aliasColors": {},
+ "lines": false,
+ "legend": {
+ "rightSide": false,
+ "total": false,
+ "min": false,
+ "max": false,
+ "show": false,
+ "current": false,
+ "values": false,
+ "alignAsTable": false,
+ "avg": false,
+ "hideZero": false
+ },
+ "points": false,
+ "datasource": "Local",
+ "pointradius": 5,
+ "minSpan": 2
+ },
{
- "bars" : false,
- "description" : "This trigger raises a notification if the raw used crosses the 85% capacity threshold of the ceph cluster",
- "thresholds" : [
+ "bars": false,
+ "timeFrom": null,
+ "links": [],
+ "thresholds": [
{
- "op" : "gt",
- "line" : true,
- "value" : 85,
- "fill" : true,
- "colorMode" : "critical"
+ "colorMode": "critical",
+ "line": true,
+ "op": "gt",
+ "value": 85,
+ "fill": true
}
- ],
- "linewidth" : 1,
- "targets" : [
+ ],
+ "spaceLength": 10,
+ "nullPointMode": "null",
+ "renderer": "flot",
+ "stack": false,
+ "linewidth": 1,
+ "steppedLine": false,
+ "targets": [
{
- "hide" : true,
- "refId" : "A",
- "textEditor" : true,
- "target" : "alias(maxSeries(groupByNode(collectd.*.$domain.cephmetrics.gauge.*.mon.osd_bytes,1, \"maxSeries\")), \"Raw Capacity\")"
- },
+ "hide": true,
+ "textEditor": true,
+ "refId": "A",
+ "target": "alias(maxSeries(groupByNode(collectd.*.$domain.cephmetrics.gauge.*.mon.osd_bytes,1, \"maxSeries\")), \"Raw Capacity\")"
+ },
{
- "refId" : "B",
- "hide" : true,
- "textEditor" : true,
- "target" : "alias(maxSeries(groupByNode(collectd.*.$domain.cephmetrics.gauge.*.mon.osd_bytes_used,1, \"maxSeries\")), \"Used Raw\")"
- },
+ "hide": true,
+ "textEditor": true,
+ "refId": "B",
+ "target": "alias(maxSeries(groupByNode(collectd.*.$domain.cephmetrics.gauge.*.mon.osd_bytes_used,1, \"maxSeries\")), \"Used Raw\")"
+ },
{
- "target" : "alias(asPercent(#B, #A), \"Raw Capacity Used %\")",
- "textEditor" : true,
- "targetFull" : "alias(asPercent(alias(maxSeries(groupByNode(collectd.*.$domain.cephmetrics.gauge.*.mon.osd_bytes_used,1, \"maxSeries\")), \"Used Raw\"), alias(maxSeries(groupByNode(collectd.*.$domain.cephmetrics.gauge.*.mon.osd_bytes,1, \"maxSeries\")), \"Raw Capacity\")), \"Raw Capacity Used %\")",
- "refId" : "C"
+ "targetFull": "alias(asPercent(alias(maxSeries(groupByNode(collectd.*.$domain.cephmetrics.gauge.*.mon.osd_bytes_used,1, \"maxSeries\")), \"Used Raw\"), alias(maxSeries(groupByNode(collectd.*.$domain.cephmetrics.gauge.*.mon.osd_bytes,1, \"maxSeries\")), \"Raw Capacity\")), \"Raw Capacity Used %\")",
+ "textEditor": true,
+ "target": "alias(asPercent(#B, #A), \"Raw Capacity Used %\")",
+ "refId": "C"
}
- ],
- "tooltip" : {
- "shared" : true,
- "value_type" : "individual",
- "sort" : 0
- },
- "title" : "Cluster Capacity",
- "stack" : false,
- "seriesOverrides" : [],
- "span" : 2,
- "nullPointMode" : "null",
- "lines" : true,
- "datasource" : "Local",
- "timeFrom" : null,
- "type" : "graph",
- "timeShift" : null,
- "links" : [],
- "fill" : 1,
- "aliasColors" : {},
- "steppedLine" : false,
- "legend" : {
- "max" : false,
- "avg" : false,
- "total" : false,
- "show" : true,
- "current" : false,
- "values" : false,
- "min" : false
- },
- "dashes" : false,
- "percentage" : false,
- "alert" : {
- "name" : "Cluster Capacity",
- "notifications" : [
+ ],
+ "fill": 1,
+ "span": 2,
+ "title": "Cluster Capacity",
+ "tooltip": {
+ "sort": 0,
+ "shared": true,
+ "value_type": "individual"
+ },
+ "id": 5,
+ "points": false,
+ "xaxis": {
+ "buckets": null,
+ "show": true,
+ "values": [],
+ "mode": "time",
+ "name": null
+ },
+ "seriesOverrides": [],
+ "percentage": false,
+ "type": "graph",
+ "dashes": false,
+ "description": "This trigger raises a notification if the raw used crosses the 85% capacity threshold of the ceph cluster",
+ "alert": {
+ "noDataState": "keep_state",
+ "name": "Cluster Capacity",
+ "frequency": "60s",
+ "notifications": [
{
- "id" : 1
+ "id": 1
}
- ],
- "message" : "Cluster Capacity Limit Warning",
- "conditions" : [
+ ],
+ "handler": 1,
+ "executionErrorState": "alerting",
+ "message": "Cluster Capacity Limit Warning",
+ "conditions": [
{
- "evaluator" : {
- "params" : [
- 85
- ],
- "type" : "gt"
- },
- "type" : "query",
- "query" : {
- "params" : [
- "C",
- "1h",
+ "operator": {
+ "type": "and"
+ },
+ "query": {
+ "params": [
+ "C",
+ "1h",
"now"
]
- },
- "reducer" : {
- "params" : [],
- "type" : "avg"
- },
- "operator" : {
- "type" : "and"
- }
+ },
+ "evaluator": {
+ "params": [
+ 85
+ ],
+ "type": "gt"
+ },
+ "reducer": {
+ "params": [],
+ "type": "avg"
+ },
+ "type": "query"
}
- ],
- "executionErrorState" : "alerting",
- "frequency" : "60s",
- "handler" : 1,
- "noDataState" : "keep_state"
- },
- "points" : false,
- "id" : 5,
- "spaceLength" : 10,
- "minSpan" : 2,
- "dashLength" : 10,
- "renderer" : "flot",
- "pointradius" : 5,
- "xaxis" : {
- "values" : [],
- "name" : null,
- "show" : true,
- "buckets" : null,
- "mode" : "time"
- },
- "yaxes" : [
+ ]
+ },
+ "dashLength": 10,
+ "legend": {
+ "total": false,
+ "min": false,
+ "max": false,
+ "show": true,
+ "current": false,
+ "values": false,
+ "avg": false
+ },
+ "timeShift": null,
+ "aliasColors": {},
+ "lines": true,
+ "yaxes": [
{
- "label" : "",
- "show" : true,
- "min" : "0",
- "logBase" : 1,
- "format" : "percent",
- "max" : "100"
- },
+ "logBase": 1,
+ "min": "0",
+ "max": "100",
+ "format": "percent",
+ "show": true,
+ "label": ""
+ },
{
- "logBase" : 1,
- "min" : null,
- "show" : false,
- "label" : null,
- "max" : null,
- "format" : "short"
+ "logBase": 1,
+ "min": null,
+ "max": null,
+ "format": "short",
+ "show": false,
+ "label": null
}
- ]
- },
+ ],
+ "datasource": "Local",
+ "pointradius": 5,
+ "minSpan": 2
+ },
{
- "alert" : {
- "executionErrorState" : "alerting",
- "frequency" : "60s",
- "handler" : 1,
- "noDataState" : "no_data",
- "notifications" : [
+ "bars": false,
+ "timeFrom": "6h",
+ "links": [],
+ "thresholds": [
+ {
+ "colorMode": "critical",
+ "line": true,
+ "fill": true,
+ "value": 0,
+ "op": "gt"
+ }
+ ],
+ "spaceLength": 10,
+ "nullPointMode": "null",
+ "renderer": "flot",
+ "linewidth": 2,
+ "steppedLine": false,
+ "targets": [
+ {
+ "textEditor": true,
+ "refId": "A",
+ "target": "alias(maxSeries(consolidateBy(collectd.*.$domain.cephmetrics.gauge.*.mon.num_pgs_stuck, \"maxSeries\")), \"# pg's stuck inactive\")"
+ }
+ ],
+ "fill": 2,
+ "span": 2,
+ "title": "PG's Stuck",
+ "tooltip": {
+ "sort": 0,
+ "shared": false,
+ "value_type": "individual"
+ },
+ "id": 8,
+ "yaxes": [
+ {
+ "logBase": 1,
+ "min": "0",
+ "max": null,
+ "format": "short",
+ "show": true,
+ "label": null
+ },
+ {
+ "logBase": 1,
+ "show": false,
+ "max": null,
+ "format": "short",
+ "min": null,
+ "label": null
+ }
+ ],
+ "xaxis": {
+ "buckets": null,
+ "show": true,
+ "values": [
+ "total"
+ ],
+ "mode": "time",
+ "name": null
+ },
+ "seriesOverrides": [],
+ "percentage": false,
+ "type": "graph",
+ "dashes": false,
+ "description": "This chart shows whether there are PGs in a stuck state that need manual intervention to resolve.",
+ "alert": {
+ "noDataState": "no_data",
+ "name": "PG's Stuck",
+ "frequency": "60s",
+ "notifications": [
{
- "id" : 1
+ "id": 1
}
- ],
- "name" : "PG's Stuck",
- "message" : "PG's stuck inactive",
- "conditions" : [
+ ],
+ "handler": 1,
+ "executionErrorState": "alerting",
+ "message": "PG's stuck inactive",
+ "conditions": [
{
- "evaluator" : {
- "params" : [
- 0
- ],
- "type" : "gt"
- },
- "type" : "query",
- "query" : {
- "params" : [
- "A",
- "1m",
+ "operator": {
+ "type": "and"
+ },
+ "query": {
+ "params": [
+ "A",
+ "1m",
"now"
]
- },
- "reducer" : {
- "type" : "last",
- "params" : []
- },
- "operator" : {
- "type" : "and"
- }
+ },
+ "evaluator": {
+ "params": [
+ 0
+ ],
+ "type": "gt"
+ },
+ "reducer": {
+ "type": "last",
+ "params": []
+ },
+ "type": "query"
}
]
- },
- "points" : false,
- "id" : 8,
- "spaceLength" : 10,
- "legend" : {
- "max" : false,
- "avg" : false,
- "total" : false,
- "show" : true,
- "current" : false,
- "values" : false,
- "min" : false
- },
- "dashes" : false,
- "percentage" : false,
- "hideTimeOverride" : true,
- "xaxis" : {
- "mode" : "time",
- "show" : true,
- "buckets" : null,
- "name" : null,
- "values" : [
- "total"
- ]
- },
- "yaxes" : [
- {
- "logBase" : 1,
- "min" : "0",
- "show" : true,
- "label" : null,
- "max" : null,
- "format" : "short"
- },
+ },
+ "hideTimeOverride": true,
+ "dashLength": 10,
+ "stack": false,
+ "timeShift": null,
+ "aliasColors": {},
+ "lines": true,
+ "legend": {
+ "total": false,
+ "min": false,
+ "max": false,
+ "show": true,
+ "current": false,
+ "values": false,
+ "avg": false
+ },
+ "points": false,
+ "datasource": "Local",
+ "pointradius": 5,
+ "minSpan": 2
+ },
+ {
+ "bars": false,
+ "timeFrom": null,
+ "links": [],
+ "thresholds": [
{
- "show" : false,
- "label" : null,
- "logBase" : 1,
- "min" : null,
- "max" : null,
- "format" : "short"
+ "colorMode": "critical",
+ "line": true,
+ "op": "lt",
+ "value": 0,
+ "fill": true
}
- ],
- "dashLength" : 10,
- "minSpan" : 2,
- "renderer" : "flot",
- "pointradius" : 5,
- "thresholds" : [
+ ],
+ "spaceLength": 10,
+ "nullPointMode": "null",
+ "renderer": "flot",
+ "linewidth": 1,
+ "steppedLine": false,
+ "targets": [
{
- "fill" : true,
- "colorMode" : "critical",
- "line" : true,
- "op" : "gt",
- "value" : 0
- }
- ],
- "linewidth" : 2,
- "tooltip" : {
- "shared" : false,
- "sort" : 0,
- "value_type" : "individual"
- },
- "targets" : [
+ "hide": true,
+ "textEditor": true,
+ "refId": "A",
+ "target": "alias(scale(maxSeries(groupByNode(collectd.*.$domain.cephmetrics.gauge.*.mon.osd_bytes_avail,1, \"maxSeries\")),0.9), \"Raw Freespace\")"
+ },
{
- "refId" : "A",
- "target" : "alias(maxSeries(consolidateBy(collectd.*.$domain.cephmetrics.gauge.*.mon.num_pgs_stuck, \"maxSeries\")), \"# pg's stuck inactive\")",
- "textEditor" : true
+ "textEditor": true,
+ "hide": true,
+ "target": "alias(maxSeries(groupByNode(keepLastValue(collectd.*.$domain.cephmetrics.gauge.*.osd.*.stat_bytes),1,\"sumSeries\")), \"Largest OSD Host\")",
+ "refId": "B"
+ },
+ {
+ "targetFull": "alias(diffSeries(alias(scale(maxSeries(groupByNode(collectd.*.$domain.cephmetrics.gauge.*.mon.osd_bytes_avail,1, \"maxSeries\")),0.9), \"Raw Freespace\"),alias(maxSeries(groupByNode(keepLastValue(collectd.*.$domain.cephmetrics.gauge.*.osd.*.stat_bytes),1,\"sumSeries\")), \"Largest OSD Host\")),\"freespace after Node loss\")",
+ "textEditor": true,
+ "refId": "C",
+ "target": "alias(diffSeries(#A,#B),\"freespace after Node loss\")"
}
- ],
- "bars" : false,
- "description" : "This chart shows whether there are pg's in a stuck state, that need manual intervention to resolve.",
- "timeFrom" : "6h",
- "type" : "graph",
- "links" : [],
- "timeShift" : null,
- "fill" : 2,
- "aliasColors" : {},
- "steppedLine" : false,
- "span" : 2,
- "title" : "PG's Stuck",
- "stack" : false,
- "seriesOverrides" : [],
- "nullPointMode" : "null",
- "lines" : true,
- "datasource" : "Local"
- },
- {
- "xaxis" : {
- "values" : [],
- "name" : null,
- "show" : true,
- "buckets" : null,
- "mode" : "time"
- },
- "yaxes" : [
+ ],
+ "fill": 1,
+ "span": 2,
+ "title": "OSD Host Loss Check",
+ "tooltip": {
+ "sort": 0,
+ "shared": true,
+ "value_type": "individual"
+ },
+ "id": 9,
+ "yaxes": [
{
- "show" : true,
- "label" : null,
- "logBase" : 1,
- "min" : "0",
- "max" : null,
- "format" : "decbytes"
- },
+ "logBase": 1,
+ "show": true,
+ "max": null,
+ "format": "decbytes",
+ "min": "0",
+ "label": null
+ },
{
- "show" : true,
- "label" : null,
- "logBase" : 1,
- "min" : null,
- "max" : null,
- "format" : "short"
+ "logBase": 1,
+ "show": true,
+ "max": null,
+ "format": "short",
+ "min": null,
+ "label": null
}
- ],
- "minSpan" : 2,
- "dashLength" : 10,
- "renderer" : "flot",
- "pointradius" : 5,
- "alert" : {
- "noDataState" : "ok",
- "handler" : 1,
- "executionErrorState" : "alerting",
- "frequency" : "60s",
- "conditions" : [
+ ],
+ "xaxis": {
+ "buckets": null,
+ "show": true,
+ "values": [],
+ "mode": "time",
+ "name": null
+ },
+ "seriesOverrides": [],
+ "percentage": false,
+ "type": "graph",
+ "dashes": false,
+ "description": "This graph checks whether the cluster, at 90% full, has enough free space to support the loss of the largest OSD host.",
+ "alert": {
+ "noDataState": "ok",
+ "name": "OSD Host Loss Check",
+ "frequency": "60s",
+ "notifications": [
+ {
+ "id": 1
+ }
+ ],
+ "handler": 1,
+ "executionErrorState": "alerting",
+ "message": "OSD Host Loss Free Space Check Failed",
+ "conditions": [
{
- "reducer" : {
- "type" : "min",
- "params" : []
- },
- "operator" : {
- "type" : "and"
- },
- "query" : {
- "params" : [
- "A",
- "5m",
+ "operator": {
+ "type": "and"
+ },
+ "query": {
+ "params": [
+ "A",
+ "5m",
"now"
]
- },
- "type" : "query",
- "evaluator" : {
- "type" : "lt",
- "params" : [
+ },
+ "evaluator": {
+ "type": "lt",
+ "params": [
0
]
- }
- }
- ],
- "message" : "OSD Host Loss Free Space Check Failed",
- "name" : "OSD Host Loss Check",
- "notifications" : [
- {
- "id" : 1
+ },
+ "reducer": {
+ "type": "min",
+ "params": []
+ },
+ "type": "query"
}
]
- },
- "spaceLength" : 10,
- "points" : false,
- "id" : 9,
- "dashes" : false,
- "legend" : {
- "current" : false,
- "show" : false,
- "min" : false,
- "values" : false,
- "max" : false,
- "total" : false,
- "avg" : false
- },
- "percentage" : false,
- "aliasColors" : {
- "Largest OSD Host" : "#890F02"
- },
- "steppedLine" : false,
- "timeFrom" : null,
- "type" : "graph",
- "timeShift" : null,
- "links" : [],
- "fill" : 1,
- "lines" : true,
- "seriesOverrides" : [],
- "title" : "OSD Host Loss Check",
- "stack" : false,
- "span" : 2,
- "nullPointMode" : "null",
- "datasource" : "Local",
- "thresholds" : [
+ },
+ "dashLength": 10,
+ "stack": false,
+ "timeShift": null,
+ "aliasColors": {
+ "Largest OSD Host": "#890F02"
+ },
+ "lines": true,
+ "legend": {
+ "total": false,
+ "show": false,
+ "max": false,
+ "min": false,
+ "current": false,
+ "values": false,
+ "avg": false
+ },
+ "points": false,
+ "datasource": "Local",
+ "pointradius": 5,
+ "minSpan": 2
+ },
+ {
+ "bars": false,
+ "timeFrom": "1h",
+ "links": [],
+ "thresholds": [
{
- "line" : true,
- "op" : "lt",
- "value" : 0,
- "fill" : true,
- "colorMode" : "critical"
+ "colorMode": "critical",
+ "line": true,
+ "fill": true,
+ "value": 1000,
+ "op": "gt"
}
- ],
- "linewidth" : 1,
- "tooltip" : {
- "shared" : true,
- "sort" : 0,
- "value_type" : "individual"
- },
- "targets" : [
+ ],
+ "spaceLength": 10,
+ "nullPointMode": "null",
+ "renderer": "flot",
+ "stack": false,
+ "linewidth": 1,
+ "steppedLine": false,
+ "targets": [
{
- "refId" : "A",
- "hide" : true,
- "textEditor" : true,
- "target" : "alias(scale(maxSeries(groupByNode(collectd.*.$domain.cephmetrics.gauge.*.mon.osd_bytes_avail,1, \"maxSeries\")),0.9), \"Raw Freespace\")"
- },
+ "textEditor": true,
+ "refId": "A",
+ "target": "aliasByNode(currentAbove(keepLastValue(transformNull(collectd.*.$domain.cephmetrics.gauge.*.osd.*.perf.await,-1)),1000),1,-3)"
+ }
+ ],
+ "maxDataPoints": "",
+ "fill": 1,
+ "span": 2,
+ "title": "Slow OSD responses",
+ "tooltip": {
+ "sort": 0,
+ "shared": true,
+ "value_type": "individual"
+ },
+ "id": 10,
+ "yaxes": [
{
- "target" : "alias(maxSeries(groupByNode(keepLastValue(collectd.*.$domain.cephmetrics.gauge.*.osd.*.stat_bytes),1,\"sumSeries\")), \"Largest OSD Host\")",
- "textEditor" : true,
- "refId" : "B",
- "hide" : true
- },
+ "logBase": 1,
+ "min": "0",
+ "max": null,
+ "format": "none",
+ "label": "ms",
+ "show": true
+ },
{
- "targetFull" : "alias(diffSeries(alias(scale(maxSeries(groupByNode(collectd.*.$domain.cephmetrics.gauge.*.mon.osd_bytes_avail,1, \"maxSeries\")),0.9), \"Raw Freespace\"),alias(maxSeries(groupByNode(keepLastValue(collectd.*.$domain.cephmetrics.gauge.*.osd.*.stat_bytes),1,\"sumSeries\")), \"Largest OSD Host\")),\"freespace after Node loss\")",
- "refId" : "C",
- "target" : "alias(diffSeries(#A,#B),\"freespace after Node loss\")",
- "textEditor" : true
+ "logBase": 1,
+ "format": "short",
+ "max": null,
+ "min": null,
+ "label": null,
+ "show": false
}
- ],
- "bars" : false,
- "description" : "This graph checks the cluster @ 90% full is enough to support the loss of the largest OSD host"
- },
+ ],
+ "xaxis": {
+ "buckets": null,
+ "show": true,
+ "values": [],
+ "mode": "time",
+ "name": null
+ },
+ "seriesOverrides": [],
+ "percentage": false,
+ "type": "graph",
+ "dashes": false,
+ "description": "Graph checking for OSD Latencies that are above 1s.",
+ "alert": {
+ "noDataState": "ok",
+ "name": "Slow OSD responses alert",
+ "frequency": "30s",
+ "notifications": [
+ {
+ "id": 1
+ }
+ ],
+ "handler": 1,
+ "executionErrorState": "alerting",
+ "message": "OSD Response time is > 1s",
+ "conditions": [
+ {
+ "operator": {
+ "type": "and"
+ },
+ "query": {
+ "params": [
+ "A",
+ "1m",
+ "now"
+ ]
+ },
+ "evaluator": {
+ "type": "gt",
+ "params": [
+ 1000
+ ]
+ },
+ "reducer": {
+ "type": "max",
+ "params": []
+ },
+ "type": "query"
+ }
+ ]
+ },
+ "hideTimeOverride": true,
+ "dashLength": 10,
+ "legend": {
+ "total": false,
+ "show": true,
+ "max": false,
+ "min": false,
+ "current": false,
+ "values": false,
+ "avg": false
+ },
+ "timeShift": null,
+ "aliasColors": {
+ "Largest OSD Host": "#890F02"
+ },
+ "lines": true,
+ "points": false,
+ "datasource": "Local",
+ "pointradius": 5,
+ "minSpan": 2
+ },
{
- "renderer" : "flot",
- "minSpan" : 2,
- "dashLength" : 10,
- "pointradius" : 5,
- "xaxis" : {
- "mode" : "time",
- "show" : true,
- "buckets" : null,
- "values" : [],
- "name" : null
- },
- "yaxes" : [
+ "bars": false,
+ "timeFrom": null,
+ "links": [],
+ "thresholds": [
{
- "min" : "0",
- "logBase" : 1,
- "label" : "ms",
- "show" : true,
- "format" : "none",
- "max" : null
- },
+ "colorMode": "critical",
+ "line": true,
+ "op": "gt",
+ "value": 10,
+ "fill": true
+ }
+ ],
+ "spaceLength": 10,
+ "nullPointMode": "null",
+ "renderer": "flot",
+ "linewidth": 1,
+ "steppedLine": false,
+ "id": 11,
+ "fill": 1,
+ "span": 2,
+ "title": "Network Errors",
+ "tooltip": {
+ "sort": 0,
+ "shared": true,
+ "value_type": "individual"
+ },
+ "targets": [
{
- "format" : "short",
- "max" : null,
- "min" : null,
- "logBase" : 1,
- "label" : null,
- "show" : false
+ "textEditor": true,
+ "target": "groupByNode(collectd.*.$domain.interface.*.if_{dropped,errors}.*,1,\"sumSeries\")",
+ "refId": "A"
}
- ],
- "legend" : {
- "max" : false,
- "total" : false,
- "avg" : false,
- "current" : false,
- "show" : true,
- "min" : false,
- "values" : false
- },
- "dashes" : false,
- "hideTimeOverride" : true,
- "percentage" : false,
- "maxDataPoints" : "",
- "alert" : {
- "executionErrorState" : "alerting",
- "frequency" : "30s",
- "handler" : 1,
- "noDataState" : "ok",
- "name" : "Slow OSD responses alert",
- "notifications" : [
+ ],
+ "points": false,
+ "xaxis": {
+ "buckets": null,
+ "show": true,
+ "values": [],
+ "mode": "time",
+ "name": null
+ },
+ "seriesOverrides": [],
+ "percentage": false,
+ "type": "graph",
+ "dashes": false,
+ "description": "Checks all interfaces for dropped/error packets, and alerts if more than 10 are seen in a 5m interval",
+ "alert": {
+ "noDataState": "no_data",
+ "name": "Network Errors alert",
+ "frequency": "30s",
+ "notifications": [
{
- "id" : 1
+ "id": 1
}
- ],
- "message" : "OSD Response time is > 1s",
- "conditions" : [
+ ],
+ "handler": 1,
+ "executionErrorState": "keep_state",
+ "message": "Network rx/tx issues detected",
+ "conditions": [
{
- "evaluator" : {
- "type" : "gt",
- "params" : [
- 1000
- ]
- },
- "type" : "query",
- "query" : {
- "params" : [
- "A",
- "1m",
+ "operator": {
+ "type": "and"
+ },
+ "query": {
+ "params": [
+ "A",
+ "5m",
"now"
]
- },
- "operator" : {
- "type" : "and"
- },
- "reducer" : {
- "type" : "max",
- "params" : []
- }
+ },
+ "evaluator": {
+ "type": "gt",
+ "params": [
+ 10
+ ]
+ },
+ "reducer": {
+ "type": "max",
+ "params": []
+ },
+ "type": "query"
}
]
- },
- "id" : 10,
- "points" : false,
- "spaceLength" : 10,
- "title" : "Slow OSD responses",
- "stack" : false,
- "nullPointMode" : "null",
- "span" : 2,
- "seriesOverrides" : [],
- "lines" : true,
- "datasource" : "Local",
- "timeShift" : null,
- "fill" : 1,
- "links" : [],
- "timeFrom" : "1h",
- "type" : "graph",
- "aliasColors" : {
- "Largest OSD Host" : "#890F02"
- },
- "steppedLine" : false,
- "bars" : false,
- "description" : "Graph checking for OSD Latencies that are above 1s.",
- "thresholds" : [
+ },
+ "dashLength": 10,
+ "stack": false,
+ "timeShift": null,
+ "aliasColors": {},
+ "lines": true,
+ "legend": {
+ "total": false,
+ "min": false,
+ "max": false,
+ "show": false,
+ "current": false,
+ "values": false,
+ "avg": false
+ },
+ "yaxes": [
{
- "fill" : true,
- "colorMode" : "critical",
- "line" : true,
- "op" : "gt",
- "value" : 1000
- }
- ],
- "targets" : [
+ "logBase": 1,
+ "format": "none",
+ "max": null,
+ "min": "0",
+ "label": null,
+ "show": true
+ },
{
- "refId" : "A",
- "textEditor" : true,
- "target" : "aliasByNode(currentAbove(keepLastValue(transformNull(collectd.*.$domain.cephmetrics.gauge.*.osd.*.perf.await,-1)),1000),1,-3)"
+ "logBase": 1,
+ "min": null,
+ "max": null,
+ "format": "short",
+ "show": false,
+ "label": null
}
- ],
- "tooltip" : {
- "sort" : 0,
- "value_type" : "individual",
- "shared" : true
- },
- "linewidth" : 1
- },
+ ],
+ "datasource": null,
+ "pointradius": 5,
+ "minSpan": 2
+ },
{
- "thresholds" : [
+ "bars": false,
+ "timeFrom": null,
+ "links": [],
+ "thresholds": [
{
- "value" : 10,
- "op" : "gt",
- "line" : true,
- "colorMode" : "critical",
- "fill" : true
+ "colorMode": "critical",
+ "line": true,
+ "fill": true,
+ "value": 85,
+ "op": "gt"
}
- ],
- "linewidth" : 1,
- "tooltip" : {
- "shared" : true,
- "sort" : 0,
- "value_type" : "individual"
- },
- "targets" : [
+ ],
+ "spaceLength": 10,
+ "nullPointMode": "null",
+ "renderer": "flot",
+ "linewidth": 2,
+ "steppedLine": false,
+ "targets": [
{
- "target" : "groupByNode(collectd.*.$domain.interface.*.if_{dropped,errors}.*,1,\"sumSeries\")",
- "textEditor" : true,
- "refId" : "A"
+ "textEditor": true,
+ "refId": "A",
+ "target": "groupByNode(collectd.*.$domain.cephmetrics.gauge.*.mon.pools.*.percent_used,-2,'maxSeries')"
}
- ],
- "bars" : false,
- "description" : "Checks all interfaces for dropped/error packets, and alerts if more than 10 are seen in a 5m interval",
- "aliasColors" : {},
- "steppedLine" : false,
- "type" : "graph",
- "timeFrom" : null,
- "fill" : 1,
- "timeShift" : null,
- "links" : [],
- "lines" : true,
- "span" : 2,
- "title" : "Network Errors",
- "stack" : false,
- "seriesOverrides" : [],
- "nullPointMode" : "null",
- "datasource" : null,
- "alert" : {
- "handler" : 1,
- "frequency" : "30s",
- "executionErrorState" : "keep_state",
- "noDataState" : "no_data",
- "notifications" : [
- {
- "id" : 1
- }
- ],
- "name" : "Network Errors alert",
- "conditions" : [
+ ],
+ "fill": 5,
+ "span": 2,
+ "title": "Pool Capacity",
+ "tooltip": {
+ "sort": 0,
+ "shared": true,
+ "value_type": "individual"
+ },
+ "id": 12,
+ "points": false,
+ "xaxis": {
+ "buckets": null,
+ "values": [],
+ "mode": "time",
+ "name": null,
+ "show": true
+ },
+ "seriesOverrides": [],
+ "percentage": false,
+ "type": "graph",
+ "dashes": false,
+ "repeat": null,
+ "alert": {
+ "noDataState": "keep_state",
+ "name": "Pool Capacity",
+ "frequency": "60s",
+ "notifications": [],
+ "handler": 1,
+ "executionErrorState": "alerting",
+ "conditions": [
{
- "query" : {
- "params" : [
- "A",
- "5m",
+ "operator": {
+ "type": "and"
+ },
+ "query": {
+ "params": [
+ "A",
+ "5m",
"now"
]
- },
- "reducer" : {
- "type" : "max",
- "params" : []
- },
- "operator" : {
- "type" : "and"
- },
- "evaluator" : {
- "type" : "gt",
- "params" : [
- 10
- ]
- },
- "type" : "query"
+ },
+ "evaluator": {
+ "params": [
+ 85
+ ],
+ "type": "gt"
+ },
+ "reducer": {
+ "params": [],
+ "type": "avg"
+ },
+ "type": "query"
}
- ],
- "message" : "Network rx/tx issues detected"
- },
- "spaceLength" : 10,
- "points" : false,
- "id" : 11,
- "dashes" : false,
- "legend" : {
- "min" : false,
- "values" : false,
- "current" : false,
- "show" : false,
- "total" : false,
- "avg" : false,
- "max" : false
- },
- "percentage" : false,
- "xaxis" : {
- "show" : true,
- "buckets" : null,
- "name" : null,
- "values" : [],
- "mode" : "time"
- },
- "yaxes" : [
+ ]
+ },
+ "dashLength": 10,
+ "stack": false,
+ "timeShift": null,
+ "aliasColors": {},
+ "lines": true,
+ "legend": {
+ "avg": false,
+ "min": false,
+ "max": false,
+ "show": true,
+ "current": false,
+ "values": false,
+ "total": false
+ },
+ "yaxes": [
{
- "format" : "none",
- "max" : null,
- "label" : null,
- "show" : true,
- "min" : "0",
- "logBase" : 1
- },
+ "logBase": 1,
+ "format": "percent",
+ "max": null,
+ "min": null,
+ "label": null,
+ "show": true
+ },
{
- "logBase" : 1,
- "min" : null,
- "show" : false,
- "label" : null,
- "max" : null,
- "format" : "short"
+ "logBase": 1,
+ "show": true,
+ "max": null,
+ "format": "short",
+ "label": null,
+ "min": null
}
- ],
- "minSpan" : 2,
- "dashLength" : 10,
- "renderer" : "flot",
- "pointradius" : 5
+ ],
+ "datasource": "Local",
+ "pointradius": 5,
+ "minSpan": 2
}
- ],
- "repeatIteration" : null,
- "repeat" : null
+ ],
+ "showTitle": true,
+ "repeatIteration": null
}
- ],
- "graphTooltip" : 0,
- "links" : [],
- "templating" : {
- "list" : []
- },
- "tags" : [],
- "editable" : false,
- "title" : "Alert Status"
+ ],
+ "templating": {
+ "list": []
+ },
+ "links": [],
+ "tags": [],
+ "graphTooltip": 0,
+ "hideControls": true,
+ "title": "Alert Status",
+ "editable": false,
+ "refresh": "10s",
+ "annotations": {
+ "list": []
+ },
+ "gnetId": null,
+ "version": 15,
+ "time": {
+ "to": "now",
+ "from": "now-1h"
+ },
+ "timezone": "browser",
+ "schemaVersion": 14,
+ "timepicker": {
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ],
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ]
+ },
+ "id": 24
}
}