From: Paul Cuzner Date: Mon, 24 Jul 2017 02:13:09 +0000 (+1200) Subject: alert-status dashboard : Enable default alerts X-Git-Tag: v1.0~40^2~2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=35be1921cc95fc871ce140b4ea863d912bdb64a7;p=cephmetrics.git alert-status dashboard : Enable default alerts dashUpdater has been updated to automatically set up a cephmetrics notifications channel (if it's not already there), and the alert-status dashboard is loaded, which references the cephmetrics channel. The ansible templates have been updated to reflect the introduction of the alert-status dashboard --- diff --git a/ansible/roles/ceph-grafana/templates/dashboard.yml b/ansible/roles/ceph-grafana/templates/dashboard.yml index 4c07703..3cf4718 100644 --- a/ansible/roles/ceph-grafana/templates/dashboard.yml +++ b/ansible/roles/ceph-grafana/templates/dashboard.yml @@ -14,8 +14,10 @@ _dashboards: - latency-by-server - network-usage-by-node - osd-node-detail + - alert-status _credentials: user: admin password: admin _grafana_port: 3000 _home_dashboard: ceph-at-a-glance +_alert_dashboard: alert-status diff --git a/dashUpdater.py b/dashUpdater.py index de6ca98..e916d38 100644 --- a/dashUpdater.py +++ b/dashUpdater.py @@ -300,6 +300,56 @@ def setup_logging(): return logger +def get_notification_id(channel_name): + """ + Check whether the given notification channel has been defined to Grafana + :param (str) notification channel name + :return: (int) id of the channel, or 0 for doesn't exist + """ + + resp = get("http://{}:{}/api/" + "alert-notifications".format(config.grafana_host, + config.grafana_port), + auth=config.grafana_credentials) + + if resp.status_code == 200: + notifications = resp.json() # list if dicts returned by Grafana + + # convert the list into a dict for lookup purposes + channels = {channel.get('name'): channel.get('id') + for channel in notifications} + if channel_name in channels: + return channels[channel_name] + else: + return 0 
+ else: + raise DashBoardException("Unable to get nofification channels from" + " Grafana") + + +def define_notification(channel_name): + """ + Add a given "seed" notification channel to Grafana using http post + :param channel_name: (str) channel name + :return: (int) http response code from post operation + (dict) response json object + """ + + seed_channel = json.dumps({"name": channel_name, + "type": "email", + "isDefault": False + }) + + resp = post('http://{}:{}/api/' + 'alert-notifications'.format(config.grafana_host, + config.grafana_port), + headers=HEADERS, + auth=config.grafana_credentials, + data=seed_channel) + + return resp.status_code, resp.json() + + def main(): rc = 0 @@ -364,11 +414,32 @@ def main(): if dashname == config.alert_dashboard: # if processing is here, this is 1st run so the alert_dashboard # is new to grafana + channel_id = get_notification_id("cephmetrics") + if channel_id: + logger.info("- notification channel already in place") + else: + http_rc, resp_json = define_notification("cephmetrics") + if http_rc == 200: + channel_id = resp_json['id'] + logger.info("- notification channel added :" + "{}".format(channel_id)) + else: + raise DashBoardException("Problem adding notification " + "channel ({})".format(http_rc)) + dash_str = json.dumps(dashjson) + dash_str = dash_str.replace('"notifications": []', + '"notifications": [{{ "id":' + ' {0} }}]'.format(channel_id)) if config.domain: - dash_str = dash_str.replace('.$domain', ".{}".format(config.domain)) + logger.debug("- queries updated, replacing $domain with " + "'{}'".format(config.domain)) + dash_str = dash_str.replace('.$domain', + ".{}".format(config.domain)) else: - dash_str = dash_str.replace('.$domain', '') + logger.debug("- queries updated, replacing $domain with NULL") + dash_str = dash_str.replace('.$domain', + '') dashjson = json.loads(dash_str) diff --git a/dashboards/current/alert-status.json b/dashboards/current/alert-status.json index a62d8ae..3717404 100644 --- 
a/dashboards/current/alert-status.json +++ b/dashboards/current/alert-status.json @@ -6,11 +6,11 @@ "canStar": true, "slug": "alert-status", "expires": "0001-01-01T00:00:00Z", - "created": "2017-07-18T00:47:55Z", - "updated": "2017-07-20T07:13:31Z", - "updatedBy": "admin", - "createdBy": "admin", - "version": 22 + "created": "2017-07-23T22:23:27Z", + "updated": "2017-07-23T22:40:32Z", + "updatedBy": "admin@localhost", + "createdBy": "admin@localhost", + "version": 1 }, "dashboard": { "annotations": { @@ -20,7 +20,7 @@ "gnetId": null, "graphTooltip": 0, "hideControls": false, - "id": 13, + "id": 45, "links": [], "refresh": "30s", "rows": [{ @@ -72,9 +72,7 @@ "message": "Cluster Health is not OK", "name": "Overall Ceph Health", "noDataState": "no_data", - "notifications": [{ - "id": 1 - }] + "notifications": [] }, "aliasColors": { "Ceph Health": "#890F02", @@ -179,9 +177,7 @@ "message": "DIsks Near full detected within the cluster. Warning threshold is 80% full.", "name": "Disks Near Full", "noDataState": "ok", - "notifications": [{ - "id": 1 - }] + "notifications": [] }, "aliasColors": {}, "bars": false, @@ -282,9 +278,7 @@ "message": "OSD Down event", "name": "OSDs Down", "noDataState": "ok", - "notifications": [{ - "id": 1 - }] + "notifications": [] }, "aliasColors": {}, "bars": true, @@ -401,9 +395,7 @@ "message": "Cluster Capacity Limit Warning", "name": "Cluster Capacity", "noDataState": "keep_state", - "notifications": [{ - "id": 1 - }] + "notifications": [] }, "aliasColors": {}, "bars": false, @@ -531,9 +523,7 @@ "message": "Potential Disk Stall", "name": "Disk I/O Stalls", "noDataState": "no_data", - "notifications": [{ - "id": 1 - }] + "notifications": [] }, "aliasColors": {}, "bars": false, @@ -640,9 +630,7 @@ "message": "PG peering is a taking a long time to finish", "name": "PG Peering Delay", "noDataState": "ok", - "notifications": [{ - "id": 1 - }] + "notifications": [] }, "aliasColors": {}, "bars": false, @@ -744,9 +732,7 @@ "message": "PG's stuck 
inactive", "name": "PG's Stuck", "noDataState": "no_data", - "notifications": [{ - "id": 1 - }] + "notifications": [] }, "aliasColors": {}, "bars": false, @@ -835,6 +821,7 @@ "style": "dark", "tags": [], "templating": { + "list": [] }, "time": { "from": "now-24h", @@ -846,6 +833,6 @@ }, "timezone": "browser", "title": "Alert Status", - "version": 22 + "version": 1 } -} +} \ No newline at end of file