]> git-server-git.apps.pok.os.sepia.ceph.com Git - cephmetrics.git/commitdiff
alert-status dashboard : Enable default alerts
authorPaul Cuzner <pcuzner@redhat.com>
Mon, 24 Jul 2017 02:13:09 +0000 (14:13 +1200)
committerPaul Cuzner <pcuzner@redhat.com>
Mon, 24 Jul 2017 02:13:09 +0000 (14:13 +1200)
dashUpdater has been updated to automatically set up a cephmetrics
notifications channel (if it's not already there), and the alert-status
dashboard is loaded, which references the cephmetrics channel.

The ansible templates have been updated to reflect the introduction of the
alert-status dashboard

ansible/roles/ceph-grafana/templates/dashboard.yml
dashUpdater.py
dashboards/current/alert-status.json

index 4c07703ceb2b8ad867153154da5c30dda18e4ce4..3cf4718bdbf317666410c6d24a6d52a0e27a61a0 100644 (file)
@@ -14,8 +14,10 @@ _dashboards:
   - latency-by-server
   - network-usage-by-node
   - osd-node-detail
+  - alert-status
 _credentials:
   user: admin
   password: admin
 _grafana_port: 3000
 _home_dashboard: ceph-at-a-glance
+_alert_dashboard: alert-status
index de6ca9856fd3694aa93b90b216be591425e71031..e916d38e9b3cb62a154ef5ca4f6c92da58d23b5d 100644 (file)
@@ -300,6 +300,56 @@ def setup_logging():
     return logger
 
 
+def get_notification_id(channel_name):
+    """
+    Check whether the given notification channel has been defined in Grafana
+    :param channel_name: (str) notification channel name
+    :return: (int) id of the channel, or 0 if it doesn't exist
+    """
+
+    resp = get("http://{}:{}/api/"
+               "alert-notifications".format(config.grafana_host,
+                                            config.grafana_port),
+               auth=config.grafana_credentials)
+
+    if resp.status_code == 200:
+        notifications = resp.json()     # list of dicts returned by Grafana
+
+        # convert the list into a dict for lookup purposes
+        channels = {channel.get('name'): channel.get('id')
+                    for channel in notifications}
+        if channel_name in channels:
+            return channels[channel_name]
+        else:
+            return 0
+    else:
+        raise DashBoardException("Unable to get nofification channels from"
+                                 " Grafana")
+
+
+def define_notification(channel_name):
+    """
+    Add a given "seed" notification channel to Grafana using http post
+    :param channel_name: (str) channel name
+    :return: (int) http response code from post operation
+             (dict) response json object
+    """
+
+    seed_channel = json.dumps({"name": channel_name,
+                               "type": "email",
+                               "isDefault": False
+                               })
+
+    resp = post('http://{}:{}/api/'
+                'alert-notifications'.format(config.grafana_host,
+                                             config.grafana_port),
+                headers=HEADERS,
+                auth=config.grafana_credentials,
+                data=seed_channel)
+
+    return resp.status_code, resp.json()
+
+
 def main():
 
     rc = 0
@@ -364,11 +414,32 @@ def main():
         if dashname == config.alert_dashboard:
             # if processing is here, this is 1st run so the alert_dashboard
             # is new to grafana
+            channel_id = get_notification_id("cephmetrics")
+            if channel_id:
+                logger.info("- notification channel already in place")
+            else:
+                http_rc, resp_json = define_notification("cephmetrics")
+                if http_rc == 200:
+                    channel_id = resp_json['id']
+                    logger.info("- notification channel added :"
+                                "{}".format(channel_id))
+                else:
+                    raise DashBoardException("Problem adding notification "
+                                             "channel ({})".format(http_rc))
+
             dash_str = json.dumps(dashjson)
+            dash_str = dash_str.replace('"notifications": []',
+                                        '"notifications": [{{ "id":'
+                                        ' {0} }}]'.format(channel_id))
             if config.domain:
-                dash_str = dash_str.replace('.$domain', ".{}".format(config.domain))
+                logger.debug("- queries updated, replacing $domain with "
+                             "'{}'".format(config.domain))
+                dash_str = dash_str.replace('.$domain',
+                                            ".{}".format(config.domain))
             else:
-                dash_str = dash_str.replace('.$domain', '')
+                logger.debug("- queries updated, replacing $domain with NULL")
+                dash_str = dash_str.replace('.$domain',
+                                            '')
 
             dashjson = json.loads(dash_str)
 
index a62d8ae68911b12ac4dfb5ece8aa98d99cd66b79..37174044e9688e4ca1937cf455c3094d68aa1892 100644 (file)
@@ -6,11 +6,11 @@
                "canStar": true,
                "slug": "alert-status",
                "expires": "0001-01-01T00:00:00Z",
-               "created": "2017-07-18T00:47:55Z",
-               "updated": "2017-07-20T07:13:31Z",
-               "updatedBy": "admin",
-               "createdBy": "admin",
-               "version": 22
+               "created": "2017-07-23T22:23:27Z",
+               "updated": "2017-07-23T22:40:32Z",
+               "updatedBy": "admin@localhost",
+               "createdBy": "admin@localhost",
+               "version": 1
        },
        "dashboard": {
                "annotations": {
@@ -20,7 +20,7 @@
                "gnetId": null,
                "graphTooltip": 0,
                "hideControls": false,
-               "id": 13,
+               "id": 45,
                "links": [],
                "refresh": "30s",
                "rows": [{
@@ -72,9 +72,7 @@
                                        "message": "Cluster Health is not OK",
                                        "name": "Overall Ceph Health",
                                        "noDataState": "no_data",
-                                       "notifications": [{
-                                               "id": 1
-                                       }]
+                                       "notifications": []
                                },
                                "aliasColors": {
                                        "Ceph Health": "#890F02",
                                        "message": "DIsks Near full detected within the cluster. Warning threshold is 80% full.",
                                        "name": "Disks Near Full",
                                        "noDataState": "ok",
-                                       "notifications": [{
-                                               "id": 1
-                                       }]
+                                       "notifications": []
                                },
                                "aliasColors": {},
                                "bars": false,
                                        "message": "OSD Down event",
                                        "name": "OSDs Down",
                                        "noDataState": "ok",
-                                       "notifications": [{
-                                               "id": 1
-                                       }]
+                                       "notifications": []
                                },
                                "aliasColors": {},
                                "bars": true,
                                        "message": "Cluster Capacity Limit Warning",
                                        "name": "Cluster Capacity",
                                        "noDataState": "keep_state",
-                                       "notifications": [{
-                                               "id": 1
-                                       }]
+                                       "notifications": []
                                },
                                "aliasColors": {},
                                "bars": false,
                                        "message": "Potential Disk Stall",
                                        "name": "Disk I/O Stalls",
                                        "noDataState": "no_data",
-                                       "notifications": [{
-                                               "id": 1
-                                       }]
+                                       "notifications": []
                                },
                                "aliasColors": {},
                                "bars": false,
                                        "message": "PG peering is a taking a long time to finish",
                                        "name": "PG Peering Delay",
                                        "noDataState": "ok",
-                                       "notifications": [{
-                                               "id": 1
-                                       }]
+                                       "notifications": []
                                },
                                "aliasColors": {},
                                "bars": false,
                                        "message": "PG's stuck inactive",
                                        "name": "PG's Stuck",
                                        "noDataState": "no_data",
-                                       "notifications": [{
-                                               "id": 1
-                                       }]
+                                       "notifications": []
                                },
                                "aliasColors": {},
                                "bars": false,
                "style": "dark",
                "tags": [],
                "templating": {
+                       "list": []
                },
                "time": {
                        "from": "now-24h",
                },
                "timezone": "browser",
                "title": "Alert Status",
-               "version": 22
+               "version": 1
        }
-}
+}
\ No newline at end of file