]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/dashboard: Push Grafana dashboards on startup 26415/head
author Zack Cerza <zack@redhat.com>
Tue, 22 Jan 2019 00:30:44 +0000 (17:30 -0700)
committer Zack Cerza <zack@redhat.com>
Tue, 4 Jun 2019 20:26:55 +0000 (14:26 -0600)
Because we didn't have a viable way to populate Grafana dashboards in a
fully containerized context, we needed a different method. This will
work in both packaged and containerized deployments.

We provide three methods to push the dashboard JSON files to the Grafana
instance: a command (ceph dashboard grafana dashboards update); an API
endpoint (POST /api/grafana/dashboards); and an automatic on-startup
feature that is disabled by default (enable it with ceph dashboard
set-grafana-update-dashboards true). The on-startup method will also
retry periodically to avoid racing with the startup of Grafana itself.

Signed-off-by: Zack Cerza <zack@redhat.com>
src/pybind/mgr/dashboard/controllers/grafana.py
src/pybind/mgr/dashboard/exceptions.py
src/pybind/mgr/dashboard/frontend/src/app/shared/services/task-message.service.ts
src/pybind/mgr/dashboard/grafana.py [new file with mode: 0644]
src/pybind/mgr/dashboard/module.py
src/pybind/mgr/dashboard/settings.py
src/pybind/mgr/dashboard/tests/test_grafana.py
src/pybind/mgr/dashboard/tox.ini

index d165d45259d1eeda803ad9b8bc976e3385accdce..bb3e3a1e4a4b5e915fe0d0c656721515cf9deb29 100644 (file)
@@ -1,23 +1,14 @@
 # -*- coding: utf-8 -*-
 from __future__ import absolute_import
 
-import requests
-
-from . import ApiController, BaseController, Endpoint, ReadPermission
+from . import (ApiController, BaseController, Endpoint, ReadPermission,
+               UpdatePermission)
+from ..exceptions import DashboardException
+from ..grafana import GrafanaRestClient, push_local_dashboards
 from ..security import Scope
 from ..settings import Settings
 
 
-class GrafanaRestClient(object):
-
-    def url_validation(self, method, path):
-        response = requests.request(
-            method,
-            path)
-
-        return response.status_code
-
-
 @ApiController('/grafana', Scope.GRAFANA)
 class Grafana(BaseController):
 
@@ -36,3 +27,17 @@ class Grafana(BaseController):
             '/api/dashboards/uid/' + params
         response = grafana.url_validation(method, url)
         return response
+
+    @Endpoint(method='POST')
+    @UpdatePermission
+    def dashboards(self):
+        """
+        Push the local dashboard JSON files to Grafana.
+
+        :return: ``{'success': <result of push_local_dashboards()>}``
+        :rtype: dict
+        :raises DashboardException: with HTTP status 500 and component
+            'grafana' if loading or pushing the dashboards fails
+        """
+        response = dict()
+        try:
+            response['success'] = push_local_dashboards()
+        except Exception as e:  # pylint: disable=broad-except
+            # Exceptions have no ``message`` attribute on Python 3; str(e)
+            # yields the error text on both Python 2 and Python 3.
+            raise DashboardException(
+                msg=str(e),
+                component='grafana',
+                http_status_code=500,
+            )
+        return response
index 48452e3098f5d116f7e726e32a427339f5d08b16..b44a3f15b03287fbaa8be6ae0c011d7f8f1dfb84 100644 (file)
@@ -101,3 +101,7 @@ class RoleNotInUser(Exception):
         super(RoleNotInUser, self).__init__(
             "Role '{}' is not associated with user '{}'"
             .format(rolename, username))
+
+
+class GrafanaError(Exception):
+    """Error raised for Grafana configuration or connection problems."""
+    pass
index d5244f672ee4f046bdde577db2cd107acdfe3501..9af1a50a6493927d3fb074e643f5d9fbcdb67922 100644 (file)
@@ -120,6 +120,10 @@ export class TaskMessageService {
       })
   };
 
+  grafana = {
+    update_dashboards: () => this.i18n('all dashboards')
+  };
+
   messages = {
     // Pool tasks
     'pool/create': this.newTaskMessage(
@@ -327,6 +331,12 @@ export class TaskMessageService {
     'nfs/edit': this.newTaskMessage(this.commonOperations.update, (metadata) => this.nfs(metadata)),
     'nfs/delete': this.newTaskMessage(this.commonOperations.delete, (metadata) =>
       this.nfs(metadata)
+    ),
+    // Grafana tasks
+    'grafana/dashboards/update': this.newTaskMessage(
+      this.commonOperations.update,
+      this.grafana.update_dashboards,
+      () => ({})
     )
   };
 
diff --git a/src/pybind/mgr/dashboard/grafana.py b/src/pybind/mgr/dashboard/grafana.py
new file mode 100644 (file)
index 0000000..1399dce
--- /dev/null
@@ -0,0 +1,133 @@
+# -*- coding: utf-8 -*-
+from __future__ import absolute_import
+
+import json
+import os
+import time
+import requests
+
+from . import logger
+from .exceptions import GrafanaError
+from .settings import Settings
+
+
+class GrafanaRestClient(object):
+    """Small helper around ``requests`` for talking to a Grafana server."""
+
+    @staticmethod
+    def url_validation(method, path):
+        """
+        Issue an HTTP request and report only its status code.
+
+        :param method: HTTP method name, passed to ``requests.request``
+        :param path: The URL to request
+        :return: The HTTP status code of the response
+        """
+        # NOTE(review): no timeout is passed, so this call can block for as
+        # long as the remote side keeps the connection open.
+        response = requests.request(
+            method,
+            path)
+
+        return response.status_code
+
+    @staticmethod
+    def push_dashboard(dashboard_obj):
+        """
+        Upload a single dashboard to Grafana, overwriting an existing one.
+
+        :param dashboard_obj: The dashboard definition; it is embedded in
+            the request payload and serialized with ``json.dumps``
+        :return: Tuple of the HTTP status code and the decoded JSON body
+        :raises GrafanaError: If the API URL, username or password setting
+            is empty, the URL does not start with 'http', or the connection
+            to the server fails
+        :raises requests.HTTPError: If the server answers with an error
+            status (via ``raise_for_status``)
+        """
+        # Validate the settings up front so misconfiguration is reported
+        # with a specific message instead of a generic request failure.
+        if not Settings.GRAFANA_API_URL:
+            raise GrafanaError("The Grafana API URL is not set")
+        if not Settings.GRAFANA_API_URL.startswith('http'):
+            raise GrafanaError("The Grafana API URL is invalid")
+        if not Settings.GRAFANA_API_USERNAME:
+            raise GrafanaError("The Grafana API username is not set")
+        if not Settings.GRAFANA_API_PASSWORD:
+            raise GrafanaError("The Grafana API password is not set")
+        # Strip trailing slashes so the joined URL has exactly one separator.
+        url = Settings.GRAFANA_API_URL.rstrip('/') + \
+            '/api/dashboards/db'
+        headers = {
+            'Accept': 'application/json',
+            'Content-Type': 'application/json',
+        }
+        payload = {
+            'dashboard': dashboard_obj,
+            'overwrite': True,
+        }
+        try:
+            # NOTE(review): no timeout is passed here either.
+            response = requests.post(
+                url,
+                headers=headers,
+                data=json.dumps(payload),
+                auth=(Settings.GRAFANA_API_USERNAME,
+                      Settings.GRAFANA_API_PASSWORD),
+            )
+        except requests.ConnectionError:
+            raise GrafanaError("Could not connect to Grafana server")
+        # Non-2xx answers are not converted to GrafanaError; they propagate
+        # as the exception raised by raise_for_status().
+        response.raise_for_status()
+        return response.status_code, response.json()
+
+
+class Retrier(object):
+    """Callable wrapper that retries a function when it raises."""
+
+    def __init__(self, tries, sleep, func, *args, **kwargs):
+        """
+        Wraps a function. An instance of this class may be called to call that
+        function, retrying if it raises an exception. Sleeps between retries,
+        eventually reraising the original exception when retries are exhausted.
+        Once the function returns a value, that value is returned.
+
+        :param tries: How many times to try, before reraising the exception
+        :type tries: int
+        :param sleep: How many seconds to wait between tries
+        :type sleep: int|float
+        :param func: The function to execute
+        :type func: function
+        :param args: Any arguments to pass to the function
+        :type args: list
+        :param kwargs: Any keyword arguments to pass to the function
+        :type kwargs: dict
+        """
+        # NOTE(review): assert statements are skipped when Python runs with
+        # -O, and the check happens before the int() conversion below.
+        assert tries >= 1
+        self.tries = int(tries)
+        # Number of failed attempts so far; never reset, so it carries over
+        # if the same instance is called again.
+        self.tried = 0
+        self.sleep = sleep
+        self.func = func
+        self.args = args
+        self.kwargs = kwargs
+
+    def __call__(self):
+        """
+        Call the wrapped function until it succeeds or tries are used up.
+
+        :return: Whatever the wrapped function returns
+        :raises Exception: The exception of the final failed attempt
+        """
+        result = None
+        while self.tried < self.tries:
+            try:
+                result = self.func(*self.args, **self.kwargs)
+            except Exception:  # pylint: disable=broad-except
+                # On the last permitted attempt, let the error propagate.
+                if self.tried == self.tries - 1:
+                    raise
+                else:
+                    self.tried += 1
+                    time.sleep(self.sleep)
+            else:
+                return result
+
+
+def load_local_dashboards():
+    """
+    Read every ``*.json`` file from the local dashboards directory.
+
+    When the environment has ``CEPH_DEV=1`` or defines ``UNITTEST``, the
+    directory is ``monitoring/grafana/dashboards/`` resolved relative to this
+    file; otherwise it is ``/etc/grafana/dashboards/ceph-dashboard``.
+
+    :return: Mapping of file name to the parsed JSON content of that file
+    :rtype: dict
+    :raises EnvironmentError: If the directory or a file cannot be read
+    :raises ValueError: If a file does not contain valid JSON
+    """
+    if os.environ.get('CEPH_DEV') == '1' or 'UNITTEST' in os.environ:
+        path = os.path.abspath(os.path.join(
+            os.path.dirname(__file__),
+            '../../../../monitoring/grafana/dashboards/'
+        ))
+    else:
+        path = '/etc/grafana/dashboards/ceph-dashboard'
+    dashboards = dict()
+    # Only files ending in '.json' are considered; anything else is ignored.
+    for item in [p for p in os.listdir(path) if p.endswith('.json')]:
+        db_path = os.path.join(path, item)
+        with open(db_path) as f:
+            dashboards[item] = json.loads(f.read())
+    return dashboards
+
+
+def push_local_dashboards(tries=1, sleep=0):
+    """
+    Load the local dashboard files and push all of them to Grafana.
+
+    Loading happens once and is not retried; only the push step is wrapped
+    in a Retrier. Each retry starts again with the first dashboard.
+
+    :param tries: How many times to attempt the push before giving up
+    :type tries: int
+    :param sleep: How many seconds to wait between push attempts
+    :type sleep: int|float
+    :return: True once all dashboards have been pushed
+    :rtype: bool
+    :raises EnvironmentError: If the local dashboard files cannot be read
+    :raises ValueError: If a local dashboard file is not valid JSON
+    :raises Exception: Whatever the final failed push attempt raised
+    """
+    try:
+        dashboards = load_local_dashboards()
+    except (EnvironmentError, ValueError):
+        logger.exception("Failed to load local dashboard files")
+        raise
+
+    def push():
+        # Log every failed attempt, then re-raise so the Retrier can decide
+        # whether to try again.
+        try:
+            grafana = GrafanaRestClient()
+            for body in dashboards.values():
+                grafana.push_dashboard(body)
+        except Exception:
+            logger.exception("Failed to push dashboards to Grafana")
+            raise
+    retry = Retrier(tries, sleep, push)
+    retry()
+    return True
index 71760d7f0f5f6f0167f33677acd20bb9a664aeb6..7af9f3608d1ebd2ccaf8340c7c476361c5f8d8b0 100644 (file)
@@ -71,8 +71,9 @@ if 'COVERAGE_ENABLED' in os.environ:
 # pylint: disable=wrong-import-position
 from . import logger, mgr
 from .controllers import generate_routes, json_error_page
+from .grafana import push_local_dashboards
 from .tools import NotificationQueue, RequestLoggingTool, TaskManager, \
-                   prepare_url_prefix
+                   prepare_url_prefix, str_to_bool
 from .services.auth import AuthManager, AuthManagerTool, JwtManager
 from .services.sso import SSO_COMMANDS, \
                           handle_sso_command
@@ -289,6 +290,11 @@ class Module(MgrModule, CherryPyConfig):
             "desc": "Create self signed certificate",
             "perm": "w"
         },
+        {
+            "cmd": "dashboard grafana dashboards update",
+            "desc": "Push dashboards to Grafana",
+            "perm": "w",
+        },
     ]
     COMMANDS.extend(options_command_list())
     COMMANDS.extend(SSO_COMMANDS)
@@ -374,6 +380,16 @@ class Module(MgrModule, CherryPyConfig):
         NotificationQueue.start_queue()
         TaskManager.init()
         logger.info('Engine started.')
+        update_dashboards = str_to_bool(
+            self.get_module_option('GRAFANA_UPDATE_DASHBOARDS', 'False'))
+        if update_dashboards:
+            logger.info('Starting Grafana dashboard task')
+            TaskManager.run(
+                'grafana/dashboards/update',
+                {},
+                push_local_dashboards,
+                kwargs=dict(tries=10, sleep=60),
+            )
         # wait for the shutdown event
         self.shutdown_event.wait()
         self.shutdown_event.clear()
@@ -404,6 +420,9 @@ class Module(MgrModule, CherryPyConfig):
         if cmd['prefix'] == 'dashboard create-self-signed-cert':
             self.create_self_signed_cert()
             return 0, 'Self-signed certificate created', ''
+        if cmd['prefix'] == 'dashboard grafana dashboards update':
+            push_local_dashboards()
+            return 0, 'Grafana dashboards updated', ''
 
         return (-errno.EINVAL, '', 'Command not found \'{0}\''
                 .format(cmd['prefix']))
index c7f4956c76a0b47592a0ee36f3990d8d3ac80472..6b47ff731d5802940482392acac443706f6e1378 100644 (file)
@@ -39,6 +39,7 @@ class Options(object):
     GRAFANA_API_URL = ('', str)
     GRAFANA_API_USERNAME = ('admin', str)
     GRAFANA_API_PASSWORD = ('admin', str)
+    GRAFANA_UPDATE_DASHBOARDS = (False, bool)
 
     # NFS Ganesha settings
     GANESHA_CLUSTERS_RADOS_POOL_NAMESPACE = ('', str)
index abe5c15eaa4e6a3c28e17f83f1346b98159cfb68..f880400b06c22cfe289c3a5a7754bee976562d7c 100644 (file)
@@ -6,19 +6,45 @@ from .. import mgr
 class GrafanaTest(ControllerTestCase):
     @classmethod
     def setup_server(cls):
-        settings = {
-            'GRAFANA_API_URL': 'http://localhost:3000'
-        }
-        mgr.get_module_option.side_effect = settings.get
+        cls.server_settings()
         # pylint: disable=protected-access
         Grafana._cp_config['tools.authenticate.on'] = False
         cls.setup_controllers([Grafana])
 
+    @classmethod
+    def server_settings(
+            cls,
+            url='http://localhost:3000',
+            user='admin',
+            password='admin',
+    ):
+        """
+        Point the mocked ``mgr.get_module_option`` at a set of Grafana options.
+
+        Passing None for an argument leaves that option out of the settings
+        dict that backs the mock.
+
+        :param url: Value for GRAFANA_API_URL, or None to omit it
+        :param user: Value for GRAFANA_API_USERNAME, or None to omit it
+        :param password: Value for GRAFANA_API_PASSWORD, or None to omit it
+        """
+        settings = dict()
+        if url is not None:
+            settings['GRAFANA_API_URL'] = url
+        if user is not None:
+            settings['GRAFANA_API_USERNAME'] = user
+        if password is not None:
+            settings['GRAFANA_API_PASSWORD'] = password
+        mgr.get_module_option.side_effect = settings.get
+
     def test_url(self):
+        self.server_settings()
         self._get('/api/grafana/url')
         self.assertStatus(200)
         self.assertJsonBody({'instance': 'http://localhost:3000'})
 
     def test_validation(self):
+        self.server_settings()
         self._get('/api/grafana/validation/foo')
         self.assertStatus(500)
+
+    def test_dashboards(self):
+        """
+        POST to the dashboards endpoint with one option omitted at a time
+        (URL, then username, then password) and expect a 500 each time.
+        """
+        self.server_settings(url=None)
+        self._post('/api/grafana/dashboards')
+        self.assertStatus(500)
+        self.server_settings(user=None)
+        self._post('/api/grafana/dashboards')
+        self.assertStatus(500)
+        self.server_settings(password=None)
+        self._post('/api/grafana/dashboards')
+        self.assertStatus(500)
index 96ccc522f503b166057382f18a401c870bd6c392..d404d59abe138e0b5b657b5794cfea7f2414fafe 100644 (file)
@@ -24,6 +24,6 @@ commands=
     cov: coverage combine {toxinidir}/{env:COVERAGE_FILE}
     cov: coverage report
     cov: coverage xml
-    lint: pylint --rcfile=.pylintrc --jobs=5 . module.py tools.py controllers tests services exceptions.py
+    lint: pylint --rcfile=.pylintrc --jobs=5 . module.py tools.py controllers tests services exceptions.py grafana.py
     lint: pycodestyle --max-line-length=100 --exclude=.tox,venv,frontend,.vscode --ignore=E402,E121,E123,E126,E226,E24,E704,W503,E741 .
     run: {posargs}