From e9f9d031f99dc290895124338b9eed52fc18b60f Mon Sep 17 00:00:00 2001 From: Zack Cerza Date: Mon, 21 Jan 2019 17:30:44 -0700 Subject: [PATCH] mgr/dashboard: Push Grafana dashboards on startup Because we didn't have a viable way to populate Grafana dashboards in a fully containerized context, we needed a different method. This will work in packaged and containerized deployments. We provide three methods to push the dashboard JSON files to the Grafana instance: a command (ceph dashboard grafana dashboards update); an API endpoint (POST /api/grafana/dashboards); and an automatic on-startup feature that is disabled by default (ceph dashboard set-grafana-update-dashboards true). The on-startup method will also retry periodically to avoid racing with the startup of Grafana itself. Signed-off-by: Zack Cerza (cherry picked from commit c0bc7dafa777cfb349f372237ec91c5285dd4df2) --- .../mgr/dashboard/controllers/grafana.py | 31 ++-- src/pybind/mgr/dashboard/exceptions.py | 4 + .../shared/services/task-message.service.ts | 10 ++ src/pybind/mgr/dashboard/grafana.py | 133 ++++++++++++++++++ src/pybind/mgr/dashboard/module.py | 21 ++- src/pybind/mgr/dashboard/settings.py | 1 + .../mgr/dashboard/tests/test_grafana.py | 34 ++++- src/pybind/mgr/dashboard/tox.ini | 2 +- 8 files changed, 217 insertions(+), 19 deletions(-) create mode 100644 src/pybind/mgr/dashboard/grafana.py diff --git a/src/pybind/mgr/dashboard/controllers/grafana.py b/src/pybind/mgr/dashboard/controllers/grafana.py index d165d45259d1e..bb3e3a1e4a4b5 100644 --- a/src/pybind/mgr/dashboard/controllers/grafana.py +++ b/src/pybind/mgr/dashboard/controllers/grafana.py @@ -1,23 +1,14 @@ # -*- coding: utf-8 -*- from __future__ import absolute_import -import requests - -from . import ApiController, BaseController, Endpoint, ReadPermission +from . 
import (ApiController, BaseController, Endpoint, ReadPermission, + UpdatePermission) +from ..exceptions import DashboardException +from ..grafana import GrafanaRestClient, push_local_dashboards from ..security import Scope from ..settings import Settings -class GrafanaRestClient(object): - - def url_validation(self, method, path): - response = requests.request( - method, - path) - - return response.status_code - - @ApiController('/grafana', Scope.GRAFANA) class Grafana(BaseController): @@ -36,3 +27,17 @@ class Grafana(BaseController): '/api/dashboards/uid/' + params response = grafana.url_validation(method, url) return response + + @Endpoint(method='POST') + @UpdatePermission + def dashboards(self): + response = dict() + try: + response['success'] = push_local_dashboards() + except Exception as e: # pylint: disable=broad-except + raise DashboardException( + msg=e.message, + component='grafana', + http_status_code=500, + ) + return response diff --git a/src/pybind/mgr/dashboard/exceptions.py b/src/pybind/mgr/dashboard/exceptions.py index 48452e3098f5d..b44a3f15b0328 100644 --- a/src/pybind/mgr/dashboard/exceptions.py +++ b/src/pybind/mgr/dashboard/exceptions.py @@ -101,3 +101,7 @@ class RoleNotInUser(Exception): super(RoleNotInUser, self).__init__( "Role '{}' is not associated with user '{}'" .format(rolename, username)) + + +class GrafanaError(Exception): + pass diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/services/task-message.service.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/services/task-message.service.ts index d5244f672ee4f..9af1a50a64939 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/services/task-message.service.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/services/task-message.service.ts @@ -120,6 +120,10 @@ export class TaskMessageService { }) }; + grafana = { + update_dashboards: () => this.i18n('all dashboards') + }; + messages = { // Pool tasks 'pool/create': this.newTaskMessage( @@ -327,6 +331,12 
@@ export class TaskMessageService { 'nfs/edit': this.newTaskMessage(this.commonOperations.update, (metadata) => this.nfs(metadata)), 'nfs/delete': this.newTaskMessage(this.commonOperations.delete, (metadata) => this.nfs(metadata) + ), + // Grafana tasks + 'grafana/dashboards/update': this.newTaskMessage( + this.commonOperations.update, + this.grafana.update_dashboards, + () => ({}) ) }; diff --git a/src/pybind/mgr/dashboard/grafana.py b/src/pybind/mgr/dashboard/grafana.py new file mode 100644 index 0000000000000..1399dce3c4b23 --- /dev/null +++ b/src/pybind/mgr/dashboard/grafana.py @@ -0,0 +1,133 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import + +import json +import os +import time +import requests + +from . import logger +from .exceptions import GrafanaError +from .settings import Settings + + +class GrafanaRestClient(object): + + @staticmethod + def url_validation(method, path): + response = requests.request( + method, + path) + + return response.status_code + + @staticmethod + def push_dashboard(dashboard_obj): + if not Settings.GRAFANA_API_URL: + raise GrafanaError("The Grafana API URL is not set") + if not Settings.GRAFANA_API_URL.startswith('http'): + raise GrafanaError("The Grafana API URL is invalid") + if not Settings.GRAFANA_API_USERNAME: + raise GrafanaError("The Grafana API username is not set") + if not Settings.GRAFANA_API_PASSWORD: + raise GrafanaError("The Grafana API password is not set") + url = Settings.GRAFANA_API_URL.rstrip('/') + \ + '/api/dashboards/db' + headers = { + 'Accept': 'application/json', + 'Content-Type': 'application/json', + } + payload = { + 'dashboard': dashboard_obj, + 'overwrite': True, + } + try: + response = requests.post( + url, + headers=headers, + data=json.dumps(payload), + auth=(Settings.GRAFANA_API_USERNAME, + Settings.GRAFANA_API_PASSWORD), + ) + except requests.ConnectionError: + raise GrafanaError("Could not connect to Grafana server") + response.raise_for_status() + return 
response.status_code, response.json() + + +class Retrier(object): + def __init__(self, tries, sleep, func, *args, **kwargs): + """ + Wraps a function. An instance of this class may be called to call that + function, retrying if it raises an exception. Sleeps between retries, + eventually reraising the original exception when retries are exhausted. + Once the function returns a value, that value is returned. + + :param tries: How many times to try, before reraising the exception + :type tries: int + :param sleep: How many seconds to wait between tries + :type sleep: int|float + :param func: The function to execute + :type func: function + :param args: Any arguments to pass to the function + :type args: list + :param kwargs: Any keyword arguments to pass to the function + :type kwargs: dict + """ + assert tries >= 1 + self.tries = int(tries) + self.tried = 0 + self.sleep = sleep + self.func = func + self.args = args + self.kwargs = kwargs + + def __call__(self): + result = None + while self.tried < self.tries: + try: + result = self.func(*self.args, **self.kwargs) + except Exception: # pylint: disable=broad-except + if self.tried == self.tries - 1: + raise + else: + self.tried += 1 + time.sleep(self.sleep) + else: + return result + + +def load_local_dashboards(): + if os.environ.get('CEPH_DEV') == '1' or 'UNITTEST' in os.environ: + path = os.path.abspath(os.path.join( + os.path.dirname(__file__), + '../../../../monitoring/grafana/dashboards/' + )) + else: + path = '/etc/grafana/dashboards/ceph-dashboard' + dashboards = dict() + for item in [p for p in os.listdir(path) if p.endswith('.json')]: + db_path = os.path.join(path, item) + with open(db_path) as f: + dashboards[item] = json.loads(f.read()) + return dashboards + + +def push_local_dashboards(tries=1, sleep=0): + try: + dashboards = load_local_dashboards() + except (EnvironmentError, ValueError): + logger.exception("Failed to load local dashboard files") + raise + + def push(): + try: + grafana = 
GrafanaRestClient() + for body in dashboards.values(): + grafana.push_dashboard(body) + except Exception: + logger.exception("Failed to push dashboards to Grafana") + raise + retry = Retrier(tries, sleep, push) + retry() + return True diff --git a/src/pybind/mgr/dashboard/module.py b/src/pybind/mgr/dashboard/module.py index 71760d7f0f5f6..7af9f3608d1eb 100644 --- a/src/pybind/mgr/dashboard/module.py +++ b/src/pybind/mgr/dashboard/module.py @@ -71,8 +71,9 @@ if 'COVERAGE_ENABLED' in os.environ: # pylint: disable=wrong-import-position from . import logger, mgr from .controllers import generate_routes, json_error_page +from .grafana import push_local_dashboards from .tools import NotificationQueue, RequestLoggingTool, TaskManager, \ - prepare_url_prefix + prepare_url_prefix, str_to_bool from .services.auth import AuthManager, AuthManagerTool, JwtManager from .services.sso import SSO_COMMANDS, \ handle_sso_command @@ -289,6 +290,11 @@ class Module(MgrModule, CherryPyConfig): "desc": "Create self signed certificate", "perm": "w" }, + { + "cmd": "dashboard grafana dashboards update", + "desc": "Push dashboards to Grafana", + "perm": "w", + }, ] COMMANDS.extend(options_command_list()) COMMANDS.extend(SSO_COMMANDS) @@ -374,6 +380,16 @@ class Module(MgrModule, CherryPyConfig): NotificationQueue.start_queue() TaskManager.init() logger.info('Engine started.') + update_dashboards = str_to_bool( + self.get_module_option('GRAFANA_UPDATE_DASHBOARDS', 'False')) + if update_dashboards: + logger.info('Starting Grafana dashboard task') + TaskManager.run( + 'grafana/dashboards/update', + {}, + push_local_dashboards, + kwargs=dict(tries=10, sleep=60), + ) # wait for the shutdown event self.shutdown_event.wait() self.shutdown_event.clear() @@ -404,6 +420,9 @@ class Module(MgrModule, CherryPyConfig): if cmd['prefix'] == 'dashboard create-self-signed-cert': self.create_self_signed_cert() return 0, 'Self-signed certificate created', '' + if cmd['prefix'] == 'dashboard grafana dashboards 
update': + push_local_dashboards() + return 0, 'Grafana dashboards updated', '' return (-errno.EINVAL, '', 'Command not found \'{0}\'' .format(cmd['prefix'])) diff --git a/src/pybind/mgr/dashboard/settings.py b/src/pybind/mgr/dashboard/settings.py index c7f4956c76a0b..6b47ff731d580 100644 --- a/src/pybind/mgr/dashboard/settings.py +++ b/src/pybind/mgr/dashboard/settings.py @@ -39,6 +39,7 @@ class Options(object): GRAFANA_API_URL = ('', str) GRAFANA_API_USERNAME = ('admin', str) GRAFANA_API_PASSWORD = ('admin', str) + GRAFANA_UPDATE_DASHBOARDS = (False, bool) # NFS Ganesha settings GANESHA_CLUSTERS_RADOS_POOL_NAMESPACE = ('', str) diff --git a/src/pybind/mgr/dashboard/tests/test_grafana.py b/src/pybind/mgr/dashboard/tests/test_grafana.py index abe5c15eaa4e6..f880400b06c22 100644 --- a/src/pybind/mgr/dashboard/tests/test_grafana.py +++ b/src/pybind/mgr/dashboard/tests/test_grafana.py @@ -6,19 +6,45 @@ from .. import mgr class GrafanaTest(ControllerTestCase): @classmethod def setup_server(cls): - settings = { - 'GRAFANA_API_URL': 'http://localhost:3000' - } - mgr.get_module_option.side_effect = settings.get + cls.server_settings() # pylint: disable=protected-access Grafana._cp_config['tools.authenticate.on'] = False cls.setup_controllers([Grafana]) + @classmethod + def server_settings( + cls, + url='http://localhost:3000', + user='admin', + password='admin', + ): + settings = dict() + if url is not None: + settings['GRAFANA_API_URL'] = url + if user is not None: + settings['GRAFANA_API_USERNAME'] = user + if password is not None: + settings['GRAFANA_API_PASSWORD'] = password + mgr.get_module_option.side_effect = settings.get + def test_url(self): + self.server_settings() self._get('/api/grafana/url') self.assertStatus(200) self.assertJsonBody({'instance': 'http://localhost:3000'}) def test_validation(self): + self.server_settings() self._get('/api/grafana/validation/foo') self.assertStatus(500) + + def test_dashboards(self): + self.server_settings(url=None) + 
self._post('/api/grafana/dashboards') + self.assertStatus(500) + self.server_settings(user=None) + self._post('/api/grafana/dashboards') + self.assertStatus(500) + self.server_settings(password=None) + self._post('/api/grafana/dashboards') + self.assertStatus(500) diff --git a/src/pybind/mgr/dashboard/tox.ini b/src/pybind/mgr/dashboard/tox.ini index 96ccc522f503b..d404d59abe138 100644 --- a/src/pybind/mgr/dashboard/tox.ini +++ b/src/pybind/mgr/dashboard/tox.ini @@ -24,6 +24,6 @@ commands= cov: coverage combine {toxinidir}/{env:COVERAGE_FILE} cov: coverage report cov: coverage xml - lint: pylint --rcfile=.pylintrc --jobs=5 . module.py tools.py controllers tests services exceptions.py + lint: pylint --rcfile=.pylintrc --jobs=5 . module.py tools.py controllers tests services exceptions.py grafana.py lint: pycodestyle --max-line-length=100 --exclude=.tox,venv,frontend,.vscode --ignore=E402,E121,E123,E126,E226,E24,E704,W503,E741 . run: {posargs} -- 2.39.5